Hive is typically used as a data warehouse for offline data analysis. We usually connect to Hive directly over JDBC, and sometimes we query Hive through SparkSQL, which can be more efficient. Once Kerberos authentication is enabled, the connection setup changes slightly: the client has to log in with a keytab and the JDBC URL has to carry the HiveServer2 service principal (a short sketch follows the version list). There is plenty of material online, but here is my working setup:
Software versions:
Hive 1.2.1
Spark 2.2.0
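For reference, the only visible change on the URL side is the extra principal parameter. A minimal sketch, reusing the hostname and realm from the code below; your HiveServer2 service principal and realm will differ:

// Without Kerberos: host, port and database are enough
String plainURL    = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs";
// With Kerberos: append the HiveServer2 service principal; the JDBC driver expands _HOST to the server hostname
String kerberosURL = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs;principal=hive/_HOST@CHINAUNICOM";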
1. Using Hive from Java. First, the pom.xml:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <log4j.version>1.2.17</log4j.version>
    <slf4j.version>1.7.22</slf4j.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hive.version>1.2.1</hive.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>${hive.version}</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
Main class code:
package com.hadoop.ljs.hive121;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * @author: Created By lujisen
 * @company ChinaUnicom Software JiNan
 * @date: 2020-03-06 14:19
 * @version: v1.0
 * @description: com.hadoop.ljs
 */
public class HiveKerberosDemo {

    private static String jdbcDriver = "org.apache.hive.jdbc.HiveDriver";
    public static final String krb5Conf = "D:\\kafkaSSL\\krb5.conf";
    public static final String userTicket = "lujs/unicom@CHINAUNICOM";
    public static final String userKeytab = "D:\\kafkaSSL\\lujs.unicom.keytab";
    private static String hiveConnectURL = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs;principal=hive/_HOST@CHINAUNICOM";

    public static void main(String[] args) throws Exception {
        Class.forName(jdbcDriver);
        // Log in with the Kerberos account from the keytab
        System.setProperty("java.security.krb5.conf", krb5Conf);
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        conf.set("hadoop.security.authorization", "true");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(userTicket, userKeytab);

        Connection connection = null;
        ResultSet rs = null;
        PreparedStatement ps = null;
        try {
            connection = DriverManager.getConnection(hiveConnectURL);
            ps = connection.prepareStatement("select * from lujs.table1");
            rs = ps.executeQuery();
            while (rs.next()) {
                System.out.println("id: " + rs.getString(1) + " name: " + rs.getString(2));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
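One note on the demo above: the connection, statement and result set are never closed. A minimal variation using Java 7 try-with-resources, with the same hiveConnectURL and table as above, closes them automatically:

// Same query as above, but the resources are closed automatically when the block exits
try (Connection connection = DriverManager.getConnection(hiveConnectURL);
     PreparedStatement ps = connection.prepareStatement("select * from lujs.table1");
     ResultSet rs = ps.executeQuery()) {
    while (rs.next()) {
        System.out.println("id: " + rs.getString(1) + " name: " + rs.getString(2));
    }
}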
2. Connecting to Hive from SparkSQL. Again, the pom.xml:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
    <log4j.version>1.2.17</log4j.version>
    <slf4j.version>1.7.22</slf4j.version>
    <spark.version>2.2.0</spark.version>
    <hadoop.version>2.7.3</hadoop.version>
    <hive.version>1.2.1</hive.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hive</groupId>
        <artifactId>hive-jdbc</artifactId>
        <version>${hive.version}</version>
        <exclusions>
            <exclusion>
                <groupId>log4j</groupId>
                <artifactId>log4j</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
</dependencies>
Main class code:
package com.hadoop.ljs.spark220.security;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.io.IOException;
import java.util.Properties;

/**
 * @author: Created By lujisen
 * @company ChinaUnicom Software JiNan
 * @date: 2020-03-06 15:03
 * @version: v1.0
 * @description: com.hadoop.ljs.spark220.security
 */
public class SparkKerberosHive {

    public static final String krb5Conf = "D:\\kafkaSSL\\krb5.conf";
    public static final String userTicket = "lujs/unicom@CHINAUNICOM";
    public static final String userKeytab = "D:\\kafkaSSL\\lujs.unicom.keytab";
    /* Note: the principal in the URL below normally stays the same; only userTicket and userKeytab need to change. */
    private static String hiveConnectURL = "jdbc:hive2://salver158.hadoop.unicom:10000/lujs;principal=hive/_HOST@CHINAUNICOM";

    public static void main(String[] args) throws IOException {
        // Initialize the Kerberos configuration and log in from the keytab
        System.setProperty("java.security.krb5.conf", krb5Conf);
        Configuration conf = new Configuration();
        conf.set("hadoop.security.authentication", "kerberos");
        conf.set("hadoop.security.authorization", "true");
        UserGroupInformation.setConfiguration(conf);
        UserGroupInformation.loginUserFromKeytab(userTicket, userKeytab);

        SparkConf sparkConf = new SparkConf()
                .setAppName("SparkKerberosHive")
                .setMaster("local[*]")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();

        Properties props = new Properties();
        props.setProperty("hadoop.security.authentication", "kerberos");
        props.setProperty("hadoop.security.authorization", "true");

        // Read the Hive table over JDBC and register it as a temporary view
        Dataset<Row> df = sparkSession.read().jdbc(hiveConnectURL, "lujs.table1", props);
        df.createOrReplaceTempView("table1");
        df = sparkSession.sql("select * from table1 limit 10");
        df.show();
        sparkSession.stop();
    }
}
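Reading Hive through its JDBC driver works, but it funnels every row through a single connection. If the cluster's hive-site.xml (plus core-site.xml/hdfs-site.xml) is on the classpath and the spark-hive_2.11 dependency is added to the pom, Spark can also query Hive natively with enableHiveSupport(). This is only a rough sketch under those assumptions, with the Kerberos login done exactly as above:

// Rough sketch: query a Kerberized Hive through Spark's native Hive support instead of JDBC.
// Assumes spark-hive_2.11 is on the classpath together with the cluster's hive-site.xml,
// and that UserGroupInformation.loginUserFromKeytab(...) has already been called as above.
SparkSession spark = SparkSession.builder()
        .appName("SparkNativeHive")
        .master("local[*]")
        .enableHiveSupport()
        .getOrCreate();
Dataset<Row> rows = spark.sql("select * from lujs.table1 limit 10");
rows.show();
spark.stop();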
If you found this article helpful, please follow the WeChat official account "大数据开发运维架构" and share it with your friends. Thanks for your support!





