Hive Notes

Create table statement:

CREATE TABLE testB (id INT,name string,area string) PARTITIONED BY (create_time string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE;
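To verify what Hive actually stored, the DDL can be read back (a quick sanity check; exact output varies by Hive version):

SHOW CREATE TABLE testB;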

Load data from the local filesystem:

LOAD DATA LOCAL INPATH '/root/sourceA.txt' into table testa PARTITION(create_time='2019-12-27');

Load data from HDFS:

LOAD DATA inpath '/user/root/sourceA.txt' into table testB PARTITION(create_time='2019-12-27');
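A quick check that the partition was populated (note that a non-LOCAL LOAD moves the source file into the table's partition directory on HDFS):

SELECT * FROM testB WHERE create_time = '2019-12-27';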

Export data to an HDFS directory:

insert overwrite directory '/user/ljbao/oput' select * from testb;

View the exported file:

hdfs dfs -cat /user/ljbao/oput/000000_0
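By default the export above separates fields with the \001 control character. Since Hive 0.11 a row format can be specified on export; a hedged variant of the statement above:

insert overwrite directory '/user/ljbao/oput' row format delimited fields terminated by ',' select * from testb;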

Creating an HDFS user

Example:

1. First create a regular user hn on Linux and set its password.

sudo -u hdfs hadoop fs -mkdir /user/<username>
sudo -u hdfs hadoop fs -mkdir /user/hn
sudo -u hdfs hadoop fs -chown <username>:<group> /user/<username>
sudo -u hdfs hadoop fs -chown hn:hdfs /user/hn
Test by uploading a file:
hdfs dfs -put <fileName> /user/hn

HDFS commands can be executed directly from an interactive Hive/Beeline session

Example:

0: jdbc:hive2://localhost:10000> dfs -ls /user/ljbao;
+------------------------------------------------------------------------------+--+
|                                  DFS Output                                  |
+------------------------------------------------------------------------------+--+
| Found 2 items                                                                |
| drwx------   - ljbao ljbao          0 2019-12-26 22:09 /user/ljbao/.staging  |
| drwxr-xr-x   - ljbao ljbao          0 2019-12-26 22:09 /user/ljbao/oput      |
+------------------------------------------------------------------------------+--+
3 rows selected (0.011 seconds)
0: jdbc:hive2://localhost:10000>
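The same mechanism works for any dfs subcommand; for example, viewing the file exported earlier:

dfs -cat /user/ljbao/oput/000000_0;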

Create table statements

create table if not exists ljbao.employees (
name string comment 'employee name',
salary float comment 'salary',
deductions map<string, float> comment 'keys are name and values are per...'
)
location '/user/ljbao/employees'
tblproperties ('creator'='me','created_at'='2019-12-29 10:00:00');
create table if not exists ljbao.employee4(
name string comment 'employee names',
salary float comment 'employee salary'
) location '/user/ljbao/employees';
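To read back the metadata declared above:

show tblproperties ljbao.employees;
describe formatted ljbao.employees;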

Partitioned tables

-- Create a partitioned table
create table ljbao.employee2(name string, salary float, subordinates array<String>) partitioned by (country string, state string);
-- Add a partition
alter table employee2 add partition ( country="china", state="zhejiang");
-- Inspect the storage layout on HDFS
[root@hadoop1 ~]# hdfs dfs -ls -R /apps/hive/warehouse/ljbao.db
drwxrwxrwx   - hive      hadoop          0 2019-12-25 20:47 /apps/hive/warehouse/ljbao.db/a
-rwxrwxrwx   3 hive      hadoop          3 2019-12-25 20:47 /apps/hive/warehouse/ljbao.db/a/000000_0
drwxrwxrwx   - ljbao     hadoop          0 2019-12-30 01:46 /apps/hive/warehouse/ljbao.db/employee2
drwxrwxrwx   - ljbao     hadoop          0 2019-12-30 01:46 /apps/hive/warehouse/ljbao.db/employee2/country=china
drwxrwxrwx   - ljbao     hadoop          0 2019-12-30 01:46 /apps/hive/warehouse/ljbao.db/employee2/country=china/state=zhejiang
drwxrwxrwx   - root      hadoop          0 2019-12-24 01:07 /apps/hive/warehouse/ljbao.db/hive_01
drwxrwxrwx   - root      hadoop          0 2019-12-25 20:11 /apps/hive/warehouse/ljbao.db/t1
drwxrwxrwx   - anonymous hadoop          0 2019-12-26 20:21 /apps/hive/warehouse/ljbao.db/testa
drwxrwxrwx   - anonymous hadoop          0 2019-12-26 20:21 /apps/hive/warehouse/ljbao.db/testa/create_time=2019-12-27
-rwxrwxrwx   3 anonymous hadoop         55 2019-12-26 20:21 /apps/hive/warehouse/ljbao.db/testa/create_time=2019-12-27/sourceA.txt
drwxrwxrwx   - anonymous hadoop          0 2019-12-26 20:33 /apps/hive/warehouse/ljbao.db/testb
drwxrwxrwx   - anonymous hadoop          0 2019-12-26 20:33 /apps/hive/warehouse/ljbao.db/testb/create_time=2019-12-27
[root@hadoop1 ~]#
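The partition added above can also be listed from within Hive:

show partitions ljbao.employee2;
-- expected, per the listing above: country=china/state=zhejiang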

Using the Hive Metastore API to work with Hive:

package hive.util;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.thrift.TException;
import org.slf4j.Logger;

import java.util.List;

public class HiveClient {
    protected final Logger logger = org.slf4j.LoggerFactory.getLogger(this.getClass());
    IMetaStoreClient client;

    public HiveClient() {
        try {
            HiveConf hiveConf = new HiveConf();
            // load connection settings (e.g. hive.metastore.uris) from hive-site.xml on the classpath
            hiveConf.addResource("hive-site.xml");
            // allowEmbedded=true permits falling back to an embedded metastore if no remote URI is configured
            client = RetryingMetaStoreClient.getProxy(hiveConf, true);
        } catch (MetaException ex) {
            logger.error(ex.getMessage());
        }
    }

    public List<String> getAllDatabases() {
        List<String> databases = null;
        try {
            databases = client.getAllDatabases();
        } catch (TException ex) {
            logger.error(ex.getMessage());
        }
        return databases;
    }

    public Database getDatabase(String db) {
        Database database = null;
        try {
            database = client.getDatabase(db);
        } catch (TException ex) {
            logger.error(ex.getMessage());
        }
        return database;
    }

    public List<FieldSchema> getSchema(String db, String table) {
        List<FieldSchema> schema = null;
        try {
            schema = client.getSchema(db, table);
        } catch (TException ex) {
            logger.error(ex.getMessage());
        }
        return schema;
    }

    public List<String> getAllTables(String db) {
        List<String> tables = null;
        try {
            tables = client.getAllTables(db);
        } catch (TException ex) {
            logger.error(ex.getMessage());
        }
        return tables;
    }

    public String getLocation(String db, String table) {
        String location = null;
        try {
            location = client.getTable(db, table).getSd().getLocation();
        }catch (TException ex) {
            logger.error(ex.getMessage());
        }
        return location;
    }

    public static void main(String[] args) {
        HiveClient hiveClient = new HiveClient();
        List<String> tables = hiveClient.getAllTables("ljbao");
        System.out.println(tables);
        // release the underlying Thrift connection
        hiveClient.client.close();
    }

}

Chapter 4 - Data Definition

describe database db1;
describe database extended db1;
set hive.cli.print.current.db=true;    -- show the current database in the CLI prompt
drop database if exists db1 cascade;   -- drop a database even if it still contains tables
alter database db1 set dbproperties ('edit'='linjb');   -- modify database properties
describe extended tb4;
describe formatted tb4;   -- show the table's full description
create external table if not exists tb5(id int, name string) row format delimited fields terminated by ',' location '/opt/lzh/dbs/tb5';    -- create an external table
set hive.mapred.mode=strict;   -- strict mode: queries on partitioned tables must include a partition filter (see the example below)
create table tb6(id int, name string) partitioned by (country string, state string);   -- create a table with partition columns
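With hive.mapred.mode=strict, a query against the partitioned table tb6 must include a partition predicate; a minimal sketch:

select * from tb6 where country = 'china';   -- accepted: filters on a partition column
-- select * from tb6;                        -- rejected in strict mode (no partition filter)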

Chapter 5 - Data Manipulation

Loading data: from the local filesystem

create table tb7(name string, age int) row format delimited fields terminated by ' ' lines terminated by '\n'; -- create the table (fields separated by a space, matching the data file below)

Contents of load_data_local.txt:

zhangsan 30
lisi 50
wangwu 60
peiqi 6
load data local inpath '/opt/lzh/load_data_local.txt' into table tb7;   -- load from the local filesystem
load data inpath '/apps/hive/warehouse/db1.db/tb7/load_data_local.txt' into table tb8;   -- load from HDFS (the source file is moved)
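LOAD DATA appends by default; adding OVERWRITE replaces the existing contents of the table (or partition). A variant of the local load above:

load data local inpath '/opt/lzh/load_data_local.txt' overwrite into table tb7;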

Set hive.exec.mode.local.auto=true to let small queries run locally instead of launching MapReduce jobs.

Set hive.limit.optimize.enable=true so that LIMIT queries can sample the data source instead of scanning it in full.

Set hive.exec.parallel=true to run independent stages of a job in parallel.
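These are all session-level settings; a typical session might combine them before a query (the table is from the earlier examples, and the LIMIT value is illustrative):

set hive.exec.mode.local.auto=true;
set hive.limit.optimize.enable=true;
set hive.exec.parallel=true;
select name, salary from ljbao.employees limit 10;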