暂无图片
暂无图片
暂无图片
暂无图片
暂无图片

MergeTree表引擎

原创 水清浅 2022-06-12
326

数据TTL(time to live)

列级TTL

create table ttl_table_v1

(id String,

create_time Datetime,

code String ttl create_time + interval 10 second,

type UInt8 ttl create_time + interval 10 second)

engine = MergeTree

PARTITION BY toYYYYMM(create_time)

order by id;


insert into ttl_table_v1 values('A000',now(),'c1',1),('A001',now()+interval 10 minute,'c1',1);


强制触发TTL

OPTIMIZE TABLE ttl_table_v1 FINAL;


修改或者添加TTL

alter table ttl_table_v1 modify column code String TTL create_time + interval 1 day;


表级TTL

create table ttl_table_v2

(id String,

create_time Datetime,

code String ,

type UInt8

)

engine = MergeTree

PARTITION BY toYYYYMM(create_time)

order by id

TTL create_time + interval 1 day;


alter table ttl_table_v2 modify ttl create_time + interval 10 second;

insert into ttl_table_v2 values('A000',now(),'c1',1),('A001',now()+interval 10 minute,'c1',1);


启停TTL

system stop/start ttl merges;


多路径存储策略

默认策略:所有分区会自动保存到config.xml配置中path指定的路径下

JBOD策略:每次执行一次insert或者merge,所产生的新分区会轮询写入各个磁盘

HOT/COLD策略:适合挂载了不同类型磁盘,HTO使用SSD高性能存储媒介,COLD使用HDD高容量存储媒介。当分区数据大小累积到阈值时会自动移动到COLD区域

config.xml配置文件中由storage_configuration标签标示。又分为disks和policies两组标签,分别标示磁盘与存储策略


/opt/clickhouse/hotdata1

/opt/clickhouse/hotdata2

/opt/clickhouse/colddata

1073741824


disk_hot1

disk_hot2

0.2

disk_hot1

1073741824

disk_cold

0.2


需要对存储路径授权:

chown -R clickhouse:clickhouse /chbase/cloddata /chbase/hotdata1 /chbase/hotdata2


查询系统表

select * from system.disks;


create table jdob_table(id UInt64)ENGINE = MergeTree()

ORDER BY id

SETTINGS storage_policy = 'default_jbod';

查看系统分区表

select * from system.parts;

create table hot_cold_table(id UInt64) ENGINE = MergeTree()

order by id

SETTINGS storage_policy = 'moving_form_hot_to_cold';


alter table hot_cold_table move part 'all_1_2_1' to disk 'disk_hot1';

alter table hot_cold_table move part 'all_1_2_1' to volume 'cold';


replacingMergeTree引擎:去重引擎

create table replace_table(

id String,

code String,

create_time DateTime

)ENGINE = ReplacingMergeTree() -- 括号中可以字段为版本号,如果字段为create_time则在删除重复数据时保留时间最长的一行

partition by toYYYYMM(create_time)

order by (id,code)

primary key id;


insert into replace_table values('A001','A1',now()),('A001','A1',now()),('A001','A2',now()),('A003','A3',now());


optimize table replace_table final;


SummingMergeTree引擎:汇总引擎,可以按照预先定义的条件聚合汇总数据

create table summing_table(

id String,

city String,

v1 UInt32,

v2 Float64,

create_time DateTime

)ENGINE = SummingMergeTree()

partition by toYYYYMM(create_time)

order by (id,city)

primary key id;

insert into summing_table values('A001','tianjin',10,20,now()),('A001','tianjin',20,30,now()),('A001','beijing',20,30,now()),('A001','tianjin',10,20,now()),('A002','tianjin',60,50,now());

OPTIMIZE TABLE summing_table FINAL;


AggregatingMergeTree引擎:可以按照预先定义的条件聚合数据。

create table agg_table(

id String,

city String,

code AggregateFunction(uniq,String), -- uniq(code)

value AggregateFunction(sum,UInt32), -- sum(value)

create_time Datetime

)ENGINE = AggregatingMergeTree()

partition by toYYYYMM(create_time)

order by (id,city)

primary key id;


insert into agg_table select 'A000','tianjin',uniqState('code1'),sumState(toUInt32(100)),now();

optimize table agg_table final;

CollapsingMergeTree:以增代删的操作,对数据打标识,sign标记为1代表有效,-1为删除数据

create table collpase_table(

id String,

code Int32,

create_time Datetime,

sign Int8

)ENGINE = CollapsingMergeTree(sign)

partition by toYYYYMM(create_time)

order by id;

insert into collpase_table values('A000',100,'2020-12-25 00:00:00',1);

insert into collpase_table values('A000',100,'2020-12-25 00:00:00',-1);

insert into collpase_table values('A000',120,'2020-12-25 00:00:00',1);

optimize table collpase_table final;


VersionedCollapsingMergeTree表引擎:作用跟CollapsingMergeTree一样,但是对数据写入顺序没有要求

「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。

评论