CREATE TABLE event_log(
......
)
with (orientation=column)
PARTITION BY RANGE (time_stamp) (
PARTITION el_less_than_1731945600 VALUES LESS THAN (1731945600),
PARTITION el_less_than_1732032000 VALUES LESS THAN (1732032000),
PARTITION el_less_than_maxvalue VALUES LESS THAN (MAXVALUE)
);
建立了张列式存储表,数据量4kw+,单表部分字段简单查询毫秒级,但是一加上排序就得10多秒,想对排序字段添加索引又报错说不允许添加:SQL 错误 [0A000]: [..61.90:57412/..60.108:5432] ERROR: Global partition index does not support column store.
想请教下这种情况如何处理,排序也是常用业务,一排序就性能大低,聚合查询时也很慢,请问是我打开方式不对吗,虚心请教
补充说明,我后面重建了不带分区的相同结构的表,数据量也一致均为4kw+,对排序字段创建了索引、局部聚簇,排序效率仍然没有提升,explain PERFORMANCE查看没有命中索引
--分析查询
explain PERFORMANCE
SELECT id,src_ip, original_ip, src_host_identity, src_ip_high, src_ip_low, src_port, dest_ip, dst_host_identity, dest_ip_high, dest_ip_low, dest_port, ip_version, src_mac, dest_mac, proto
FROM event_log_cxd_with_index
where src_ip like '%0%' and proto = 'TCP'
ORDER BY time_stamp DESC
LIMIT 20;
--打印结果
Row Adapter (cost=19922333.72..19922333.72 rows=20 width=327) (actual time=27800.910..27800.913 rows=20 loops=1)
Output: id, src_ip, original_ip, src_host_identity, src_ip_high, src_ip_low, src_port, dest_ip, dst_host_identity, dest_ip_high, dest_ip_low, dest_port, ip_version, src_mac, dest_mac, proto, time_stamp
(CPU: ex c/r=180029889312447, ex row=20, ex cyc=3600597786248940, inc cyc=3979608079471620)
-> Vector Limit (cost=19922333.67..19922333.72 rows=20 width=327) (actual time=27800.901..27800.901 rows=20 loops=1)
Output: id, src_ip, original_ip, src_host_identity, src_ip_high, src_ip_low, src_port, dest_ip, dst_host_identity, dest_ip_high, dest_ip_low, dest_port, ip_version, src_mac, dest_mac, proto, time_stamp
(CPU: ex c/r=9475257332860, ex row=20, ex cyc=189505146657200, inc cyc=379010293222680)
-> Vector Sort (cost=19922333.67..19947237.45 rows=9961511 width=327) (actual time=27800.895..27800.895 rows=20 loops=1)
Output: id, src_ip, original_ip, src_host_identity, src_ip_high, src_ip_low, src_port, dest_ip, dst_host_identity, dest_ip_high, dest_ip_low, dest_port, ip_version, src_mac, dest_mac, proto, time_stamp
Sort Key: event_log_cxd_with_index.time_stamp DESC
Sort Method: top-N heapsort Memory: 21kB
(Buffers: shared hit=14120)
(CPU: ex c/r=-799311578134, ex row=9701074, ex cyc=-7754180768536294400, inc cyc=189505146565480)
-> CStore Scan on public.event_log_cxd_with_index (cost=0.00..19657261.44 rows=9961511 width=327) (actual time=28.153..19046.583 rows=9701074 loops=1)
Output: id, src_ip, original_ip, src_host_identity, src_ip_high, src_ip_low, src_port, dest_ip, dst_host_identity, dest_ip_high, dest_ip_low, dest_port, ip_version, src_mac, dest_mac, proto, time_stamp
Filter: (((event_log_cxd_with_index.src_ip)::text ~~ '%0%'::text) AND ((event_log_cxd_with_index.proto)::text = 'TCP'::text))
Rows Removed by Filter: 32275833
(Buffers: shared hit=14120)
(CPU: ex c/r=0, ex row=0, ex cyc=7754370273682860032, inc cyc=7754370273682860032)
Total runtime: 27857.006 ms
评论
有用 0列存上是不允许创建全局索引的,可以创建基于每个分区的局部索引;列存是非常适合做聚合的,尤其是少量字段的情况下,你要特别注意查询消除分区,跨分区的检索要慢很多,毕竟扫描的数据块也多。
CREATE TABLE event_log (
id int,
message VARCHAR(100),
time_stamp TIMESTAMP
)
WITH (ORIENTATION = COLUMN)
PARTITION BY RANGE (time_stamp) (
PARTITION p1 VALUES LESS THAN (‘2002-09-10 00:00:00’::TIMESTAMP),
PARTITION p2 VALUES LESS THAN (‘2002-09-11 08:00:00’::TIMESTAMP),
PARTITION p_other VALUES LESS THAN (MAXVALUE)
);
CREATE INDEX idx_event_log_timestamp ON event_log(time_stamp) local;
testdb=# \d+ event_log
Table “public.event_log”
Column | Type | Modifiers | Storage | Stats target | Description
------------±----------------------------±----------±---------±-------------±------------
id | integer | | plain | |
message | character varying(100) | | extended | |
time_stamp | timestamp without time zone | | plain | |
Indexes:
“idx_event_log_timestamp” psort (time_stamp) LOCAL TABLESPACE pg_default
Partition By RANGE(time_stamp)
Number of partitions: 3 (View pg_partition to check each partition range.)
Has OIDs: no
Options: orientation=column, compression=low
评论
有用 0
墨值悬赏

