
where VALUES @> array[标签s] -- 与where VALUES && array[标签s] -- 或where not VALUES @> array[标签s] -- 非Copy
聚合bitmap: 与、或、非and_agg(bitmaps) where KEY in (标签s) -- 与or_agg(bitmaps) where KEY in (标签s) -- 或except(bitmap1,bitmap2) -- 非Copy
create table t_tag_dict (tag int primary key, -- 标签(人群)idinfo text, -- 人群描述crt_time timestamp -- 时间);Copy
insert into t_tag_dict values (1, '男', now());insert into t_tag_dict values (2, '女', now());insert into t_tag_dict values (3, '大于24岁', now());-- ...insert into t_tag_dictselect generate_series(4,100000), md5(random()::text), clock_timestamp();Copy
create table t_user_tag (uid int8, -- 用户idtag int, -- 用户对应标签(人群)mod_time timestamp, -- 时间primary key (tag,uid));Copy
create or replace function gen_rand_tag(int,int) returns setof int as$$select case when random() > 0.5 then 1::int else 2::int end as tagunion allselect ceil(random()*$1)::int as tag from generate_series(1,$2);$$ language sql strict volatile;insert into t_user_tagselect uid, gen_rand_tag(100000,63) as tag, clock_timestamp()from generate_series(1,20000000) as uid on conflict (uid,tag) do nothing;
create sequence seq;vi test.sqlinsert into t_user_tagselect uid, gen_rand_tag(100000,63) as tag, clock_timestamp()from nextval('seq'::regclass) as uidon conflict(tag,uid) do nothing;pgbench -M prepared -n -r -P 1 -f ./test.sql -c 50 -j 50 -t 400000Copy
select count(*) from(select uid from t_user_tag where tag=1intersectselect uid from t_user_tag where tag=3) t;-- Time: 1494.789 ms (00:01.495)
select uid from t_user_tag where tag=1intersectselect uid from t_user_tag where tag=3;-- Time: 3246.184 ms (00:03.246)Copy
select count(*) from(select uid from t_user_tag where tag=1unionselect uid from t_user_tag where tag=3unionselect uid from t_user_tag where tag=10unionselect uid from t_user_tag where tag=200) t;-- Time: 3577.714 ms (00:03.578)
select uid from t_user_tag where tag=1unionselect uid from t_user_tag where tag=3unionselect uid from t_user_tag where tag=10unionselect uid from t_user_tag where tag=200;-- Time: 5682.458 ms (00:05.682)Copy
public | t_user_tag | table | postgres | 62 GB |public | t_user_tag_pkey | index | postgres | t_user_tag | 61 GB |Copy
create table t_tag_dict (tag int primary key, -- 标签(人群)idinfo text, -- 人群描述crt_time timestamp -- 时间);Copy
insert into t_tag_dict values (1, '男', now());insert into t_tag_dict values (2, '女', now());insert into t_tag_dict values (3, '大于24岁', now());-- ...insert into t_tag_dictselect generate_series(4,100000), md5(random()::text), clock_timestamp();Copy
create table t_user_tags (uid int8 primary key, -- 用户idtags int[], -- 用户标签(人群)数组mod_time timestamp -- 时间);Copy
create or replace function gen_rand_tags(int,int) returns int[] as $$select array_agg(ceil(random()*$1)::int) from generate_series(1,$2);$$ language sql strict;Copy
select gen_rand_tags(100000, 8);gen_rand_tags---------------------------------------------------{43494,46038,74102,25308,99129,40893,33653,29690}(1 row)Copy
insert into t_user_tagsselect generate_series(1,10000000),array_append(gen_rand_tags(100000, 63),1), now();insert into t_user_tagsselect generate_series(10000001,20000000),array_append(gen_rand_tags(100000, 63),2), now();Copy
create index idx_t_user_tags_1 on t_user_tags using gin (tags);Copy
select count(uid) from t_user_tags where tags @> array[1,3];
select uid from t_user_tags where tags @> array[1,3];Copy
select count(uid) from t_user_tags where tags && array[1,3,10,200];
select uid from t_user_tags where tags && array[1,3,10,200];Copy
SELECT roaringbitmap('{1,2,3}') | roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') & roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') # roaringbitmap('{3,4,5}');SELECT roaringbitmap('{1,2,3}') - roaringbitmap('{3,4,5}');
SELECT rb_build_agg(e) FROM generate_series(1,100) e;SELECT rb_or_agg(bitmap) FROM t1;SELECT rb_and_agg(bitmap) FROM t1;SELECT rb_xor_agg(bitmap) FROM t1;
rb_or_cardinality_aggrb_and_cardinality_aggrb_xor_cardinality_agg
Opperator Input Output Desc Example Result@> roaringbitmap,roaringbitmap bool contains roaringbitmap('{1,2,3}') @> roaringbitmap('{3,4,5}') f@> roaringbitmap,integer bool contains roaringbitmap('{1,2,3,4,5}') @> 3 t<@ roaringbitmap,roaringbitmap bool is contained by roaringbitmap('{1,2,3}') f<@ integer,roaringbitmap bool is contained by 3 t&& roaringbitmap,roaringbitmap bool overlap (have elements in common) roaringbitmap('{1,2,3}') && roaringbitmap('{3,4,5}') t= roaringbitmap,roaringbitmap bool equal roaringbitmap('{1,2,3}') = roaringbitmap('{3,4,5}') f<> roaringbitmap,roaringbitmap bool not equal roaringbitmap('{1,2,3}') <> roaringbitmap('{3,4,5}') tCopy
create table t_tag_users (tagid int primary key, -- 用户标签(人群)iduid_offset int, -- 由于userid是int8类型,roaringbitmap内部使用int4
userbits roaringbitmap, -- 用户id聚合的 bitmapmod_time timestamp -- 时间);Copy
insert into t_tag_usersselect tagid, uid_offset, rb_build_agg(uid::int) as userbits from(selectunnest(tags) as tagid,(uid (2^31)::int8) as uid_offset,mod(uid, (2^31)::int8) as uidfrom t_user_tags) tgroup by tagid, uid_offset;Copy
select sum(ub) from(select uid_offset,rb_and_cardinality_agg(userbits) as ubfrom t_tag_userswhere tagid in (1,3)group by uid_offset) t;
select uid_offset,rb_and_agg(userbits) as ubfrom t_tag_userswhere tagid in (1,3)group by uid_offset;Copy
select sum(ub) from(select uid_offset,rb_or_cardinality_agg(userbits) as ubfrom t_tag_userswhere tagid in (1,3,10,200)group by uid_offset) t;
select uid_offset,rb_or_agg(userbits) as ubfrom t_tag_userswhere tagid in (1,3,10,200)group by uid_offset;Copy
环境:
数据库 | 计算规格 | 存储规格 |
MySQL 8.0 | 8C 32G | 1500GB ESSD |
PG 12 | 8C 32G | 1500GB ESSD |
性能对比:
CASE(12.8亿 user/tags)(2000万, 64 tags/user) | 方案1(MySQL、PG)多对多:常规方案 | 方案2(PG)一对多:数组、倒排索引 | 方案3(PG)一对多:位图 | 方案3 vs 方案1提升% |
与查询圈选用户速度 | 1.5秒 | 42毫秒 | 1.5毫秒 | 99900% |
或查询圈选用户速度 | 3.6秒 | 3秒 | 1.7毫秒 | 211665% |
空间占用(表) | 62GB | 3126MB | 1390MB | 4467% |
空间占用(索引) | 61GB | 3139MB | 2MB | 3123100% |
build索引速度 | - | 20分钟 | 0 | - |


最后修改时间:2020-02-25 13:36:25
文章转载自PostgreSQL中文社区,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




