▌查询管道

哈希连接 并行哈希连接 Grace哈希连接
完全排序合并连接 部分合并连接 直接连接

SELECTtable,formatReadableQuantity(sum(rows)) AS rows,formatReadableSize(sum(data_uncompressed_bytes)) AS data_uncompressedFROM system.partsWHERE (database = 'imdb_large') AND activeGROUP BY tableORDER BY table ASC;┌─table──┬─rows───────────┬─data_uncompressed─┐│ actors │ 1.00 million │ 21.81 MiB ││ roles │ 100.00 million │ 2.63 GiB │└────────┴────────────────┴───────────────────┘
SELECT getSetting('max_threads');┌─getSetting('max_threads')─┐│ 30 │└───────────────────────────┘

SELECT *FROM roles AS rJOIN actors AS a ON r.actor_id = a.idFORMAT `Null`SETTINGS join_algorithm = 'hash';0 rows in set. Elapsed: 0.817 sec. Processed 101.00 million rows, 3.67 GB (123.57 million rows/s., 4.49 GB/s.)
SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash';0 rows in set. Elapsed: 5.063 sec. Processed 101.00 million rows, 3.67 GB (19.95 million rows/s., 724.03 MB/s.)
SELECTquery,formatReadableTimeDelta(query_duration_ms 1000) AS query_duration,formatReadableSize(memory_usage) AS memory_usage,formatReadableQuantity(read_rows) AS read_rows,formatReadableSize(read_bytes) AS read_dataFROM clusterAllReplicas(default, system.query_log)WHERE (type = 'QueryFinish') AND hasAll(tables, ['imdb_large.actors', 'imdb_large.roles'])ORDER BY initial_query_start_time DESCLIMIT 2FORMAT Vertical;Row 1:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash'query_duration: 5 secondsmemory_usage: 8.95 GiBread_rows: 101.00 millionread_data: 3.41 GiBRow 2:──────query: SELECT *FROM roles AS rJOIN actors AS a ON r.actor_id = a.idFORMAT `Null`SETTINGS join_algorithm = 'hash'query_duration: 0 secondsmemory_usage: 716.44 MiBread_rows: 101.00 millionread_data: 3.41 GiB
./clickhouse client --host ekyyw56ard.us-west-2.aws.clickhouse.cloud --secure --port 9440 --password <PASSWORD> --database=imdb_large --query "EXPLAIN pipeline graph=1, compact=0SELECT *FROM actors aJOIN roles r ON a.id = r.actor_idSETTINGS max_threads = 2, join_algorithm = 'hash';" | dot -Tpdf > pipeline.pdf


SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash';0 rows in set. Elapsed: 5.385 sec. Processed 101.00 million rows, 3.67 GB (18.76 million rows/s., 680.77 MB/s.)
SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'parallel_hash';0 rows in set. Elapsed: 2.639 sec. Processed 101.00 million rows, 3.67 GB (38.28 million rows/s., 1.39 GB/s.)
SELECTquery,formatReadableTimeDelta(query_duration_ms 1000) AS query_duration,formatReadableSize(memory_usage) AS memory_usage,formatReadableQuantity(read_rows) AS read_rows,formatReadableSize(read_bytes) AS read_dataFROM clusterAllReplicas(default, system.query_log)WHERE (type = 'QueryFinish') AND hasAll(tables, ['imdb_large.actors', 'imdb_large.roles'])ORDER BY initial_query_start_time DESCLIMIT 2FORMAT Vertical;Row 1:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'parallel_hash'query_duration: 2 secondsmemory_usage: 18.29 GiBread_rows: 101.00 millionread_data: 3.41 GiBRow 2:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash'query_duration: 5 secondsmemory_usage: 8.86 GiBread_rows: 101.00 millionread_data: 3.41 GiB
./clickhouse client --host ekyyw56ard.us-west-2.aws.clickhouse.cloud --secure --port 9440 --password <PASSWORD> --database=imdb_large --query "EXPLAIN pipeline graph=1, compact=0SELECT *FROM actors aJOIN roles r ON a.id = r.actor_idSETTINGS max_threads = 2, join_algorithm = 'parallel_hash';" | dot -Tpdf > pipeline.pdf

./clickhouse client --host ekyyw56ard.us-west-2.aws.clickhouse.cloud --secure --port 9440 --password <PASSWORD> --database=imdb_large --query "EXPLAIN pipeline graph=1, compact=0SELECT *FROM actors aJOIN roles r ON a.id = r.actor_idSETTINGS max_threads = 4, join_algorithm = 'parallel_hash';" | dot -Tpdf > pipeline.pdf




SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash';0 rows in set. Elapsed: 5.038 sec. Processed 101.00 million rows, 3.67 GB (20.05 million rows/s., 727.61 MB/s.)
SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 3;0 rows in set. Elapsed: 13.117 sec. Processed 101.00 million rows, 3.67 GB (7.70 million rows/s., 279.48 MB/s.)
SELECTquery,formatReadableTimeDelta(query_duration_ms 1000) AS query_duration,formatReadableSize(memory_usage) AS memory_usage,formatReadableQuantity(read_rows) AS read_rows,formatReadableSize(read_bytes) AS read_dataFROM clusterAllReplicas(default, system.query_log)WHERE (type = 'QueryFinish') AND hasAll(tables, ['imdb_large.actors', 'imdb_large.roles'])ORDER BY initial_query_start_time DESCLIMIT 2FORMAT Vertical;Row 1:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 3query_duration: 13 secondsmemory_usage: 3.72 GiBread_rows: 101.00 millionread_data: 3.41 GiBRow 2:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'hash'query_duration: 5 secondsmemory_usage: 8.96 GiBread_rows: 101.00 millionread_data: 3.41 GiB
SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 8;0 rows in set. Elapsed: 16.366 sec. Processed 101.00 million rows, 3.67 GB (6.17 million rows/s., 224.00 MB/s.)
SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 8;0 rows in set. Elapsed: 16.366 sec. Processed 101.00 million rows, 3.67 GB (6.17 million rows/s., 224.00 MB/s.)Let’s check runtime statistics for the last two query runs:SELECTquery,formatReadableTimeDelta(query_duration_ms 1000) AS query_duration,formatReadableSize(memory_usage) AS memory_usage,formatReadableQuantity(read_rows) AS read_rows,formatReadableSize(read_bytes) AS read_dataFROM clusterAllReplicas(default, system.query_log)WHERE (type = 'QueryFinish') AND hasAll(tables, ['imdb_large.actors', 'imdb_large.roles'])ORDER BY initial_query_start_time DESCLIMIT 2FORMAT Vertical;Row 1:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 8query_duration: 16 secondsmemory_usage: 2.10 GiBread_rows: 101.00 millionread_data: 3.41 GiBRow 2:──────query: SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 3query_duration: 13 secondsmemory_usage: 3.72 GiBread_rows: 101.00 millionread_data: 3.41 GiB
./clickhouse client --host ekyyw56ard.us-west-2.aws.clickhouse.cloud --secure --port 9440 --password <PASSWORD> --database=imdb_large --query "EXPLAIN pipeline graph=1, compact=0SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT `Null`SETTINGS max_threads = 2, join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 3';" | dot -Tpdf > pipeline.pdf

./clickhouse client --host ea3kq2u4fm.eu-west-1.aws.clickhouse.cloud --secure --password <PASSWORD> --database=imdb_large --send_logs_level='trace' --query "SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT NullSETTINGS max_threads = 2, join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 3;"...... GraceHashJoin: Initialize 4 buckets... GraceHashJoin: Joining file bucket 0...... imdb_large.actors ...: Reading approx. 1000000 rows with 2 streams...... imdb_large.roles ...: Reading approx. 100000000 rows with 2 streams...... GraceHashJoin: Joining file bucket 1... GraceHashJoin: Loaded bucket 1 with 250000(/25000823) rows...... GraceHashJoin: Joining file bucket 2... GraceHashJoin: Loaded bucket 2 with 250000(/24996460) rows...... GraceHashJoin: Joining file bucket 3... GraceHashJoin: Loaded bucket 3 with 250000(/25000742) rows...... GraceHashJoin: Finished loading all 4 buckets...
./clickhouse client --host ea3kq2u4fm.eu-west-1.aws.clickhouse.cloud --secure --password <PASSWORD> --database=imdb_large --send_logs_level='trace' --query "SELECT *FROM actors AS aJOIN roles AS r ON a.id = r.actor_idFORMAT NullSETTINGS max_threads = 4, join_algorithm = 'grace_hash', grace_hash_join_initial_buckets = 8;"...... GraceHashJoin: Initialize 8 buckets... GraceHashJoin: Joining file bucket 0...... imdb_large.actors ...: Reading approx. 1000000 rows with 4 streams...... imdb_large.roles ...: Reading approx. 100000000 rows with 4 streams...... GraceHashJoin: Joining file bucket 1... GraceHashJoin: Loaded bucket 1 with 125000(/12502068) rows...... GraceHashJoin: Joining file bucket 2... GraceHashJoin: Loaded bucket 2 with 125000(/12498406) rows...... GraceHashJoin: Joining file bucket 3... GraceHashJoin: Loaded bucket 3 with 125000(/12502699) rows...... GraceHashJoin: Joining file bucket 4... GraceHashJoin: Loaded bucket 4 with 125000(/12498074) rows...... GraceHashJoin: Joining file bucket 5... GraceHashJoin: Loaded bucket 5 with 125000(/12498755) rows...... GraceHashJoin: Joining file bucket 6... GraceHashJoin: Loaded bucket 6 with 125000(/12498054) rows...... GraceHashJoin: Joining file bucket 7... GraceHashJoin: Loaded bucket 7 with 125000(/12498043) rows...... GraceHashJoin: Finished loading all 8 buckets...

作者:Tom Schreiber
文章转载自云原生数据库,如果涉嫌侵权,请发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。




