万里数据库GreatDB数据库MPP与DIRECT功能对比

原创 Dbb 2024-06-20

146

1. 单shard查询

查询语句中涉及到的表，属于同一个shard。

1.1 表扫描

create table t1(ida int, age int, name varchar(10), primary key(ida), key(age, name)) comment "shard=sd1";
insert into t1 values(1, 2, 'a'), (2, 3, ''), (4, 5, 'a');

select * from t1; // table scan

select * from t1 where ida < 1;  // index range

select ida from t1;  // index scan

select ida from t1 where age = 10;  // ref

以上两者均支持。
对于含有const的计划，XPlan不支持，譬如：

select * from t1 where ida = 1; // const
GreatDB Cluster[db]> explain select * from t1 where ida = 1;
+----+-------------+-------+------------+-------+---------------+---------+---------+-------+------+----------+-------+
| id | select_type | table | partitions | type  | possible_keys | key     | key_len | ref   | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+---------+---------+-------+------+----------+-------+
|  1 | SIMPLE      | t1    | NULL       | const | PRIMARY       | PRIMARY | 4       | const |    1 |   100.00 | NULL  |
+----+-------------+-------+------------+-------+---------------+---------+---------+-------+------+----------+-------+
1 row in set, 1 warning (0.00 sec)

1.2 聚集

select sum(ida+1), count(*), max(name), min(age), avg(age) from t1 where age < 15;

select sum(ida)+avg(age)*2 from t1 group by name;

select count(*) from t1 join t2 on t1.ida = t2.ida;

XPlan聚集支持范围为count/sum/max/min/avg，不支持其它聚集，不支持rollup。
Direct对于聚集无限制。

1.3 order by与limit

select * from t1 order by ida limit 3;

select count(*) from t1 join t2 on t1.ida = t2.ida group by t1.ida, t2.age order by t1.ida limit 10;

XPlan与Direct对于limit操作只支持limit offset子句，不包含with_ties和percent相关子句。

1.4 子查询

对于子查询，计划展开后能够转为嵌套JOIN查询的，XPlan支持计划下推到节点执行。
Direct方式下推语句到节点执行，无此限制。具体如下。

1.4.1 标量子查询

CREATE TABLE t1 (s1 INT, s2 CHAR(5) NOT NULL);
INSERT INTO t1 VALUES(100, 'abcde');
SELECT (SELECT s2 FROM t1);

两者均不支持。

1.4.2 ANY、IN、SOME or ALL子查询

CREATE TABLE t1 (s1 INT) COMMENT "shard=sd1";
INSERT INTO t1 VALUES (1);
CREATE TABLE t2 (s1 INT) COMMENT "shard=sd1";
INSERT INTO t2 VALUES (2);    

SELECT s1 FROM t1 WHERE s1 > ANY (SELECT s1 FROM t2);
SELECT s1 FROM t1 WHERE s1 <> ANY  (SELECT s1 FROM t2);
SELECT s1 FROM t1 WHERE s1 <> SOME (SELECT s1 FROM t2); 
SELECT s1 FROM t1 WHERE s1 > ALL (SELECT s1 FROM t2);
GreatDB Cluster[db]> explain SELECT s1 FROM t1 WHERE s1 > ALL (SELECT s1 FROM t2);
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra       |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
|  1 | PRIMARY     | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |    66.67 | Using where |
|  2 | SUBQUERY    | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |   100.00 | NULL        |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
2 rows in set, 1 warning (0.00 sec

SELECT s1 FROM t1 WHERE s1 = ANY (SELECT s1 FROM t2);
SELECT s1 FROM t1 WHERE s1 IN (SELECT s1 FROM t2);
GreatDB Cluster[db]> explain format=tree  SELECT s1 FROM t1 WHERE s1 IN (SELECT s1 FROM t2);
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| EXPLAIN                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| -> Nested loop inner join  (cost=100215.00 rows=1000000)
    -> Table scan on t1 {[sd1:n1]: SELECT `s1`, `_hidden_pk_` FROM `db`.`t1`WHERE  ( (`s1` is not null) ) ;}  (cost=115.00 rows=1000)
    -> Single-row index lookup on <subquery2> using <auto_distinct_key> (s1=t1.s1)
        -> Materialize with deduplication  (cost=215.00..215.00 rows=1000)
            -> Filter: (t2.s1 is not null)  (cost=115.00 rows=1000)
                -> Table scan on t2 {[sd1:n1]: SELECT `s1`, `_hidden_pk_` FROM `db`.`t2`;}  (cost=115.00 rows=1000)
 |
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
1 row in set (0.00 sec)

对于此类查询，计划展开后转换为JOIN的，譬如最下边两个查询，XPlan支持。具体可参照explain format=tree的计划。
第一个explain计划中，select_type为SUBQUERY，未转换为JOIN处理，XPlan不支持。
第二个explain format=tree计划中，已经明确转换为Nested loop inner join，XPlan支持。
Direct方式均可。

1.4.3 Exists/Not Exists子查询

SELECT * FROM t1 WHERE EXISTS (SELECT * FROM t2);
GreatDB Cluster[db]> explain SELECT * FROM t1 WHERE EXISTS (SELECT * FROM t2);
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------------------------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra                         |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------------------------+
|  1 | SIMPLE      | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |   100.00 | FirstMatch                    |
|  1 | SIMPLE      | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |   100.00 | Using join buffer (hash join) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------------------------+
2 rows in set, 1 warning (0.00 sec)


SELECT * FROM t1 WHERE EXISTS (SELECT count(*)  FROM t2);
GreatDB Cluster[db]> explain SELECT * FROM t1 WHERE EXISTS (SELECT count(*)  FROM t2);
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------+
| id | select_type | table | partitions | type | possible_keys | key  | key_len | ref  | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------+
|  1 | PRIMARY     | t1    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |   100.00 | NULL  |
|  2 | SUBQUERY    | t2    | NULL       | ALL  | NULL          | NULL | NULL    | NULL | 1000 |   100.00 | NULL  |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------+
2 rows in set, 1 warning (0.00 sec

第一个查询，参照Extra信息，已经转换为JOIN操作，XPlan支持下推。
第二个查询，t2表的count操作为一个subquery，参照select_type，此计划未转换为JOIN处理，XPlan不支持。
Direct方式均可。

1.5 连接操作

CREATE TABLE t1 (s1 INT, s2 CHAR(5) NOT NULL) COMMENT "shard=sd1";
INSERT INTO t1 VALUES (1, 'abcde');
CREATE TABLE t2 (s1 INT, s2 CHAR(5) NOT NULL) COMMENT "shard=sd1";
INSERT INTO t2 VALUES (1, 'asdfg'); 
SELECT * FROM t1 INNER JOIN t2 ON t1.s1 = t2.s1;
SELECT * FROM t1 LEFT JOIN t2 ON t1.s1 = t2.s1;

create table n1(a int, b int, primary key(a)) COMMENT "shard=sd1";
create table n2(a int, b int, primary key(a)) COMMENT "shard=sd1";
SELECT * FROM n1 INNER JOIN n2 ON n1.a = n2.b;
SELECT * FROM n1 LEFT JOIN n2 ON n1.a = n2.b;

两者均支持。

1.6 集合操作

CREATE TABLE t1 (a INT, b INT) COMMENT "shard=sd1";
INSERT INTO t1 VALUES (4,-2), (5,9);
CREATE TABLE t2 (a INT, b INT) COMMENT "shard=sd1";
INSERT INTO t2 VALUES (1,2), (3,4);
SELECT * FROM t1 UNION SELECT * FROM t2;
SELECT * FROM t1 UNION ALL SELECT * FROM t2;
SELECT * FROM t1 MINUS SELECT * FROM t2;

SELECT sum(a) FROM t1 UNION SELECT count(b) FROM t2;
SELECT sum(a) FROM t1 UNION ALL SELECT count(b) FROM t2;
SELECT sum(a) FROM t1 MINUS SELECT count(b) FROM t2;

两者均支持。

2. 多shard查询

2.1 连接操作

支持inner join、outer join，分别介绍XPlan与Direct的不同。

2.1.1 分区表

分区表与分区表

XPlan与Direct支持join的约束条件如下：

XPlan	Direct
分区表的节点分布情况一致	分区表的节点分布情况一致
分区键等值连接	分区键等值连接
分区键属性相同：列类型、字符集、NULL/NOT NULL属性	分区键属性相同：列类型、字符集、NULL/NOT NULL属性
	分区键为主键或唯一键

create table t1(a int, b int)engine=greatdb PARTITION BY HASH(a) PARTITIONS 3;
create table t2(a int, b int)engine=greatdb PARTITION BY HASH(a) PARTITIONS 3;
select * from t1,t2 where t1.a=t2.a;         // XPlan-enable Direct-disable  
select * from t1 left join t2 on t1.a=t2.a;  // XPlan-enable Direct-disable  
select * from t1 right join t2 on t1.a=t2.a; // XPlan-enable Direct-disable  

create table h1(a int, b int, primary key(a))engine=greatdb PARTITION BY HASH(a) PARTITIONS 3;
create table h2(a int, b int, primary key(a))engine=greatdb PARTITION BY HASH(a) PARTITIONS 3;
select * from h1,h2 where h1.a=h2.a;         // XPlan-enable Direct-enable  
select * from h1 left join h2 on h1.a=h2.a;  // XPlan-enable Direct-disable  
select * from h1 right join h2 on h1.a=h2.a; // XPlan-enable Direct-disable

对于上述两种JOIN操作，XPlan均支持。
对于inner join，Direct仅支持分区键为主键的情况。
Direct不支持多节点分区表的outer join操作。

分区表与NORMAL表

create table h1(a int, b int, primary key(a))engine=greatdb PARTITION BY HASH(a) PARTITIONS 3;
insert into h1 values(1,1),(2,2),(3,3);
create table n1(a int, b int, primary key(a))engine=greatdb COMMENT "shard=sd1";
insert into n1 values(1,1),(2,2),(3,3);
explain select * from h1 INNER JOIN n1 on h1.a=n1.a;

多节点分区表与单节点NORMAL表节点信息不一致，均不支持下推。

2.1.2 GLOBAL表

GLOBAL表与分区表

create table h1(a int, b int, primary key(a))engine=greatdb PARTITION BY HASH(a) PARTITIONS 8;
create table g1(a int, b int, primary key(a))engine=greatdb comment "disttype=global";
insert into h1 values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8);
insert into g1 select * from h1;
select * from h1 join g1 on h1.a=g1.a;       // XPlan-enable Direct-enable  
select * from h1 left join g1 on h1.a=g1.a;  // XPlan-enable Direct-disable  

create table h2(a int, b int)engine=greatdb PARTITION BY HASH(a) PARTITIONS 8;
create table g2(a int, b int)engine=greatdb comment "disttype=global";
insert into h2 values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8);
insert into g2 select * from h2;             
select * from h2 join g2 on h2.a=g2.a;      // XPlan-enable Direct-enable  
select * from h1 left join g1 on h1.a=g1.a; // XPlan-enable Direct-disable

其中，g1、g2表为3节点GLOBAL表，分区表h1、h2为3节点分区表，节点集合均为3节点，其信息一致。
对于inner join，两者均支持下推。
Direct不支持多节点分区表的outer join操作。
GLOBAL left join 分区表，不支持下推，下推结果不对。

GLOBAL表与NORMAL表

create table g1(a int, b int)engine=greatdb comment "disttype=global";
create table n1(a int, b int)engine=greatdb;
insert into g1 values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8);
insert into n1 select * from g1;
select * from n1 join g1 on g1.a=n1.a;       // XPlan-enable Direct-enable  
select * from n1 left join g1 on g1.a=n1.a;  // XPlan-enable Direct-enable

均支持下推。

GLOBAL表与GLOBAL表

create table g1(a int, b int)engine=greatdb comment "disttype=global";
create table g2(a int, b int)engine=greatdb comment "disttype=global";
insert into g1 values(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8);
insert into g2 select * from g1;
select * from g1 join g2 on g1.a=g2.a;       // XPlan-enable Direct-enable
select * from g1 left join g2 on g1.a=g2.a;  // XPlan-enable Direct-enable