MySQL 联接索引

原创 Kyle Hailey 2020-04-16

619

MySQL比Oracle示例更简单，因为MySQL仅具有嵌套循环，而没有哈希联接。

这是查询，就像上一篇文章中的Oracle示例一样

select max(t1.data) 
from  t1, t2  
where t1.id = t2.id 
and t1.clus  = 1

因此，下面只有4个示例全部由NL加入

在T1和T2上进行全表扫描
7.83秒
T1谓词过滤器列上的索引
7.39秒
T2连接列上的索引
0.49秒
T2连接列和T1谓词过滤器列上的索引
0.06秒

在MySQL 8.0.18之前没有在MySQL上进行“解释分析”的想法，而我在8.0.17上进行了测试，因此解释计划的成本是估计值而不是实际值，但是查询的耗时是实际值。

drop table seed;
CREATE TABLE seed ( id INT AUTO_INCREMENT PRIMARY KEY, val INT);
 insert into seed(val) values (1);
insert into seed(val) select val from seed;  /*  2 */
insert into seed(val) select val from seed;  /*  4 */
insert into seed(val) select val from seed;  /*  8 */
insert into seed(val) select val from seed;  /*  16 */
insert into seed(val) select val from seed;  /*  32 */
insert into seed(val) select val from seed;  /*  64 */
insert into seed(val) select val from seed;  /*  128 */
insert into seed(val) select val from seed;  /*  256 */
insert into seed(val) select val from seed;  /*  512 */
insert into seed(val) select val from seed;  /*  1024 */
insert into seed(val) select val from seed;  /*  2048 */
insert into seed(val) select val from seed;  /*  4096 */
insert into seed(val) select val from seed;  /*  8192 */
insert into seed(val) select val from seed;  /*  16384 */
insert into seed(val) select val from seed;  /*  32768 */
insert into seed(val) select val from seed;  /*  131072 */
insert into seed(val) select val from seed;  /*  262144 */
insert into seed(val) select val from seed;  /*  524288 */
insert into seed(val) select val from seed;  /*  1048576 */
 select count(*) from seed;

 drop table dummy;
 
  create table dummy ( 
    id  int,
    clus int,
    val int,
    data VARCHAR(40)
 );
 
 insert into dummy (
    id, clus , val, data
)
 select 
 id,
 truncate(id/169,0),
 mod(id,10000), 
 CONV(FLOOR(RAND() * 99999999999999), 10, 36) 
from seed
;

select count(*) from dummy where clus = 1;
+----------+
| count(*) |
+----------+
|      106 |
+----------+
select count(*) from dummy where val =1;
+----------+
| count(*) |
+----------+
|      106 |
+----------+

drop table t1;
drop table t2;

create table t1 as select * from dummy;
create table t2 as select * from dummy;

奇怪的是，种子表中的序列号中有间隔，因此截断中的奇异值得到106个值，即truncate（id / 169,0）。

首先，我们在没有索引的情况下运行查询（我已经创建了索引，因此在示例中使用“ USE INDEX（）”将其强制关闭）

两个节点都具有访问类型

"access_type": "ALL"

因此他们正在进行全表扫描。

explain format=json select max(t1.data) 
from 
t1 USE INDEX ()
, t2 USE INDEX ()
where t1.id = t2.id 
and t1.clus = 1
;

1 row in set (7.83 sec)

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "18102538.86"
    },
    "nested_loop": [
      {
        "table": {
          "table_name": "t1",
          "access_type": "ALL",
          "rows_examined_per_scan": 1045682,
          "rows_produced_per_join": 172,
          "filtered": "0.02",
          "cost_info": {
            "read_cost": "105376.24",
            "eval_cost": "17.21",
            "prefix_cost": "105393.45",
            "data_read_per_join": "29K"
          },
          "used_columns": [
            "id",
            "clus",
            "data"
          ],
          "attached_condition": "(`kyle`.`t1`.`clus` = 1)"
        }
      },
      {
        "table": {
          "table_name": "t2",
          "access_type": "ALL",
          "rows_examined_per_scan": 1045682,
          "rows_produced_per_join": 172,
          "filtered": "0.00",
          "using_join_buffer": "Block Nested Loop",
          "cost_info": {
            "read_cost": "917.35",
            "eval_cost": "17.21",
            "prefix_cost": "18102538.86",
            "data_read_per_join": "29K"
          },
          "used_columns": [
            "id"
          ],
          "attached_condition": "(`kyle`.`t2`.`id` = `kyle`.`t1`.`id`)"
        }
      }
    ]
  }
}

我们可以看到T1上的过滤器应该过滤大部分表（从1045682中抽出172行），所以现在在t1.clus上添加索引过滤器（我在t1上删除了“ USE INDEX（）”，所以T1上的索引是现在用于解释）。执行时间稍快一些，但与7.39和7.39相比并没有那么多。我们看到T1现在可以通过索引访问

explain format=json select max(t1.data) 
from  t1, t2  
USE INDEX ()
where t1.id = t2.id 
and t1.clus    = 1
;
 row in set (7.39 sec)

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "11085148.28"
    },
    "nested_loop": [
      {
        "table": {
          "table_name": "t1",
          "access_type": "ref",
          "possible_keys": [
            "t1_clus"
          ],
          "key": "t1_clus",
          "used_key_parts": [
            "clus"
          ],
          "key_length": "5",
          "ref": [
            "const"
          ],
          "rows_examined_per_scan": 106,
          "rows_produced_per_join": 106,
          "filtered": "100.00",
          "cost_info": {
            "read_cost": "26.50",
            "eval_cost": "10.60",
            "prefix_cost": "37.10",
            "data_read_per_join": "18K"
          },
          "used_columns": [
            "id",
            "clus",
            "data"
          ]
        }
      },
      {
        "table": {
          "table_name": "t2",
          "access_type": "ALL",
          "rows_examined_per_scan": 1045682,
          "rows_produced_per_join": 105,
          "filtered": "0.00",
          "using_join_buffer": "Block Nested Loop",
          "cost_info": {
            "read_cost": "881.98",
            "eval_cost": "10.60",
            "prefix_cost": "11085148.28",
            "data_read_per_join": "18K"
          },
          "used_columns": [
            "id"
          ],
          "attached_condition": "(`kyle`.`t2`.`id` = `kyle`.`t1`.`id`)"
        }
      }
    ]
  }
}

在T2上，我们查看了1045682后访问了105行，因此这是建立索引的好机会。现在添加索引或连接列，我们可以大大缩短执行时间。我们从7.39秒缩短至0.49或7.39 / 0.49，或提高了15倍

explain format=json select max(t1.data) 
from  t1 USE INDEX () , t2  
where t1.id = t2.id 
and t1.clus   = 1
;
1 row in set (0.49 sec)

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "105582.64"
    },
    "nested_loop": [
      {
        "table": {
          "table_name": "t1",
          "access_type": "ALL",
          "rows_examined_per_scan": 1045682,
          "rows_produced_per_join": 172,
          "filtered": "0.02",
          "cost_info": {
            "read_cost": "105376.24",
            "eval_cost": "17.21",
            "prefix_cost": "105393.45",
            "data_read_per_join": "29K"
          },
          "used_columns": [
            "id",
            "clus",
            "data"
          ],
          "attached_condition": "((`kyle`.`t1`.`clus` = 1) and (`kyle`.`t1`.`id` is not null))"
        }
      },
      {
        "table": {
          "table_name": "t2",
          "access_type": "ref",
          "possible_keys": [
            "t2_id"
          ],
          "key": "t2_id",
          "used_key_parts": [
            "id"
          ],
          "key_length": "5",
          "ref": [
            "kyle.t1.id"
          ],
          "rows_examined_per_scan": 1,
          "rows_produced_per_join": 172,
          "filtered": "100.00",
          "using_index": true,
          "cost_info": {
            "read_cost": "171.98",
            "eval_cost": "17.21",
            "prefix_cost": "105582.64",
            "data_read_per_join": "29K"
          },
          "used_columns": [
            "id"
          ]
        }
      }
    ]
  }
}

现在使用这两个索引，这使我们的执行时间减少了0.43倍，但是与之前的0.49 / 0.06相比，即8倍的改进，仍然相当不错

explain format=json select max(t1.data) 
from  t1, t2  
where t1.id = t2.id 
and t1.clus  = 1
;
1 row in set (0.06 sec)

{
  "query_block": {
    "select_id": 1,
    "cost_info": {
      "query_cost": "153.62"
    },
    "nested_loop": [
      {
        "table": {
          "table_name": "t1",
          "access_type": "ref",
          "possible_keys": [
            "t1_clus"
          ],
          "key": "t1_clus",
          "used_key_parts": [
            "clus"
          ],
          "key_length": "5",
          "ref": [
            "const"
          ],
          "rows_examined_per_scan": 106,
          "rows_produced_per_join": 106,
          "filtered": "100.00",
          "cost_info": {
            "read_cost": "26.50",
            "eval_cost": "10.60",
            "prefix_cost": "37.10",
            "data_read_per_join": "18K"
          },
          "used_columns": [
            "id",
            "clus",
            "data"
          ],
          "attached_condition": "(`kyle`.`t1`.`id` is not null)"
        }
      },
      {
        "table": {
          "table_name": "t2",
          "access_type": "ref",
          "possible_keys": [
            "t2_id"
          ],
          "key": "t2_id",
          "used_key_parts": [
            "id"
          ],
          "key_length": "5",
          "ref": [
            "kyle.t1.id"
          ],
          "rows_examined_per_scan": 1,
          "rows_produced_per_join": 106,
          "filtered": "100.00",
          "using_index": true,
          "cost_info": {
            "read_cost": "105.93",
            "eval_cost": "10.60",
            "prefix_cost": "153.63",
            "data_read_per_join": "18K"
          },
          "used_columns": [
            "id"
          ]
        }
      }
    ]
  }

原文链接：http://www.oaktable.net/content/indexes-joins-mysql

mysql

「喜欢这篇文章，您的关注和赞赏是给作者最好的鼓励」

关注作者

MySQL 联接索引

评论