MySQL优化 — exists与in谁快谁慢?
-- in语句
select a.* from aaa a where a.id in ( select id from bbb b);
-- exists语句
select a.* from aaa a where exists (select null from bbb b where b.id=a.id);
-
IN操作相当于对inner table执行一个带distinct的子查询,然后得到的查询结果集再与outer table进行连接,连接方式的索引的使用就等同于普通的两表之间的连接。 -
EXISTS操作相当于对outer table进行全表扫描,用从中检索到的每一行与inner table做循环匹配输出响应的符合条件的结果,其主要开销是对outer table的全表扫描,exists()会执行a.length次,它不缓存exists()的结果集,因为其结果集不重要,重要的是结果集中是否有记录。
/* 子表数据量小,外表数据量大*/
/* FirstMatch是mysql在处理半连接的时候使用的一种优化策略 */
mysql> explain select count(*) from salaries a where a.emp_no in (select b.emp_no from employees b) ;
+----+-------------+-------+------------+-------+---------------+---------+---------+--------------------+--------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+-------+---------------+---------+---------+--------------------+--------+----------+-------------+
| 1 | SIMPLE | b | NULL | index | PRIMARY | PRIMARY | 4 | NULL | 299113 | 100.00 | Using index |
| 1 | SIMPLE | a | NULL | ref | PRIMARY | PRIMARY | 4 | employees.b.emp_no | 9 | 100.00 | Using index |
+----+-------------+-------+------------+-------+---------------+---------+---------+--------------------+--------+----------+-------------+
2 rows in set, 1 warning (0.00 sec)
mysql> select count(*) from salaries a where a.emp_no in (select b.emp_no from employees b) ;
+----------+
| count(*) |
+----------+
| 2844047 |
+----------+
1 row in set (1.29 sec)
/* 子表数据量小,外表数据量大*/
mysql> explain select count(*) from salaries a where exists (select 1 from employees b where b.emp_no=a.emp_no);
+----+--------------------+-------+------------+--------+---------------+---------+---------+--------------------+---------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+-------+------------+--------+---------------+---------+---------+--------------------+---------+----------+--------------------------+
| 1 | PRIMARY | a | NULL | index | NULL | PRIMARY | 7 | NULL | 2838426 | 100.00 | Using where; Using index |
| 2 | DEPENDENT SUBQUERY | b | NULL | eq_ref | PRIMARY | PRIMARY | 4 | employees.a.emp_no | 1 | 100.00 | Using index |
+----+--------------------+-------+------------+--------+---------------+---------+---------+--------------------+---------+----------+--------------------------+
2 rows in set, 2 warnings (0.00 sec)
mysql> select count(*) from salaries a where exists (select 1 from employees b where b.emp_no=a.emp_no);
+----------+
| count(*) |
+----------+
| 2844047 |
+----------+
1 row in set (2.82 sec)
/* 子表与外表数量差不多,30w*/
mysql> select count(*) from employees a where exists (select 1 from dept_emp b where b.emp_no=a.emp_no) ;
+----------+
| count(*) |
+----------+
| 300024 |
+----------+
1 row in set (0.35 sec)
/* 子表与外表数量差不多,30w*/
mysql> select count(*) from employees a where a.emp_no in (select b.emp_no from dept_emp b) ;
+----------+
| count(*) |
+----------+
| 300024 |
+----------+
1 row in set (0.26 sec)
/* 外表数量少,内表数量多*/
mysql> select count(*) from dept_emp_part a where a.emp_no in (select emp_no from employees b);
+----------+
| count(*) |
+----------+
| 52245 |
+----------+
1 row in set (0.17 sec)
mysql> select count(*) from dept_emp_part a where exists (select null from employees b where a.emp_no=b.emp_no);
+----------+
| count(*) |
+----------+
| 52245 |
+----------+
1 row in set (0.07 sec)
子表数据量比外表数据量少,使用in效率更高。
子表数据量比外表数据量大,使用exists;但是注意到in语句优化器会将半连接转化成内连接(可以转化成内连接查看下执行计划),此时使用的仍然是子表的索引。
子表与外表数据量大小差不多,in与exists效率差别不大,但是总的还是in稍快点,因为in在内存里操作。
5.7版本in语句优化器会将半连接转化成内连接,in == 内连接+FirstMatch
外循环的数量越少越好
# Time: 2020-11-13T02:52:51.749743Z
# Query_time: 17.635067 Lock_time: 0.000626 Rows_sent: 1 Rows_examined: 70501201
SET timestamp=1605235971;
SELECT
bp.id package_id,
bm.id bid_id,
... -- 此处省略N多标量子查询字段
FROM
bid_package bpInner
LEFT JOIN lib_agencys la ON bpInner.agent_id = la.id
LEFT JOIN sys_company sc ON la.company_id = sc.id
LEFT JOIN sys_company_depart scd ON bpInner.pm_dept_id = scd.id
WHERE
bpInner.purchase_mode IN ( '10', '20', '30', '40', '50', '70', '80', '90' )
AND bpInner.is_auth_dept_inner = '1'
) t
GROUP BY
package_id
) j ON j.package_id = bp.id
LEFT JOIN (
SELECT
bpInner.id package_id,
GROUP_CONCAT( sc.company_name ) candidate_suppliers
FROM
bgg_invite_bidder bib,
bgg_invite_main bim,
bid_package bpInner,
sys_company sc
WHERE
1 = 1
AND bpInner.id = bim.package_id
AND bim.id = bib.invite_main_id
AND bib.supplier_cp_id = sc.id
AND bpInner.is_deleted = '0'
AND bpInner.is_auth_dept_inner='1'
AND bim.is_deleted = '0'
AND bib.is_deleted = '0'
AND sc.is_deleted = '0'
AND bim.invite_status IN ( '25', '26', '32' )
GROUP BY
bpInner.id
) cs ON bp.id = cs.package_id
WHERE
bm.is_deleted = '0'
AND bp.is_deleted = '0'
AND bm.is_auth_dept_inner = '1'
AND bm.id = bp.bid_id
and (bm.create_user_id = xxxxx or EXISTS (SELECT 1 FROM bid_main_operator bmo WHERE bmo.is_deleted=0 AND bmo.package_id=bp.id AND bmo.operator_id=xxxxx))
ORDER BY bp.Create_time DESC LIMIT 50;
bid_main_operator(operator_id)创建辅助索引;
exists改写为in
优化效果