MySQL 深潜 - MDL 锁的实现与获取机制
一 背景
二 基本概念
1 MDL_key
完整的字符串由 namespace、按层次每一级的名称组成,多种命名空间可以将不同类型的同名对象区分开。命名空间包括 GLOBAL、SCHEMA、TABLE、FUNCTION、PROCEDURE 等数据库中可以创建的不同对象类型组成。
2 enum_mdl_type
MDL_SHARED(S),可以共享访问对象的元数据,比如 SHOW CREATE TABLE 语句
MDL_SHARED_READ(SR),可以共享访问对象的数据,比如 SELECT 语句
MDL_SHARED_WRITE(SW),可以修改对象的数据,比如 INSERT / UPDATE 语句
MDL_SHARED_UPGRADABLE(SU),可升级的共享锁,后面可升级到更强的锁(比如 X 锁,阻塞并发访问),比如 DDL 的第一阶段
MDL_EXCLUSIVE(X),独占锁,阻塞其他线程对该对象的并发访问,可以修改对象的元数据,比如 DDL 的第二阶段
不同类型的 MDL 兼容性
1)范围锁
| Type of active |
Request | scoped lock |
type | IS(*) IX S X |
---------+------------------+
IS | + + + + |
IX | + + - - |
S | + - + - |
X | + - - - |
| Pending |
Request | scoped lock |
type | IS(*) IX S X |
---------+-----------------+
IS | + + + + |
IX | + + - - |
S | + + + - |
X | + + + + |
Here: "+" -- means that request can be satisfied
"-" -- means that request can't be satisfied and should wait
2)对象锁
Request | Granted requests for lock |
type | S SH SR SW SWLP SU SRO SNW SNRW X |
----------+---------------------------------------------+
S | + + + + + + + + + - |
SH | + + + + + + + + + - |
SR | + + + + + + + + - - |
SW | + + + + + + - - - - |
SWLP | + + + + + + - - - - |
SU | + + + + + - + - - - |
SRO | + + + - - + + + - - |
SNW | + + + - - - + - - - |
SNRW | + + - - - - - - - - |
X | - - - - - - - - - - |
Request | Pending requests for lock |
type | S SH SR SW SWLP SU SRO SNW SNRW X |
----------+--------------------------------------------+
S | + + + + + + + + + - |
SH | + + + + + + + + + + |
SR | + + + + + + + + - - |
SW | + + + + + + + - - - |
SWLP | + + + + + + - - - - |
SU | + + + + + + + + + - |
SRO | + + + - + + + + - - |
SNW | + + + + + + + + + - |
SNRW | + + + + + + + + + - |
X | + + + + + + + + + + |
Here: "+" -- means that request can be satisfied
"-" -- means that request can't be satisfied and should wait
/**
Check if ticket represents metadata lock of "stronger" or equal type
than specified one. I.e. if metadata lock represented by ticket won't
allow any of locks which are not allowed by specified type of lock.
@return true if ticket has stronger or equal type
false otherwise.
*/
bool MDL_ticket::has_stronger_or_equal_type(enum_mdl_type type) const {
const MDL_lock::bitmap_t *granted_incompat_map =
m_lock->incompatible_granted_types_bitmap();
return !(granted_incompat_map[type] & ~(granted_incompat_map[m_type]));
}
三 重要数据结构
1 关系示意图
2 MDL_request
3 MDL_lock
4 MDL_ticket
5 MDL_context
MDL_ticket_store::MDL_ticket_handle MDL_ticket_store::find(
const MDL_request &req) const {
if (m_count >= THRESHOLD) {
MDL_ticket_handle list_h = find_in_lists(req);
MDL_ticket_handle hash_h = find_in_hash(req);
DBUG_ASSERT(equivalent(list_h.m_ticket, hash_h.m_ticket, req.duration));
}
return (m_map == nullptr || m_count < THRESHOLD) ? find_in_lists(req)
: find_in_hash(req);
}
四 MDL 获取过程
PT_table_factor_table_ident::contextualize()
|--SELECT_LEX::add_table_to_list()
|--MDL_REQUEST_INIT -> MDL_request::init_with_source()
open_tables_for_query()
|--open_table() // 循环打开每一个表
|--open_table_get_mdl_lock()
|--MDL_context::acquire_lock() // 获取lock,如果遇到锁冲突,那么等待冲突的锁被释放
|--MDL_context::try_acquire_lock_impl()
1 MDL_context::try_acquire_lock_impl
Unobtrusive(fast path)
/**
Array of increments for "unobtrusive" types of lock requests for
per-object locks.
@sa MDL_lock::get_unobtrusive_lock_increment().
For per-object locks:
- "unobtrusive" types: S, SH, SR and SW
- "obtrusive" types: SU, SRO, SNW, SNRW, X
Number of locks acquired using "fast path" are encoded in the following
bits of MDL_lock::m_fast_path_state:
- bits 0 .. 19 - S and SH (we don't differentiate them once acquired)
- bits 20 .. 39 - SR
- bits 40 .. 59 - SW and SWLP (we don't differentiate them once acquired)
Overflow is not an issue as we are unlikely to support more than 2^20 - 1
concurrent connections in foreseeable future.
This encoding defines the below contents of increment array.
*/
{0, 1, 1, 1ULL << 20, 1ULL << 40, 1ULL << 40, 0, 0, 0, 0, 0},
/**
@returns "Fast path" increment for request for "unobtrusive" type
of lock, 0 - if it is request for "obtrusive" type of
lock.
@sa Description at method declaration for more details.
*/
MDL_lock::fast_path_state_t MDL_lock::get_unobtrusive_lock_increment(
const MDL_request *request) {
return MDL_lock::get_strategy(request->key)
->m_unobtrusive_lock_increment[request->type];
}
MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
do {
/*
Check if hash look-up returned object marked as destroyed or
it was marked as such while it was pinned by us. If yes we
need to unpin it and retry look-up.
*/
if (old_state & MDL_lock::IS_DESTROYED) {
if (pinned) lf_hash_search_unpin(m_pins);
goto retry;
}
/*
Check that there are no granted/pending "obtrusive" locks and nobody
even is about to try to check if such lock can be acquired.
In these cases we need to take "slow path".
*/
if (old_state & MDL_lock::HAS_OBTRUSIVE) goto slow_path;
} while (!lock->fast_path_state_cas(
&old_state, old_state + unobtrusive_lock_increment));
/*
Since this MDL_ticket is not visible to any threads other than
the current one, we can set MDL_ticket::m_lock member without
protect of MDL_lock::m_rwlock. MDL_lock won't be deleted
underneath our feet as MDL_lock::m_fast_path_state serves as
reference counter in this case.
*/
ticket->m_lock = lock;
ticket->m_is_fast_path = true;
m_ticket_store.push_front(mdl_request->duration, ticket);
mdl_request->ticket = ticket;
mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED);
Obtrusive(slow path)
/**
"Materialize" requests for locks which were satisfied using
"fast path" by properly including them into corresponding
MDL_lock::m_granted bitmaps/lists and removing it from
packed counter in MDL_lock::m_fast_path_state.
*/
void MDL_context::materialize_fast_path_locks() {
int i;
for (i = 0; i < MDL_DURATION_END; i++) {
MDL_ticket_store::List_iterator it = m_ticket_store.list_iterator(i);
MDL_ticket *matf = m_ticket_store.materialized_front(i);
for (MDL_ticket *ticket = it++; ticket != matf; ticket = it++) {
if (ticket->m_is_fast_path) {
MDL_lock *lock = ticket->m_lock;
MDL_lock::fast_path_state_t unobtrusive_lock_increment =
lock->get_unobtrusive_lock_increment(ticket->get_type());
ticket->m_is_fast_path = false;
mysql_prlock_wrlock(&lock->m_rwlock);
lock->m_granted.add_ticket(ticket);
/*
Atomically decrement counter in MDL_lock::m_fast_path_state.
This needs to happen under protection of MDL_lock::m_rwlock to make
it atomic with addition of ticket to MDL_lock::m_granted list and
to enforce invariant [INV1].
*/
MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
while (!lock->fast_path_state_cas(
&old_state, ((old_state - unobtrusive_lock_increment) |
MDL_lock::HAS_SLOW_PATH))) {
}
mysql_prlock_unlock(&lock->m_rwlock);
}
}
}
m_ticket_store.set_materialized();
}
bool MDL_lock::can_grant_lock(enum_mdl_type type_arg,
const MDL_context *requestor_ctx) const {
bool can_grant = false;
bitmap_t waiting_incompat_map = incompatible_waiting_types_bitmap()[type_arg];
bitmap_t granted_incompat_map = incompatible_granted_types_bitmap()[type_arg];
/*
New lock request can be satisfied iff:
- There are no incompatible types of satisfied requests
in other contexts
- There are no waiting requests which have higher priority
than this request.
*/
if (!(m_waiting.bitmap() & waiting_incompat_map)) {
if (!(fast_path_granted_bitmap() & granted_incompat_map)) {
if (!(m_granted.bitmap() & granted_incompat_map))
can_grant = true;
else {
Ticket_iterator it(m_granted);
MDL_ticket *ticket;
/*
There is an incompatible lock. Check that it belongs to some
other context.
*/
while ((ticket = it++)) {
if (ticket->get_ctx() != requestor_ctx &&
ticket->is_incompatible_when_granted(type_arg))
break;
}
if (ticket == NULL) /* Incompatible locks are our own. */
can_grant = true;
}
}
}
return can_grant;
}
2 锁等待和通知
// WS_EMPTY since EMPTY conflicts with #define in system headers on some
// platforms.
enum enum_wait_status { WS_EMPTY = 0, GRANTED, VICTIM, TIMEOUT, KILLED };
GRANTED,该线程获取到了等待的 MDL 锁
VICTIM,该线程作为死锁的受害者,要求重新执行事务
TIMEOUT,等待超时
KILLED,该线程在等待过程中被 kill 掉
/**
Wait for the status to be assigned to this wait slot.
*/
MDL_wait::enum_wait_status MDL_wait::timed_wait(
MDL_context_owner *owner, struct timespec *abs_timeout,
bool set_status_on_timeout, const PSI_stage_info *wait_state_name) {
enum_wait_status result;
int wait_result = 0;
mysql_mutex_lock(&m_LOCK_wait_status);
while (!m_wait_status && !owner->is_killed() && !is_timeout(wait_result)) {
wait_result = mysql_cond_timedwait(&m_COND_wait_status, &m_LOCK_wait_status,
abs_timeout);
}
if (m_wait_status == WS_EMPTY) {
if (owner->is_killed())
m_wait_status = KILLED;
else if (set_status_on_timeout)
m_wait_status = TIMEOUT;
}
result = m_wait_status;
mysql_mutex_unlock(&m_LOCK_wait_status);
return result;
}
void MDL_lock::reschedule_waiters() {
MDL_lock::Ticket_iterator it(m_waiting);
MDL_ticket *ticket;
while ((ticket = it++)) {
if (can_grant_lock(ticket->get_type(), ticket->get_ctx())) {
if (!ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED)) {
m_waiting.remove_ticket(ticket);
m_granted.add_ticket(ticket);
...
/**
Set the status unless it's already set. Return false if set,
true otherwise.
*/
bool MDL_wait::set_status(enum_wait_status status_arg) {
bool was_occupied = true;
mysql_mutex_lock(&m_LOCK_wait_status);
if (m_wait_status == WS_EMPTY) {
was_occupied = false;
m_wait_status = status_arg;
mysql_cond_signal(&m_COND_wait_status);
}
mysql_mutex_unlock(&m_LOCK_wait_status);
return was_occupied;
}
3 死锁检测
/** Inform the deadlock detector there is an edge in the wait-for graph. */
void will_wait_for(MDL_wait_for_subgraph *waiting_for_arg) {
/*
Before starting wait for any resource we need to materialize
all "fast path" tickets belonging to this thread. Otherwise
locks acquired which are represented by these tickets won't
be present in wait-for graph and could cause missed deadlocks.
It is OK for context which doesn't wait for any resource to
have "fast path" tickets, as such context can't participate
in any deadlock.
*/
materialize_fast_path_locks();
mysql_prlock_wrlock(&m_LOCK_waiting_for);
m_waiting_for = waiting_for_arg;
mysql_prlock_unlock(&m_LOCK_waiting_for);
}
MDL_wait_for_subgraph
Deadlock_detection_visitor
/**
Inspect a wait-for graph edge from one MDL context to another.
@retval true A loop is found.
@retval false No loop is found.
*/
bool Deadlock_detection_visitor::inspect_edge(MDL_context *node) {
m_found_deadlock = node == m_start_node;
return m_found_deadlock;
}
/**
Change the deadlock victim to a new one if it has lower deadlock
weight.
@param new_victim New candidate for deadlock victim.
*/
void Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim) {
if (m_victim == NULL ||
m_victim->get_deadlock_weight() >= new_victim->get_deadlock_weight()) {
/* Swap victims, unlock the old one. */
MDL_context *tmp = m_victim;
m_victim = new_victim;
m_victim->lock_deadlock_victim();
if (tmp) tmp->unlock_deadlock_victim();
}
}
检测过程
MDL_context::find_deadlock()
|--MDL_context::visit_subgraph(MDL_wait_for_graph_visitor *) // 如果存在m_waiting_for的话,调用对应ticket的accept_visitor()
|--MDL_ticket::accept_visitor(MDL_wait_for_graph_visitor *) // 根据对应MDL_lock的锁获取情况检测
|--MDL_lock::visit_subgraph() // 递归遍历锁的授予链表(m_granted)和等待链表(m_waiting)去判断是否存在等待起始节点(死锁)情况
// 依次递归授予链表和等待链表的MDL_context来寻找死锁
|--Deadlock_detection_visitor::enter_node() // 首先进入当前节点
|--遍历授予链表(m_granted),判断兼容性
|--如果不兼容的话,调用Deadlock_detection_visitor::inspect_edge()判断是否死锁
|--遍历等待链表(m_waiting),同上
|--遍历授予链表,判断兼容性
|--如果不兼容的话,递归调用MDL_context::visit_subgraph()寻找连通子图。如果线程等待的ticket已经有明确的状态,非WS_EMPTY,可以直接返回
|--遍历等待链表,同上
|--Deadlock_detection_visitor::leave_node() // 离开当前节点
/*
We do a breadth-first search first -- that is, inspect all
edges of the current node, and only then follow up to the next
node. In workloads that involve wait-for graph loops this
has proven to be a more efficient strategy [citation missing].
*/
while ((ticket = granted_it++)) {
/* Filter out edges that point to the same node. */
if (ticket->get_ctx() != src_ctx &&
ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
gvisitor->inspect_edge(ticket->get_ctx())) {
goto end_leave_node;
}
}
...
/* Recurse and inspect all adjacent nodes. */
granted_it.rewind();
while ((ticket = granted_it++)) {
if (ticket->get_ctx() != src_ctx &&
ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
ticket->get_ctx()->visit_subgraph(gvisitor)) {
goto end_leave_node;
}
}
...
受害者权重
DEADLOCK_WEIGHT_DML,DML 类型语句的权重最小为 0
DEADLOCK_WEIGHT_ULL,用户手动上锁的权重居中为 50
DEADLOCK_WEIGHT_DDL,DDL 类型语句的权重最大为 100
五 MDL 监控
[mysqld]
performance_schema = ON
UPDATE performance_schema.setup_instruments
SET ENABLED = 'YES', TIMED = 'YES'
WHERE NAME = 'wait/lock/metadata/sql/mdl';
connect-1 > BEGIN; |
Query OK, 0 rows affected (0.00 sec) |
|
connect-1 > SELECT * FROM t1; |
+------+------+------+ |
| a | b | c | |
+------+------+------+ |
| 1 | 2 | 3 | |
| 4 | 5 | 6 | |
+------+------+------+ |
2 rows in set (0.00 sec) | # DDL will hang
| connect-2 > ALTER TABLE t1 ADD INDEX i1(a);
mysql > SELECT * FROM performance_schema.metadata_locks;
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+--------------------+-----------------+----------------+
| OBJECT_TYPE | OBJECT_SCHEMA | OBJECT_NAME | COLUMN_NAME | OBJECT_INSTANCE_BEGIN | LOCK_TYPE | LOCK_DURATION | LOCK_STATUS | SOURCE | OWNER_THREAD_ID | OWNER_EVENT_ID |
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+--------------------+-----------------+----------------+
| TABLE | test | t1 | NULL | 140734873224192 | SHARED_READ | TRANSACTION | GRANTED | sql_parse.cc:6759 | 68 | 23 |
| GLOBAL | NULL | NULL | NULL | 140734862726080 | INTENTION_EXCLUSIVE | STATEMENT | GRANTED | sql_base.cc:6137 | 69 | 6 |
| SCHEMA | test | NULL | NULL | 140734862726240 | INTENTION_EXCLUSIVE | TRANSACTION | GRANTED | sql_base.cc:6124 | 69 | 6 |
| TABLE | test | t1 | NULL | 140734862726400 | SHARED_UPGRADABLE | TRANSACTION | GRANTED | sql_parse.cc:6759 | 69 | 6 |
| BACKUP LOCK | NULL | NULL | NULL | 140734862726560 | INTENTION_EXCLUSIVE | TRANSACTION | GRANTED | sql_base.cc:6144 | 69 | 6 |
| TABLESPACE | NULL | test/t1 | NULL | 140734862727040 | INTENTION_EXCLUSIVE | TRANSACTION | GRANTED | lock.cc:811 | 69 | 6 |
| TABLE | test | #sql-5a52_a | NULL | 140734862726720 | EXCLUSIVE | STATEMENT | GRANTED | sql_table.cc:17089 | 69 | 6 |
| TABLE | test | t1 | NULL | 140734862726880 | EXCLUSIVE | TRANSACTION | PENDING | mdl.cc:4337 | 69 | 6 |
| TABLE | performance_schema | metadata_locks | NULL | 140734887891904 | SHARED_READ | TRANSACTION | GRANTED | sql_parse.cc:6759 | 67 | 4 |
+-------------+--------------------+----------------+-------------+-----------------------+---------------------+---------------+-------------+--------------------+-----------------+----------------+
9 rows in set (0.00 sec)
六 PolarDB 在 MDL 上的优化
七 参考
[2] MySQL · 源码分析 · 常用SQL语句的MDL加锁源码分析:http://mysql.taobao.org/monthly/2018/02/01/
阿里巴巴前端委员会推荐,带你把握最新前端技术趋势!《2022 前端技术趋势解读》由阿里巴巴前端委员会推荐,多位前端技术专家倾情分享的一本电子手册。本册电子手册主要通过对五大领域技术实战及多样化技术趋势分享,让我们聚焦体验,回归初心,结合具体业务实践、技术探索,和一线技术专家一起,共同交流前端在体验方向的发展及经验,带大家把握最新前端技术趋势。点击阅读原文查看详情!