Skip to content

Commit 337bf8a

Browse files
committed
MDEV-36122 Assertion ctx0->old_table->get_ref_count() == 1
trx_purge_close_tables(): Before releasing any metadata locks (MDL), release all table references, in case an ALTER TABLE…ALGORITHM=COPY operation has confused our logic. trx_purge_table_acquire(), trx_purge_table_open(): Do not acquire any table reference before successfully acquiring a necessary metadata lock. In this way, if purge is waiting for MDL, a concurrent ha_innobase::commit_inplace_alter_table(commit=true) that is holding a conflicting MDL_EXCLUSIVE will only observe its own reference on the table that it may need to replace. dict_acquire_mdl_shared<false>(): Unless we hold an MDL or one is not needed, do not hold a table reference when releasing dict_sys.latch. After loading a table into the dictionary cache, we will look up the table again, because the table could be evicted or dropped while we were not holding any dict_sys.latch. Reviewed by: Debarun Banerjee
1 parent 1f4a901 commit 337bf8a

File tree

2 files changed

+84
-66
lines changed

2 files changed

+84
-66
lines changed

storage/innobase/dict/dict0dict.cc

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -694,35 +694,37 @@ dict_acquire_mdl_shared(dict_table_t *table,
694694
MDL_context *mdl_context, MDL_ticket **mdl,
695695
dict_table_op_t table_op)
696696
{
697-
table_id_t table_id= table->id;
698697
char db_buf[NAME_LEN + 1], db_buf1[NAME_LEN + 1];
699698
char tbl_buf[NAME_LEN + 1], tbl_buf1[NAME_LEN + 1];
700699
size_t db_len, tbl_len;
701-
bool unaccessible= false;
702700

703701
if (!table->parse_name<!trylock>(db_buf, tbl_buf, &db_len, &tbl_len))
704702
/* The name of an intermediate table starts with #sql */
705703
return table;
706704

707705
retry:
708-
if (!unaccessible && (!table->is_readable() || table->corrupted))
706+
ut_ad(!trylock == dict_sys.frozen());
707+
ut_ad(trylock || table->get_ref_count());
708+
709+
if (!table->is_readable() || table->corrupted)
709710
{
711+
if (!trylock)
712+
table->release();
710713
if (*mdl)
711714
{
712715
mdl_context->release_lock(*mdl);
713716
*mdl= nullptr;
714717
}
715-
unaccessible= true;
718+
return nullptr;
716719
}
717720

718-
if (!trylock)
719-
table->release();
720-
721-
if (unaccessible)
722-
return nullptr;
721+
const table_id_t table_id{table->id};
723722

724723
if (!trylock)
724+
{
725+
table->release();
725726
dict_sys.unfreeze();
727+
}
726728

727729
{
728730
MDL_request request;
@@ -748,43 +750,53 @@ dict_acquire_mdl_shared(dict_table_t *table,
748750
}
749751
}
750752

753+
size_t db1_len, tbl1_len;
754+
lookup:
751755
dict_sys.freeze(SRW_LOCK_CALL);
752756
table= dict_sys.find_table(table_id);
753757
if (table)
758+
{
759+
if (!table->is_accessible())
760+
{
761+
table= nullptr;
762+
unlock_and_return_without_mdl:
763+
if (trylock)
764+
dict_sys.unfreeze();
765+
return_without_mdl:
766+
if (*mdl)
767+
{
768+
mdl_context->release_lock(*mdl);
769+
*mdl= nullptr;
770+
}
771+
return table;
772+
}
773+
754774
table->acquire();
755-
if (!table && table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
775+
776+
if (!table->parse_name<true>(db_buf1, tbl_buf1, &db1_len, &tbl1_len))
777+
{
778+
/* The table was renamed to #sql prefix.
779+
Release MDL (if any) for the old name and return. */
780+
goto unlock_and_return_without_mdl;
781+
}
782+
}
783+
else if (table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
756784
{
757785
dict_sys.unfreeze();
758786
dict_sys.lock(SRW_LOCK_CALL);
759787
table= dict_load_table_on_id(table_id,
760788
table_op == DICT_TABLE_OP_LOAD_TABLESPACE
761789
? DICT_ERR_IGNORE_RECOVER_LOCK
762790
: DICT_ERR_IGNORE_FK_NOKEY);
763-
if (table)
764-
table->acquire();
765791
dict_sys.unlock();
766-
dict_sys.freeze(SRW_LOCK_CALL);
767-
}
768-
769-
if (!table || !table->is_accessible())
770-
{
771-
return_without_mdl:
772-
if (trylock)
773-
dict_sys.unfreeze();
774-
if (*mdl)
775-
{
776-
mdl_context->release_lock(*mdl);
777-
*mdl= nullptr;
778-
}
779-
return nullptr;
780-
}
781-
782-
size_t db1_len, tbl1_len;
783-
784-
if (!table->parse_name<true>(db_buf1, tbl_buf1, &db1_len, &tbl1_len))
785-
{
786-
/* The table was renamed to #sql prefix.
787-
Release MDL (if any) for the old name and return. */
792+
/* At this point, the freshly loaded table may already have been evicted.
793+
We must look it up again while holding a shared dict_sys.latch. We keep
794+
trying this until the table is found in the cache or it cannot be found
795+
in the dictionary (because the table has been dropped or rebuilt). */
796+
if (table)
797+
goto lookup;
798+
if (!trylock)
799+
dict_sys.freeze(SRW_LOCK_CALL);
788800
goto return_without_mdl;
789801
}
790802

storage/innobase/trx/trx0purge.cc

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,16 +1051,25 @@ inline trx_purge_rec_t purge_sys_t::fetch_next_rec()
10511051
/** Close all tables that were opened in a purge batch for a worker.
10521052
@param node purge task context
10531053
@param thd purge coordinator thread handle */
1054-
static void trx_purge_close_tables(purge_node_t *node, THD *thd)
1054+
static void trx_purge_close_tables(purge_node_t *node, THD *thd) noexcept
10551055
{
10561056
for (auto &t : node->tables)
10571057
{
1058-
if (!t.second.first);
1059-
else if (t.second.first == reinterpret_cast<dict_table_t*>(-1));
1060-
else
1058+
dict_table_t *table= t.second.first;
1059+
if (table != nullptr && table != reinterpret_cast<dict_table_t*>(-1))
1060+
dict_table_close(table);
1061+
}
1062+
1063+
MDL_context *mdl_context= static_cast<MDL_context*>(thd_mdl_context(thd));
1064+
1065+
for (auto &t : node->tables)
1066+
{
1067+
dict_table_t *table= t.second.first;
1068+
if (table != nullptr && table != reinterpret_cast<dict_table_t*>(-1))
10611069
{
1062-
dict_table_close(t.second.first, false, thd, t.second.second);
10631070
t.second.first= reinterpret_cast<dict_table_t*>(-1);
1071+
if (mdl_context != nullptr && t.second.second != nullptr)
1072+
mdl_context->release_lock(t.second.second);
10641073
}
10651074
}
10661075
}
@@ -1072,44 +1081,43 @@ void purge_sys_t::wait_FTS(bool also_sys)
10721081
}
10731082

10741083
__attribute__((nonnull))
1075-
/** Aqcuire a metadata lock on a table.
1084+
/** Acquire a metadata lock on a table.
10761085
@param table table handle
10771086
@param mdl_context metadata lock acquisition context
1078-
@param mdl metadata lcok
1087+
@param mdl metadata lock
10791088
@return table handle
10801089
@retval nullptr if the table is not found or accessible
10811090
@retval -1 if the purge of history must be suspended due to DDL */
10821091
static dict_table_t *trx_purge_table_acquire(dict_table_t *table,
10831092
MDL_context *mdl_context,
1084-
MDL_ticket **mdl)
1093+
MDL_ticket **mdl) noexcept
10851094
{
10861095
ut_ad(dict_sys.frozen_not_locked());
10871096
*mdl= nullptr;
10881097

10891098
if (!table->is_readable() || table->corrupted)
1090-
{
1091-
table->release();
10921099
return nullptr;
1093-
}
10941100

10951101
size_t db_len= dict_get_db_name_len(table->name.m_name);
10961102
if (db_len == 0)
1097-
return table; /* InnoDB system tables are not covered by MDL */
1103+
{
1104+
/* InnoDB system tables are not covered by MDL */
1105+
got_table:
1106+
table->acquire();
1107+
return table;
1108+
}
10981109

10991110
if (purge_sys.must_wait_FTS())
1100-
{
11011111
must_wait:
1102-
table->release();
11031112
return reinterpret_cast<dict_table_t*>(-1);
1104-
}
11051113

11061114
char db_buf[NAME_LEN + 1];
11071115
char tbl_buf[NAME_LEN + 1];
11081116
size_t tbl_len;
11091117

11101118
if (!table->parse_name<true>(db_buf, tbl_buf, &db_len, &tbl_len))
11111119
/* The name of an intermediate table starts with #sql */
1112-
return table;
1120+
goto got_table;
11131121

11141122
{
11151123
MDL_request request;
@@ -1122,37 +1130,38 @@ static dict_table_t *trx_purge_table_acquire(dict_table_t *table,
11221130
goto must_wait;
11231131
}
11241132

1125-
return table;
1133+
goto got_table;
11261134
}
11271135

11281136
/** Open a table handle for the purge of committed transaction history
11291137
@param table_id InnoDB table identifier
11301138
@param mdl_context metadata lock acquisition context
1131-
@param mdl metadata lcok
1139+
@param mdl metadata lock
11321140
@return table handle
11331141
@retval nullptr if the table is not found or accessible
11341142
@retval -1 if the purge of history must be suspended due to DDL */
11351143
static dict_table_t *trx_purge_table_open(table_id_t table_id,
11361144
MDL_context *mdl_context,
1137-
MDL_ticket **mdl)
1145+
MDL_ticket **mdl) noexcept
11381146
{
1139-
dict_sys.freeze(SRW_LOCK_CALL);
1140-
1141-
dict_table_t *table= dict_sys.find_table(table_id);
1147+
dict_table_t *table;
11421148

1143-
if (table)
1144-
table->acquire();
1145-
else
1149+
for (;;)
11461150
{
1151+
dict_sys.freeze(SRW_LOCK_CALL);
1152+
table= dict_sys.find_table(table_id);
1153+
if (table)
1154+
break;
11471155
dict_sys.unfreeze();
11481156
dict_sys.lock(SRW_LOCK_CALL);
11491157
table= dict_load_table_on_id(table_id, DICT_ERR_IGNORE_FK_NOKEY);
1150-
if (table)
1151-
table->acquire();
11521158
dict_sys.unlock();
11531159
if (!table)
11541160
return nullptr;
1155-
dict_sys.freeze(SRW_LOCK_CALL);
1161+
/* At this point, the freshly loaded table may already have been evicted.
1162+
We must look it up again while holding a shared dict_sys.latch. We keep
1163+
trying this until the table is found in the cache or it cannot be found
1164+
in the dictionary (because the table has been dropped or rebuilt). */
11561165
}
11571166

11581167
table= trx_purge_table_acquire(table, mdl_context, mdl);
@@ -1171,10 +1180,7 @@ dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd,
11711180

11721181
for (que_thr_t *thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr;
11731182
thr= UT_LIST_GET_NEXT(thrs, thr))
1174-
{
1175-
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
1176-
trx_purge_close_tables(node, thd);
1177-
}
1183+
trx_purge_close_tables(static_cast<purge_node_t*>(thr->child), thd);
11781184

11791185
m_active= false;
11801186
wait_FTS(false);

0 commit comments

Comments
 (0)