Skip to content

Commit d217a92

Browse files
author
Jan Lindström
committed
MDEV-24923 : Port selected Galera conflict resolution changes from 10.6
Add condition on trx->state == TRX_STATE_COMMITTED_IN_MEMORY in order to avoid unnecessary work. If a transaction has already been committed or rolled back, it will release its locks in lock_release() and let the waiting thread(s) continue execution. Let BF wait on lock_rec_has_to_wait and if necessary other BF is replayed. wsrep_trx_order_before If BF is not even replicated yet then they are ordered correctly. bg_wsrep_kill_trx Make sure victim_trx is found and check also its state. If state is TRX_STATE_COMMITTED_IN_MEMORY transaction is already committed or rolled back and will release it locks soon. wsrep_assert_no_bf_bf_wait Transaction requesting new record lock should be TRX_STATE_ACTIVE Conflicting transaction can be in states TRX_STATE_ACTIVE, TRX_STATE_COMMITTED_IN_MEMORY or in TRX_STATE_PREPARED. If conflicting transaction is already committed in memory or prepared we should wait. When transaction is committed in memory we held trx mutex, but not lock_sys->mutex. Therefore, we could end here before transaction has time to do lock_release() that is protected with lock_sys->mutex. lock_rec_has_to_wait We very well can let bf to wait normally as other BF will be replayed in case of conflict. For debug builds we will do additional sanity checks to catch unsupported bf wait if any. wsrep_kill_victim Check is victim already in TRX_STATE_COMMITTED_IN_MEMORY state and if it is we can return. lock_rec_dequeue_from_page lock_rec_unlock Remove unnecessary wsrep_assert_no_bf_bf_wait function calls. We can very well let BF wait here.
1 parent c442733 commit d217a92

File tree

3 files changed

+107
-121
lines changed

3 files changed

+107
-121
lines changed

sql/wsrep_mysqld.cc

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2788,16 +2788,18 @@ extern "C" bool wsrep_thd_ignore_table(THD *thd)
27882788
extern int
27892789
wsrep_trx_order_before(THD *thd1, THD *thd2)
27902790
{
2791-
if (wsrep_thd_trx_seqno(thd1) < wsrep_thd_trx_seqno(thd2)) {
2792-
WSREP_DEBUG("BF conflict, order: %lld %lld\n",
2793-
(long long)wsrep_thd_trx_seqno(thd1),
2794-
(long long)wsrep_thd_trx_seqno(thd2));
2795-
return 1;
2796-
}
2797-
WSREP_DEBUG("waiting for BF, trx order: %lld %lld\n",
2798-
(long long)wsrep_thd_trx_seqno(thd1),
2799-
(long long)wsrep_thd_trx_seqno(thd2));
2800-
return 0;
2791+
const longlong trx1_seqno= wsrep_thd_trx_seqno(thd1);
2792+
const longlong trx2_seqno= wsrep_thd_trx_seqno(thd2);
2793+
WSREP_DEBUG("BF conflict, order: %lld %lld\n",
2794+
trx1_seqno, trx2_seqno);
2795+
2796+
if (trx1_seqno == WSREP_SEQNO_UNDEFINED ||
2797+
trx2_seqno == WSREP_SEQNO_UNDEFINED)
2798+
return 1; /* trx is not yet replicated */
2799+
else if (trx1_seqno < trx2_seqno)
2800+
return 1;
2801+
2802+
return 0;
28012803
}
28022804

28032805

storage/innobase/handler/ha_innodb.cc

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19596,21 +19596,32 @@ static void bg_wsrep_kill_trx(
1959619596
DBUG_ENTER("bg_wsrep_kill_trx");
1959719597

1959819598
if (thd) {
19599-
victim_trx = thd_to_trx(thd);
19600-
lock_mutex_enter();
19601-
trx_mutex_enter(victim_trx);
19602-
if (victim_trx->id != arg->trx_id)
19603-
{
19604-
trx_mutex_exit(victim_trx);
19605-
lock_mutex_exit();
19599+
victim_trx= thd_to_trx(thd);
19600+
/* Victim trx might not exist e.g. on MDL-conflict. */
19601+
if (victim_trx) {
19602+
lock_mutex_enter();
19603+
trx_mutex_enter(victim_trx);
19604+
if (victim_trx->id != arg->trx_id ||
19605+
victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
19606+
{
19607+
/* Victim was meanwhile rolled back or
19608+
committed */
19609+
trx_mutex_exit(victim_trx);
19610+
lock_mutex_exit();
19611+
wsrep_thd_UNLOCK(thd);
19612+
victim_trx= NULL;
19613+
}
19614+
} else {
19615+
/* find_thread_by_id locked
19616+
THD::LOCK_thd_data */
1960619617
wsrep_thd_UNLOCK(thd);
19607-
victim_trx = NULL;
1960819618
}
1960919619
}
1961019620

1961119621
if (!victim_trx) {
19612-
/* it can happen that trx_id was meanwhile rolled back */
19613-
DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
19622+
/* Victim trx might not exist (MDL-conflict) or victim
19623+
was meanwhile rolled back or committed because of
19624+
a KILL statement or a disconnect. */
1961419625
goto ret;
1961519626
}
1961619627

@@ -19766,7 +19777,7 @@ static void bg_wsrep_kill_trx(
1976619777
}
1976719778

1976819779
ret_awake:
19769-
awake = true;
19780+
awake= true;
1977019781

1977119782
ret_unlock:
1977219783
trx_mutex_exit(victim_trx);

storage/innobase/lock/lock0lock.cc

Lines changed: 73 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -644,71 +644,81 @@ lock_rec_get_insert_intention(
644644
return(lock->type_mode & LOCK_INSERT_INTENTION);
645645
}
646646

647+
#ifdef UNIV_DEBUG
647648
#ifdef WITH_WSREP
648-
/** Check if both conflicting lock and other record lock are brute force
649-
(BF). This case is a bug so report lock information and wsrep state.
650-
@param[in] lock_rec1 conflicting waiting record lock or NULL
651-
@param[in] lock_rec2 other waiting record lock
652-
@param[in] trx1 lock_rec1 can be NULL, trx
649+
/** Check if both conflicting lock transaction and other transaction
650+
requesting record lock are brute force (BF). If they are check is
651+
this BF-BF wait correct and if not report BF wait and assert.
652+
653+
@param[in] lock_rec other waiting record lock
654+
@param[in] trx trx requesting conflicting record lock
653655
*/
654-
static void wsrep_assert_no_bf_bf_wait(
655-
const lock_t* lock_rec1,
656-
const lock_t* lock_rec2,
657-
const trx_t* trx1)
656+
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx)
658657
{
659-
ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC);
660-
ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC);
658+
ut_ad(lock_get_type_low(lock) == LOCK_REC);
661659
ut_ad(lock_mutex_own());
660+
trx_t* lock_trx= lock->trx;
662661

663662
/* Note that we are holding lock_sys->mutex, thus we should
664663
not acquire THD::LOCK_thd_data mutex below to avoid mutexing
665664
order violation. */
666665

667-
if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep())
666+
if (!trx->is_wsrep() || !lock_trx->is_wsrep())
668667
return;
669-
if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE)))
670-
return;
671-
if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE)))
668+
if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
669+
|| UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE)))
672670
return;
673671

674-
/* if BF - BF order is honored, we can keep trx1 waiting for the lock */
675-
if (wsrep_trx_order_before(trx1->mysql_thd, lock_rec2->trx->mysql_thd))
672+
ut_ad(trx->state == TRX_STATE_ACTIVE);
673+
674+
trx_mutex_enter(lock_trx);
675+
const trx_state_t trx2_state= lock_trx->state;
676+
trx_mutex_exit(lock_trx);
677+
678+
/* If transaction is already committed in memory or
679+
prepared we should wait. When transaction is committed in
680+
memory we held trx mutex, but not lock_sys->mutex. Therefore,
681+
we could end here before transaction has time to do
682+
lock_release() that is protected with lock_sys->mutex. */
683+
switch (trx2_state) {
684+
case TRX_STATE_COMMITTED_IN_MEMORY:
685+
case TRX_STATE_PREPARED:
676686
return;
687+
case TRX_STATE_ACTIVE:
688+
break;
689+
default:
690+
ut_ad("invalid state" == 0);
691+
}
677692

678-
/* avoiding BF-BF conflict assert, if victim is already aborting
679-
or rolling back for replaying
680-
*/
681-
if (wsrep_trx_is_aborting(lock_rec2->trx->mysql_thd))
693+
/* If BF - BF order is honored, i.e. trx already holding
694+
record lock should be ordered before this new lock request
695+
we can keep trx waiting for the lock. If conflicting
696+
transaction is already aborting or rolling back for replaying
697+
we can also let new transaction waiting. */
698+
if (wsrep_trx_order_before(lock_trx->mysql_thd, trx->mysql_thd)
699+
|| wsrep_trx_is_aborting(lock_trx->mysql_thd))
682700
return;
683701

684702
mtr_t mtr;
685703

686-
if (lock_rec1) {
687-
ib::error() << "Waiting lock on table: "
688-
<< lock_rec1->index->table->name
689-
<< " index: "
690-
<< lock_rec1->index->name()
691-
<< " that has conflicting lock ";
692-
lock_rec_print(stderr, lock_rec1, mtr);
693-
}
694-
695704
ib::error() << "Conflicting lock on table: "
696-
<< lock_rec2->index->table->name
705+
<< lock->index->table->name
697706
<< " index: "
698-
<< lock_rec2->index->name()
707+
<< lock->index->name()
699708
<< " that has lock ";
700-
lock_rec_print(stderr, lock_rec2, mtr);
709+
lock_rec_print(stderr, lock, mtr);
701710

702711
ib::error() << "WSREP state: ";
703712

704-
wsrep_report_bf_lock_wait(trx1->mysql_thd,
705-
trx1->id);
706-
wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd,
707-
lock_rec2->trx->id);
713+
wsrep_report_bf_lock_wait(trx->mysql_thd,
714+
trx->id);
715+
wsrep_report_bf_lock_wait(lock_trx->mysql_thd,
716+
lock_trx->id);
708717
/* BF-BF wait is a bug */
709718
ut_error;
710719
}
711720
#endif /* WITH_WSREP */
721+
#endif /* UNIV_DEBUG */
712722

713723
/*********************************************************************//**
714724
Checks if a lock request for a new lock has to wait for request lock2.
@@ -832,9 +842,11 @@ lock_rec_has_to_wait(
832842
return (FALSE);
833843
}
834844

835-
/* There should not be two conflicting locks that are
836-
brute force. If there is it is a bug. */
837-
wsrep_assert_no_bf_bf_wait(NULL, lock2, trx);
845+
/* We very well can let bf to wait normally as other
846+
BF will be replayed in case of conflict. For debug
847+
builds we will do additional sanity checks to catch
848+
unsupported bf wait if any. */
849+
ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
838850
#endif /* WITH_WSREP */
839851

840852
return(TRUE);
@@ -1111,66 +1123,35 @@ lock_rec_other_has_expl_req(
11111123
#endif /* UNIV_DEBUG */
11121124

11131125
#ifdef WITH_WSREP
1114-
static
1115-
void
1116-
wsrep_kill_victim(
1117-
/*==============*/
1118-
const trx_t * const trx,
1119-
const lock_t *lock)
1126+
static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock)
11201127
{
11211128
ut_ad(lock_mutex_own());
1122-
ut_ad(trx_mutex_own(lock->trx));
1129+
ut_ad(trx->is_wsrep());
1130+
trx_t* lock_trx = lock->trx;
1131+
ut_ad(trx_mutex_own(lock_trx));
1132+
ut_ad(lock_trx != trx);
11231133

1124-
/* quit for native mysql */
1125-
if (!trx->is_wsrep()) return;
1134+
if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
1135+
return;
11261136

1127-
my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
1128-
my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE);
1129-
mtr_t mtr;
1137+
if (lock_trx->state == TRX_STATE_COMMITTED_IN_MEMORY
1138+
|| lock_trx->lock.was_chosen_as_deadlock_victim)
1139+
return;
11301140

1131-
if ((bf_this && !bf_other) ||
1132-
(bf_this && bf_other && wsrep_trx_order_before(
1133-
trx->mysql_thd, lock->trx->mysql_thd))) {
1141+
my_bool bf_other = wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE);
11341142

1135-
if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1136-
if (UNIV_UNLIKELY(wsrep_debug)) {
1137-
ib::info() << "WSREP: BF victim waiting\n";
1138-
}
1143+
if (!bf_other
1144+
|| wsrep_trx_order_before(trx->mysql_thd,
1145+
lock_trx->mysql_thd)) {
1146+
1147+
if (lock_trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1148+
if (UNIV_UNLIKELY(wsrep_debug))
1149+
WSREP_INFO("BF victim waiting");
11391150
/* cannot release lock, until our lock
11401151
is in the queue*/
1141-
} else if (lock->trx != trx) {
1142-
if (wsrep_log_conflicts) {
1143-
if (bf_this) {
1144-
ib::info() << "*** Priority TRANSACTION:";
1145-
} else {
1146-
ib::info() << "*** Victim TRANSACTION:";
1147-
}
1148-
1149-
wsrep_trx_print_locking(stderr, trx, 3000);
1150-
1151-
if (bf_other) {
1152-
ib::info() << "*** Priority TRANSACTION:";
1153-
} else {
1154-
ib::info() << "*** Victim TRANSACTION:";
1155-
}
1156-
wsrep_trx_print_locking(stderr, lock->trx, 3000);
1157-
1158-
ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";
1159-
1160-
if (lock_get_type(lock) == LOCK_REC) {
1161-
lock_rec_print(stderr, lock, mtr);
1162-
} else {
1163-
lock_table_print(stderr, lock);
1164-
}
1165-
1166-
ib::info() << " SQL1: "
1167-
<< wsrep_thd_query(trx->mysql_thd);
1168-
ib::info() << " SQL2: "
1169-
<< wsrep_thd_query(lock->trx->mysql_thd);
1170-
}
1171-
1172-
wsrep_innobase_kill_one_trx(trx->mysql_thd,
1173-
trx, lock->trx, TRUE);
1152+
} else {
1153+
wsrep_innobase_kill_one_trx(trx->mysql_thd, trx,
1154+
lock_trx, true);
11741155
}
11751156
}
11761157
}
@@ -2410,10 +2391,6 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock)
24102391
/* Grant the lock */
24112392
ut_ad(lock->trx != in_lock->trx);
24122393
lock_grant(lock);
2413-
#ifdef WITH_WSREP
2414-
} else {
2415-
wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
2416-
#endif /* WITH_WSREP */
24172394
}
24182395
}
24192396
} else {
@@ -4342,10 +4319,6 @@ lock_rec_unlock(
43424319
/* Grant the lock */
43434320
ut_ad(trx != lock->trx);
43444321
lock_grant(lock);
4345-
#ifdef WITH_WSREP
4346-
} else {
4347-
wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
4348-
#endif /* WITH_WSREP */
43494322
}
43504323
}
43514324
} else {

0 commit comments

Comments
 (0)