I debugged the server execution corresponding to the following section of innodb.instant_alter_purge:
connection prevent_purge;
|
COMMIT;
|
START TRANSACTION WITH CONSISTENT SNAPSHOT;
|
connection default;
|
|
ALTER TABLE t1 ADD COLUMN extra TINYINT UNSIGNED NOT NULL DEFAULT 42;
|
let $wait_all_purged= 1;
|
--source include/wait_all_purged.inc
|
The assertion would fail during the wait. The purge coordinator would be invoked after the ALTER TABLE transaction commit invoked trx_purge_add_undo_to_history(), which invoked rseg->set_last_commit(86,39). The purge coordinator would first update purge_sys.view.m_low_limit_no to 40 (corresponding to the time after the ALTER TABLE) and then adjust it to 36 corresponding to the read view of the older started transaction. Shortly after this, the assertion failed within the same invocation of purge_coordinator_callback(). The state would be as follows:
(rr) p purge_sys.tail
|
$10 = {trx_no = 39, undo_no = 0}
|
(rr) p purge_sys.head
|
$11 = {trx_no = 39, undo_no = 0}
|
(rr) p purge_sys.view
|
$12 = {m_low_limit_id = 36, m_up_limit_id = 36,
|
m_ids = std::vector of length 0, capacity 32, m_low_limit_no = 36}
|
Simply put, TrxUndoRsegsIterator::set_next() is ‘overshooting’ the purge_sys.view when updating purge_sys.tail. I tried the following patch:
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
|
index c0fafe1ec6b..8ee1bd688cd 100644
|
--- a/storage/innobase/trx/trx0purge.cc
|
+++ b/storage/innobase/trx/trx0purge.cc
|
@@ -110,7 +110,7 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
|
ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
|
|| srv_is_undo_tablespace(purge_sys.rseg->space->id));
|
|
- trx_id_t last_trx_no, tail_trx_no;
|
+ trx_id_t last_trx_no;
|
{
|
#ifdef SUX_LOCK_GENERIC
|
purge_sys.rseg->latch.rd_lock(SRW_LOCK_CALL);
|
@@ -119,9 +119,7 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
|
{purge_sys.rseg->latch};
|
#endif
|
last_trx_no = purge_sys.rseg->last_trx_no();
|
- tail_trx_no = purge_sys.tail.trx_no;
|
|
- purge_sys.tail.trx_no = last_trx_no;
|
purge_sys.hdr_offset = purge_sys.rseg->last_offset();
|
purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
|
|
@@ -131,10 +129,14 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
|
}
|
|
/* Only the purge coordinator task will access
|
- purge_sys.rseg_iter or purge_sys.hdr_page_no. */
|
+ purge_sys.rseg_iter, purge_sys.hdr_page_no,
|
+ or modify purge_sys.view. */
|
ut_ad(last_trx_no == m_rsegs.trx_no);
|
ut_a(purge_sys.hdr_page_no != FIL_NULL);
|
- ut_a(tail_trx_no <= last_trx_no);
|
+ ut_a(purge_sys.tail.trx_no <= last_trx_no);
|
+ if (last_trx_no <= purge_sys.low_limit_no()) {
|
+ purge_sys.tail.trx_no = last_trx_no;
|
+ }
|
|
return(true);
|
}
|
@@ -607,6 +609,9 @@ TRANSACTIONAL_TARGET static void trx_purge_truncate_history()
|
|
if (head.trx_no >= purge_sys.low_limit_no())
|
{
|
+ ut_a(head.undo_no
|
+ ? head.trx_no < purge_sys.low_limit_no()
|
+ : head.trx_no == purge_sys.low_limit_no());
|
/* This is sometimes necessary. TODO: find out why. */
|
head.trx_no= purge_sys.low_limit_no();
|
head.undo_no= 0;
|
Not only would the added assertion in trx_purge_truncate_history() fail in the test innodb.instant_alter_bugs, but the test innodb.innodb-isolation would show result differences.
vlad.lesin, since you are familiar with read views and purge, do you have any suggestions? The options are to replace the FIXME comment with a good explanation, or to adjust the code so that no adjustment in trx_purge_truncate_history() would be needed.
I debugged the server execution corresponding to the following section of innodb.instant_alter_purge:
let $wait_all_purged= 1;
The assertion would fail during the wait. The purge coordinator would be invoked after the ALTER TABLE transaction commit invoked trx_purge_add_undo_to_history(), which invoked rseg->set_last_commit(86,39). The purge coordinator would first update purge_sys.view.m_low_limit_no to 40 (corresponding to the time after the ALTER TABLE) and then adjust it to 36 corresponding to the read view of the older started transaction. Shortly after this, the assertion failed within the same invocation of purge_coordinator_callback(). The state would be as follows:
(rr) p purge_sys.tail
$10 = {trx_no = 39, undo_no = 0}
(rr) p purge_sys.head
$11 = {trx_no = 39, undo_no = 0}
(rr) p purge_sys.view
$12 = {m_low_limit_id = 36, m_up_limit_id = 36,
m_ids = std::vector of length 0, capacity 32, m_low_limit_no = 36}
Simply put, TrxUndoRsegsIterator::set_next() is ‘overshooting’ the purge_sys.view when updating purge_sys.tail. I tried the following patch:
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index c0fafe1ec6b..8ee1bd688cd 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -110,7 +110,7 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
ut_ad(purge_sys.rseg->space->id == TRX_SYS_SPACE
|| srv_is_undo_tablespace(purge_sys.rseg->space->id));
- trx_id_t last_trx_no, tail_trx_no;
+ trx_id_t last_trx_no;
{
#ifdef SUX_LOCK_GENERIC
purge_sys.rseg->latch.rd_lock(SRW_LOCK_CALL);
@@ -119,9 +119,7 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
{purge_sys.rseg->latch};
#endif
last_trx_no = purge_sys.rseg->last_trx_no();
- tail_trx_no = purge_sys.tail.trx_no;
- purge_sys.tail.trx_no = last_trx_no;
purge_sys.hdr_offset = purge_sys.rseg->last_offset();
purge_sys.hdr_page_no = purge_sys.rseg->last_page_no;
@@ -131,10 +129,14 @@ TRANSACTIONAL_INLINE inline bool TrxUndoRsegsIterator::set_next()
}
/* Only the purge coordinator task will access
- purge_sys.rseg_iter or purge_sys.hdr_page_no. */
+ purge_sys.rseg_iter, purge_sys.hdr_page_no,
+ or modify purge_sys.view. */
ut_ad(last_trx_no == m_rsegs.trx_no);
ut_a(purge_sys.hdr_page_no != FIL_NULL);
- ut_a(tail_trx_no <= last_trx_no);
+ ut_a(purge_sys.tail.trx_no <= last_trx_no);
+ if (last_trx_no <= purge_sys.low_limit_no()) {
+ purge_sys.tail.trx_no = last_trx_no;
+ }
return(true);
}
@@ -607,6 +609,9 @@ TRANSACTIONAL_TARGET static void trx_purge_truncate_history()
if (head.trx_no >= purge_sys.low_limit_no())
{
+ ut_a(head.undo_no
+ ? head.trx_no < purge_sys.low_limit_no()
+ : head.trx_no == purge_sys.low_limit_no());
/* This is sometimes necessary. TODO: find out why. */
head.trx_no= purge_sys.low_limit_no();
Not only would the added assertion in trx_purge_truncate_history() fail in the test innodb.instant_alter_bugs, but the test innodb.innodb-isolation would show result differences.
vlad.lesin, since you are familiar with read views and purge, do you have any suggestions? The options are to replace the FIXME comment with a good explanation, or to adjust the code so that no adjustment in trx_purge_truncate_history() would be needed.