Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-14407

Assertion `len < ((ulint) srv_page_size)' failed in trx_undo_rec_copy upon rollback

Details

    Description

      10.3 c2a868b18cdffc

      mysqld: /data/src/10.3/storage/innobase/include/trx0rec.ic:68: trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t*, mem_heap_t*): Assertion `len < ((ulint) srv_page_size)' failed.
      171115 13:08:05 [ERROR] mysqld got signal 6 ;
       
      #7  0x00007fdc661c9ee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
      #8  0x000055c8bc4061a5 in trx_undo_rec_copy (undo_rec=0x7fdc5bc7b082 "", heap=0x7fdc0c4a31d0) at /data/src/10.3/storage/innobase/include/trx0rec.ic:68
      #9  0x000055c8bc40cb8f in trx_undo_get_undo_rec_low (roll_ptr=22236523175227522, is_temp=true, heap=0x7fdc0c4a31d0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2103
      #10 0x000055c8bc40cc8b in trx_undo_get_undo_rec (roll_ptr=22236523175227522, is_temp=true, heap=0x7fdc0c4a31d0, trx_id=5759, name=..., undo_rec=0x7fdc5853e630) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2139
      #11 0x000055c8bc40cf35 in trx_undo_prev_version_build (index_rec=0x7fdc5bae81e9 "", index_mtr=0x7fdc5853ef90, rec=0x7fdc0c4a2ec6 "", index=0x7fdc0c204248, offsets=0x7fdc0c4a2f38, heap=0x7fdc0c4a31d0, old_vers=0x7fdc
      5853e720, v_heap=0x0, vrow=0x0, v_status=0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2223
      #12 0x000055c8bc3c1bfa in row_vers_old_has_index_entry (also_curr=0, rec=0x7fdc5bae81e9 "", mtr=0x7fdc5853ef90, index=0x7fdc0c343208, ientry=0x7fdc0c4a2938, roll_ptr=0, trx_id=0) at /data/src/10.3/storage/innobase/r
      ow/row0vers.cc:1032
      #13 0x000055c8bc5c3cda in row_undo_mod_del_mark_or_remove_sec_low (node=0x7fdc0c4a2398, thr=0x7fdc0c18dab0, index=0x7fdc0c343208, entry=0x7fdc0c4a2938, mode=2) at /data/src/10.3/storage/innobase/row/row0umod.cc:512
      #14 0x000055c8bc5c4030 in row_undo_mod_del_mark_or_remove_sec (node=0x7fdc0c4a2398, thr=0x7fdc0c18dab0, index=0x7fdc0c343208, entry=0x7fdc0c4a2938) at /data/src/10.3/storage/innobase/row/row0umod.cc:584
      #15 0x000055c8bc5c5492 in row_undo_mod_upd_exist_sec (node=0x7fdc0c4a2398, thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1064
      #16 0x000055c8bc5c5d90 in row_undo_mod (node=0x7fdc0c4a2398, thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1268
      #17 0x000055c8bc3aa43e in row_undo (node=0x7fdc0c4a2398, thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/row/row0undo.cc:305
      #18 0x000055c8bc3aa56a in row_undo_step (thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/row/row0undo.cc:346
      #19 0x000055c8bc2fa8c4 in que_thr_step (thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/que/que0que.cc:1052
      #20 0x000055c8bc2fab0b in que_run_threads_low (thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/que/que0que.cc:1116
      #21 0x000055c8bc2faced in que_run_threads (thr=0x7fdc0c18dab0) at /data/src/10.3/storage/innobase/que/que0que.cc:1156
      #22 0x000055c8bc40ffb5 in trx_rollback_to_savepoint_low (trx=0x7fdc600f5878, savept=0x0) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:118
      #23 0x000055c8bc410334 in trx_rollback_for_mysql_low (trx=0x7fdc600f5878) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:180
      #24 0x000055c8bc4106ff in trx_rollback_low (trx=0x7fdc600f5878) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:212
      #25 0x000055c8bc410bf5 in trx_rollback_for_mysql (trx=0x7fdc600f5878) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:291
      #26 0x000055c8bc1edf02 in innobase_rollback (hton=0x55c8be5b0900, thd=0x7fdc0c000b00, rollback_trx=true) at /data/src/10.3/storage/innobase/handler/ha_innodb.cc:4651
      #27 0x000055c8bbecb705 in ha_rollback_trans (thd=0x7fdc0c000b00, all=true) at /data/src/10.3/sql/handler.cc:1679
      #28 0x000055c8bbd69eed in trans_rollback (thd=0x7fdc0c000b00) at /data/src/10.3/sql/transaction.cc:415
      #29 0x000055c8bbc0a5ce in mysql_execute_command (thd=0x7fdc0c000b00) at /data/src/10.3/sql/sql_parse.cc:5796
      #30 0x000055c8bbc1071e in mysql_parse (thd=0x7fdc0c000b00, rawbuf=0x7fdc0c013788 "ROLLBACK /* QNO 15244 CON_ID 15 */", length=34, parser_state=0x7fdc58541610, is_com_multi=false, is_next_command=false) at /data/src/
      10.3/sql/sql_parse.cc:7922
      #31 0x000055c8bbbfe1f4 in dispatch_command (command=COM_QUERY, thd=0x7fdc0c000b00, packet=0x7fdc0c00af71 "ROLLBACK /* QNO 15244 CON_ID 15 */", packet_length=34, is_com_multi=false, is_next_command=false) at /data/sr
      c/10.3/sql/sql_parse.cc:1828
      #32 0x000055c8bbbfcc15 in do_command (thd=0x7fdc0c000b00) at /data/src/10.3/sql/sql_parse.cc:1370
      #33 0x000055c8bbd540ca in do_handle_one_connection (connect=0x55c8bebbb290) at /data/src/10.3/sql/sql_connect.cc:1418
      #34 0x000055c8bbd53e57 in handle_one_connection (arg=0x55c8bebbb290) at /data/src/10.3/sql/sql_connect.cc:1324
      #35 0x000055c8bc1d4bdc in pfs_spawn_thread (arg=0x55c8beb57b80) at /data/src/10.3/storage/perfschema/pfs.cc:1863
      #36 0x00007fdc67ea0494 in start_thread (arg=0x7fdc58542700) at pthread_create.c:333
      #37 0x00007fdc6628693f in clone () from /lib/x86_64-linux-gnu/libc.so.6
      

      perl ./mtr bug.t9 --mysqld=--sql-mode=no_engine_substitution --mysqld=--lock-wait-timeout=5 --mysqld=--innodb-lock-wait-timeout=3 --testcase-timeout=300
      

      (with --repeat if necessary)

      Attachments

        Activity

          I can only sometimes repeat the failure on my local system, and even then it is tricky. I got exactly one core dump for a non-debug, non-ASAN build in which I had specially enabled the assertion:

          diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
          index 5ae34c486cc..8e16a76c74e 100644
          --- a/storage/innobase/include/trx0rec.ic
          +++ b/storage/innobase/include/trx0rec.ic
          @@ -65,7 +65,7 @@ trx_undo_rec_copy(
           
           	len = mach_read_from_2(undo_rec)
           		- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
          -	ut_ad(len < UNIV_PAGE_SIZE);
          +	ut_a(len < UNIV_PAGE_SIZE);
           	trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
           		mem_heap_dup(heap, undo_rec, len));
           	mach_write_to_2(rec, len);
          

          The client notices a crash at line 12868 of t9.test:

          DROP FUNCTION func_1  /* QNO 1826 CON_ID 15 */;
          connection con17_0;
          ERROR HY000: Lost connection to MySQL server during query
          

          This is for the following input:

          --send
          	DROP FUNCTION func_1  /* QNO 1826 CON_ID 15 */;
          --connection con17_0
          --reap
          

          The statement that it is reaping from con17_0 was issued in line 12631:

          --connection con17_0
          --reap
          --send
          	FLUSH TABLES /* QNO 387 CON_ID 17 */;
          

          The crashing statement is a ROLLBACK for a TEMPORARY TABLE, on index col_int_key, line 12855 of the test:

          --connection con16_0
          --reap
          --send
          	ROLLBACK /* QNO 4038 CON_ID 16 */;
          

          I would suggest the following, to reduce the script (unfortunately, this may slightly reduce repeatability):

          1. truncate the script after line 12868 (where the mysqltest client noticed the server crash)
          2. remove all tables except TEMPORARY TABLE…ENGINE=InnoDB
          3. remove all SELECT statements whose results are not used
          4. remove all FLUSH TABLES (afaiu, temporary tables are never evicted in response to it)
          marko Marko Mäkelä added a comment - I can only sometimes repeat the failure on my local system, and even then it is tricky. I got exactly one core dump for a non-debug, non-ASAN build in which I had specially enabled the assertion: diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic index 5ae34c486cc..8e16a76c74e 100644 --- a/storage/innobase/include/trx0rec.ic +++ b/storage/innobase/include/trx0rec.ic @@ -65,7 +65,7 @@ trx_undo_rec_copy( len = mach_read_from_2(undo_rec) - ut_align_offset(undo_rec, UNIV_PAGE_SIZE); - ut_ad(len < UNIV_PAGE_SIZE); + ut_a(len < UNIV_PAGE_SIZE); trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>( mem_heap_dup(heap, undo_rec, len)); mach_write_to_2(rec, len); The client notices a crash at line 12868 of t9.test : DROP FUNCTION func_1 /* QNO 1826 CON_ID 15 */; connection con17_0; ERROR HY000: Lost connection to MySQL server during query This is for the following input: --send DROP FUNCTION func_1 /* QNO 1826 CON_ID 15 */ ; --connection con17_0 --reap The statement that it is reaping from con17_0 was issued in line 12631: --connection con17_0 --reap --send FLUSH TABLES /* QNO 387 CON_ID 17 */ ; The crashing statement is a ROLLBACK for a TEMPORARY TABLE , on index col_int_key , line 12855 of the test: --connection con16_0 --reap --send ROLLBACK /* QNO 4038 CON_ID 16 */ ; I would suggest the following, to reduce the script (unfortunately, this may slightly reduce repeatability): truncate the script after line 12868 (where the mysqltest client noticed the server crash) remove all tables except TEMPORARY TABLE…ENGINE=InnoDB remove all SELECT statements whose results are not used remove all FLUSH TABLES (afaiu, temporary tables are never evicted in response to it)

          Below is the minimal test case which causes the problem on the recent 10.3. Even though we know for a fact that the problem existed earlier, it was rare back then, and either this test case doesn't trigger it on the older revisions at all, or the probability is just too low.

          However recently this failure started happening much more frequently. If we rely on the probability, it changed with this commit:

          commit 921c5e931452301a09c84c53ffe35b81e6a1c71a
          Merge: 33714d2 0c1f220
          Author: Marko Mäkelä <marko.makela@mariadb.com>
          Date:   Tue Jan 30 21:18:39 2018 +0200
           
              Merge bb-10.2-ext into 10.3
              
              MDEV-11415 Remove excessive undo logging during ALTER TABLE…ALGORITHM=COPY
          

          (but it's not reproducible on bb-10.2-ext, or again, the probability is too low).

          The test case is non-concurrent, but it's still highly non-deterministic.
          It fails for me on 10.3 regular debug build and asan-debug on 3 different machines within approximately first 50 runs, but only when it's run in shm – apparently, it's too slow when it's run on HDD.
          Run with --repeat=100 --mem. You might have to increase the number of repetitions.

          Variation 1 - explicit ROLLBACK

          --source include/have_innodb.inc
          CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT, col4 TEXT, FULLTEXT KEY ftidx (col4)) ENGINE = InnoDB;
          ALTER TABLE t1 ADD COLUMN extra INT;
          INSERT INTO t1 (col1,col2,col3,col4) VALUES (2,2,2,REPEAT('2',8193)), (2,2,2,REPEAT('2',8193));
          ALTER TABLE t1 DROP COLUMN col1;
          ALTER TABLE t1 CHANGE COLUMN extra col1 INT;
          ALTER TABLE t1 ADD UNIQUE KEY uidx(col1);
          BEGIN;
          UPDATE t1 SET col1 = 3 LIMIT 1;
          ROLLBACK;
           
          # Cleanup
          DROP TABLE t1;
          

          Variation 2 - implicit rollback

          --source include/have_innodb.inc
          CREATE TABLE IF NOT EXISTS t1 (col1 INT, col2 INT, col3 INT, col4 TEXT, FULLTEXT KEY ftidx (col4)) ENGINE = InnoDB;
          ALTER TABLE t1 ADD COLUMN extra INT;
          INSERT INTO t1 (col1,col2,col3,col4) VALUES (2,2,2,REPEAT('2',8193)), (2,2,2,REPEAT('2',8193));
          ALTER TABLE t1 DROP COLUMN col1;
          ALTER TABLE t1 CHANGE COLUMN extra col1 INT;
          ALTER TABLE t1 ADD UNIQUE KEY uidx(col1);
          --error ER_DUP_ENTRY
          UPDATE t1 SET col1 = 3 LIMIT 2;
           
          # Cleanup
          DROP TABLE t1;
          

          10.3 1951e7f05ae7b606, from variation 1

          mysqld: /data/src/10.3/storage/innobase/include/trx0rec.ic:68: trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t*, mem_heap_t*): Assertion `len < ((ulint) srv_page_size)' failed.
          180203 18:29:38 [ERROR] mysqld got signal 6 ;
           
          #7  0x00007f13b5953ee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
          #8  0x0000557c8713b69b in trx_undo_rec_copy (undo_rec=0x7f13ab258000 "}\221.\270", heap=0x7f135c0e8550) at /data/src/10.3/storage/innobase/include/trx0rec.ic:68
          #9  0x0000557c871427c1 in trx_undo_get_undo_rec_low (roll_ptr=0, is_temp=false, heap=0x7f135c0e8550) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2172
          #10 0x0000557c871428bd in trx_undo_get_undo_rec (roll_ptr=0, is_temp=false, heap=0x7f135c0e8550, trx_id=590, name=..., undo_rec=0x7f13a85465b0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2208
          #11 0x0000557c87142bb7 in trx_undo_prev_version_build (index_rec=0x7f13ab3a00fe "", index_mtr=0x7f13a8546f00, rec=0x7f135c0630b8 "", index=0x7f135c0bcf38, offsets=0x7f135c063158, heap=0x7f135c0e8550, old_vers=0x7f13a85466a0, v_heap=0x0, vrow=0x0, v_status=0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2294
          #12 0x0000557c870f88a6 in row_vers_old_has_index_entry (also_curr=0, rec=0x7f13ab3a00fe "", mtr=0x7f13a8546f00, index=0x7f135c106128, ientry=0x7f135c0bfe58, roll_ptr=0, trx_id=0) at /data/src/10.3/storage/innobase/row/row0vers.cc:1032
          #13 0x0000557c872ef935 in row_undo_mod_del_mark_or_remove_sec_low (node=0x7f135c0c5ff8, thr=0x7f135c1073b0, index=0x7f135c106128, entry=0x7f135c0bfe58, mode=2) at /data/src/10.3/storage/innobase/row/row0umod.cc:510
          #14 0x0000557c872efc8b in row_undo_mod_del_mark_or_remove_sec (node=0x7f135c0c5ff8, thr=0x7f135c1073b0, index=0x7f135c106128, entry=0x7f135c0bfe58) at /data/src/10.3/storage/innobase/row/row0umod.cc:582
          #15 0x0000557c872f10ed in row_undo_mod_upd_exist_sec (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1062
          #16 0x0000557c872f19d9 in row_undo_mod (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1280
          #17 0x0000557c870e1869 in row_undo (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0undo.cc:307
          #18 0x0000557c870e19e5 in row_undo_step (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0undo.cc:355
          #19 0x0000557c87032366 in que_thr_step (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1053
          #20 0x0000557c870325b5 in que_run_threads_low (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1117
          #21 0x0000557c870327a6 in que_run_threads (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1157
          #22 0x0000557c87145072 in trx_rollback_to_savepoint_low (trx=0x7f13b0058958, savept=0x0) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:112
          #23 0x0000557c8714550f in trx_rollback_for_mysql_low (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:183
          #24 0x0000557c87145892 in trx_rollback_low (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:215
          #25 0x0000557c87145d88 in trx_rollback_for_mysql (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:294
          #26 0x0000557c86f25c87 in innobase_rollback (hton=0x557c89e0d690, thd=0x7f135c000b00, rollback_trx=true) at /data/src/10.3/storage/innobase/handler/ha_innodb.cc:4684
          #27 0x0000557c86bf72c3 in ha_rollback_trans (thd=0x7f135c000b00, all=true) at /data/src/10.3/sql/handler.cc:1717
          #28 0x0000557c86a82991 in trans_rollback (thd=0x7f135c000b00) at /data/src/10.3/sql/transaction.cc:422
          #29 0x0000557c86914c54 in mysql_execute_command (thd=0x7f135c000b00) at /data/src/10.3/sql/sql_parse.cc:5808
          #30 0x0000557c8691aff8 in mysql_parse (thd=0x7f135c000b00, rawbuf=0x7f135c014c58 "ROLLBACK", length=8, parser_state=0x7f13a85495e0, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:7977
          #31 0x0000557c8690883f in dispatch_command (command=COM_QUERY, thd=0x7f135c000b00, packet=0x7f135c1200f1 "ROLLBACK", packet_length=8, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:1825
          #32 0x0000557c8690727f in do_command (thd=0x7f135c000b00) at /data/src/10.3/sql/sql_parse.cc:1370
          #33 0x0000557c86a6eb74 in do_handle_one_connection (connect=0x557c8a406b50) at /data/src/10.3/sql/sql_connect.cc:1402
          #34 0x0000557c86a6e901 in handle_one_connection (arg=0x557c8a406b50) at /data/src/10.3/sql/sql_connect.cc:1308
          #35 0x0000557c86f0cc50 in pfs_spawn_thread (arg=0x557c8a423bf0) at /data/src/10.3/storage/perfschema/pfs.cc:1862
          #36 0x00007f13b762a494 in start_thread (arg=0x7f13a854a700) at pthread_create.c:333
          #37 0x00007f13b5a1093f in clone () from /lib/x86_64-linux-gnu/libc.so.6
          

          10.3 1951e7f05ae7b606, from variation 2

          mysqld: /data/src/10.3/storage/innobase/include/trx0rec.ic:68: trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t*, mem_heap_t*): Assertion `len < ((ulint) srv_page_size)' failed.
          180203 18:28:08 [ERROR] mysqld got signal 6 ;
           
          #7  0x00007fc8b7c8fee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
          #8  0x000055e55feec69b in trx_undo_rec_copy (undo_rec=0x7fc8b15ec000 "\363}\335\232", heap=0x7fc8541b4690) at /data/src/10.3/storage/innobase/include/trx0rec.ic:68
          #9  0x000055e55fef37c1 in trx_undo_get_undo_rec_low (roll_ptr=0, is_temp=false, heap=0x7fc8541b4690) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2172
          #10 0x000055e55fef38bd in trx_undo_get_undo_rec (roll_ptr=0, is_temp=false, heap=0x7fc8541b4690, trx_id=1598, name=..., undo_rec=0x7fc8b00935c0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2208
          #11 0x000055e55fef3bb7 in trx_undo_prev_version_build (index_rec=0x7fc8b16b00fe "", index_mtr=0x7fc8b0093f10, rec=0x7fc8541b4288 "", index=0x7fc8540f95a8, offsets=0x7fc8541b4328, heap=0x7fc8541b4690, old_vers=0x7fc8b00936b0, v_heap=0x0, vrow=0x0, v_status=0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2294
          #12 0x000055e55fea98a6 in row_vers_old_has_index_entry (also_curr=0, rec=0x7fc8b16b00fe "", mtr=0x7fc8b0093f10, index=0x7fc8541dcc08, ientry=0x7fc8540b90e8, roll_ptr=0, trx_id=0) at /data/src/10.3/storage/innobase/row/row0vers.cc:1032
          #13 0x000055e5600a0935 in row_undo_mod_del_mark_or_remove_sec_low (node=0x7fc8541dda38, thr=0x7fc8541bd750, index=0x7fc8541dcc08, entry=0x7fc8540b90e8, mode=2) at /data/src/10.3/storage/innobase/row/row0umod.cc:510
          #14 0x000055e5600a0c8b in row_undo_mod_del_mark_or_remove_sec (node=0x7fc8541dda38, thr=0x7fc8541bd750, index=0x7fc8541dcc08, entry=0x7fc8540b90e8) at /data/src/10.3/storage/innobase/row/row0umod.cc:582
          #15 0x000055e5600a20ed in row_undo_mod_upd_exist_sec (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0umod.cc:1062
          #16 0x000055e5600a29d9 in row_undo_mod (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0umod.cc:1280
          #17 0x000055e55fe92869 in row_undo (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0undo.cc:307
          #18 0x000055e55fe929e5 in row_undo_step (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0undo.cc:355
          #19 0x000055e55fde3366 in que_thr_step (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1053
          #20 0x000055e55fde35b5 in que_run_threads_low (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1117
          #21 0x000055e55fde37a6 in que_run_threads (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1157
          #22 0x000055e55fef6072 in trx_rollback_to_savepoint_low (trx=0x7fc8b1b93958, savept=0x0) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:112
          #23 0x000055e55fef650f in trx_rollback_for_mysql_low (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:183
          #24 0x000055e55fef6892 in trx_rollback_low (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:215
          #25 0x000055e55fef6d88 in trx_rollback_for_mysql (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:294
          #26 0x000055e55fcd6c87 in innobase_rollback (hton=0x55e561c50320, thd=0x7fc854000b00, rollback_trx=false) at /data/src/10.3/storage/innobase/handler/ha_innodb.cc:4684
          #27 0x000055e55f9a82c3 in ha_rollback_trans (thd=0x7fc854000b00, all=false) at /data/src/10.3/sql/handler.cc:1717
          #28 0x000055e55f8340b3 in trans_rollback_stmt (thd=0x7fc854000b00) at /data/src/10.3/sql/transaction.cc:582
          #29 0x000055e55f6c76e9 in mysql_execute_command (thd=0x7fc854000b00) at /data/src/10.3/sql/sql_parse.cc:6293
          #30 0x000055e55f6cbff8 in mysql_parse (thd=0x7fc854000b00, rawbuf=0x7fc854014c58 "UPDATE t1 SET col1 = 3 LIMIT 2", length=30, parser_state=0x7fc8b00965e0, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:7977
          #31 0x000055e55f6b983f in dispatch_command (command=COM_QUERY, thd=0x7fc854000b00, packet=0x7fc854120081 "UPDATE t1 SET col1 = 3 LIMIT 2", packet_length=30, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:1825
          #32 0x000055e55f6b827f in do_command (thd=0x7fc854000b00) at /data/src/10.3/sql/sql_parse.cc:1370
          #33 0x000055e55f81fb74 in do_handle_one_connection (connect=0x55e5621c3f60) at /data/src/10.3/sql/sql_connect.cc:1402
          #34 0x000055e55f81f901 in handle_one_connection (arg=0x55e5621c3f60) at /data/src/10.3/sql/sql_connect.cc:1308
          #35 0x000055e55fcbdc50 in pfs_spawn_thread (arg=0x55e562266b80) at /data/src/10.3/storage/perfschema/pfs.cc:1862
          #36 0x00007fc8b9966494 in start_thread (arg=0x7fc8b0097700) at pthread_create.c:333
          #37 0x00007fc8b7d4c93f in clone () from /lib/x86_64-linux-gnu/libc.so.6
          

          elenst Elena Stepanova added a comment - Below is the minimal test case which causes the problem on the recent 10.3 . Even though we know for a fact that the problem existed earlier, it was rare back then, and either this test case doesn't trigger it on the older revisions at all, or the probability is just too low. However recently this failure started happening much more frequently. If we rely on the probability, it changed with this commit: commit 921c5e931452301a09c84c53ffe35b81e6a1c71a Merge: 33714d2 0c1f220 Author: Marko Mäkelä <marko.makela@mariadb.com> Date: Tue Jan 30 21:18:39 2018 +0200   Merge bb-10.2-ext into 10.3 MDEV-11415 Remove excessive undo logging during ALTER TABLE…ALGORITHM=COPY (but it's not reproducible on bb-10.2-ext, or again, the probability is too low). The test case is non-concurrent, but it's still highly non-deterministic. It fails for me on 10.3 regular debug build and asan-debug on 3 different machines within approximately first 50 runs, but only when it's run in shm – apparently, it's too slow when it's run on HDD. Run with --repeat=100 --mem . You might have to increase the number of repetitions. Variation 1 - explicit ROLLBACK --source include/have_innodb.inc CREATE TABLE IF NOT EXISTS t1 (col1 INT , col2 INT , col3 INT , col4 TEXT, FULLTEXT KEY ftidx (col4)) ENGINE = InnoDB; ALTER TABLE t1 ADD COLUMN extra INT ; INSERT INTO t1 (col1,col2,col3,col4) VALUES (2,2,2,REPEAT( '2' ,8193)), (2,2,2,REPEAT( '2' ,8193)); ALTER TABLE t1 DROP COLUMN col1; ALTER TABLE t1 CHANGE COLUMN extra col1 INT ; ALTER TABLE t1 ADD UNIQUE KEY uidx(col1); BEGIN ; UPDATE t1 SET col1 = 3 LIMIT 1; ROLLBACK ;   # Cleanup DROP TABLE t1; Variation 2 - implicit rollback --source include/have_innodb.inc CREATE TABLE IF NOT EXISTS t1 (col1 INT , col2 INT , col3 INT , col4 TEXT, FULLTEXT KEY ftidx (col4)) ENGINE = InnoDB; ALTER TABLE t1 ADD COLUMN extra INT ; INSERT INTO t1 (col1,col2,col3,col4) VALUES (2,2,2,REPEAT( '2' ,8193)), (2,2,2,REPEAT( '2' ,8193)); ALTER TABLE t1 DROP COLUMN col1; ALTER TABLE t1 CHANGE COLUMN extra col1 INT ; ALTER TABLE t1 ADD UNIQUE KEY uidx(col1); --error ER_DUP_ENTRY UPDATE t1 SET col1 = 3 LIMIT 2;   # Cleanup DROP TABLE t1; 10.3 1951e7f05ae7b606, from variation 1 mysqld: /data/src/10.3/storage/innobase/include/trx0rec.ic:68: trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t*, mem_heap_t*): Assertion `len < ((ulint) srv_page_size)' failed. 180203 18:29:38 [ERROR] mysqld got signal 6 ;   #7 0x00007f13b5953ee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6 #8 0x0000557c8713b69b in trx_undo_rec_copy (undo_rec=0x7f13ab258000 "}\221.\270", heap=0x7f135c0e8550) at /data/src/10.3/storage/innobase/include/trx0rec.ic:68 #9 0x0000557c871427c1 in trx_undo_get_undo_rec_low (roll_ptr=0, is_temp=false, heap=0x7f135c0e8550) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2172 #10 0x0000557c871428bd in trx_undo_get_undo_rec (roll_ptr=0, is_temp=false, heap=0x7f135c0e8550, trx_id=590, name=..., undo_rec=0x7f13a85465b0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2208 #11 0x0000557c87142bb7 in trx_undo_prev_version_build (index_rec=0x7f13ab3a00fe "", index_mtr=0x7f13a8546f00, rec=0x7f135c0630b8 "", index=0x7f135c0bcf38, offsets=0x7f135c063158, heap=0x7f135c0e8550, old_vers=0x7f13a85466a0, v_heap=0x0, vrow=0x0, v_status=0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2294 #12 0x0000557c870f88a6 in row_vers_old_has_index_entry (also_curr=0, rec=0x7f13ab3a00fe "", mtr=0x7f13a8546f00, index=0x7f135c106128, ientry=0x7f135c0bfe58, roll_ptr=0, trx_id=0) at /data/src/10.3/storage/innobase/row/row0vers.cc:1032 #13 0x0000557c872ef935 in row_undo_mod_del_mark_or_remove_sec_low (node=0x7f135c0c5ff8, thr=0x7f135c1073b0, index=0x7f135c106128, entry=0x7f135c0bfe58, mode=2) at /data/src/10.3/storage/innobase/row/row0umod.cc:510 #14 0x0000557c872efc8b in row_undo_mod_del_mark_or_remove_sec (node=0x7f135c0c5ff8, thr=0x7f135c1073b0, index=0x7f135c106128, entry=0x7f135c0bfe58) at /data/src/10.3/storage/innobase/row/row0umod.cc:582 #15 0x0000557c872f10ed in row_undo_mod_upd_exist_sec (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1062 #16 0x0000557c872f19d9 in row_undo_mod (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0umod.cc:1280 #17 0x0000557c870e1869 in row_undo (node=0x7f135c0c5ff8, thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0undo.cc:307 #18 0x0000557c870e19e5 in row_undo_step (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/row/row0undo.cc:355 #19 0x0000557c87032366 in que_thr_step (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1053 #20 0x0000557c870325b5 in que_run_threads_low (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1117 #21 0x0000557c870327a6 in que_run_threads (thr=0x7f135c1073b0) at /data/src/10.3/storage/innobase/que/que0que.cc:1157 #22 0x0000557c87145072 in trx_rollback_to_savepoint_low (trx=0x7f13b0058958, savept=0x0) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:112 #23 0x0000557c8714550f in trx_rollback_for_mysql_low (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:183 #24 0x0000557c87145892 in trx_rollback_low (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:215 #25 0x0000557c87145d88 in trx_rollback_for_mysql (trx=0x7f13b0058958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:294 #26 0x0000557c86f25c87 in innobase_rollback (hton=0x557c89e0d690, thd=0x7f135c000b00, rollback_trx=true) at /data/src/10.3/storage/innobase/handler/ha_innodb.cc:4684 #27 0x0000557c86bf72c3 in ha_rollback_trans (thd=0x7f135c000b00, all=true) at /data/src/10.3/sql/handler.cc:1717 #28 0x0000557c86a82991 in trans_rollback (thd=0x7f135c000b00) at /data/src/10.3/sql/transaction.cc:422 #29 0x0000557c86914c54 in mysql_execute_command (thd=0x7f135c000b00) at /data/src/10.3/sql/sql_parse.cc:5808 #30 0x0000557c8691aff8 in mysql_parse (thd=0x7f135c000b00, rawbuf=0x7f135c014c58 "ROLLBACK", length=8, parser_state=0x7f13a85495e0, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:7977 #31 0x0000557c8690883f in dispatch_command (command=COM_QUERY, thd=0x7f135c000b00, packet=0x7f135c1200f1 "ROLLBACK", packet_length=8, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:1825 #32 0x0000557c8690727f in do_command (thd=0x7f135c000b00) at /data/src/10.3/sql/sql_parse.cc:1370 #33 0x0000557c86a6eb74 in do_handle_one_connection (connect=0x557c8a406b50) at /data/src/10.3/sql/sql_connect.cc:1402 #34 0x0000557c86a6e901 in handle_one_connection (arg=0x557c8a406b50) at /data/src/10.3/sql/sql_connect.cc:1308 #35 0x0000557c86f0cc50 in pfs_spawn_thread (arg=0x557c8a423bf0) at /data/src/10.3/storage/perfschema/pfs.cc:1862 #36 0x00007f13b762a494 in start_thread (arg=0x7f13a854a700) at pthread_create.c:333 #37 0x00007f13b5a1093f in clone () from /lib/x86_64-linux-gnu/libc.so.6 10.3 1951e7f05ae7b606, from variation 2 mysqld: /data/src/10.3/storage/innobase/include/trx0rec.ic:68: trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t*, mem_heap_t*): Assertion `len < ((ulint) srv_page_size)' failed. 180203 18:28:08 [ERROR] mysqld got signal 6 ;   #7 0x00007fc8b7c8fee2 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6 #8 0x000055e55feec69b in trx_undo_rec_copy (undo_rec=0x7fc8b15ec000 "\363}\335\232", heap=0x7fc8541b4690) at /data/src/10.3/storage/innobase/include/trx0rec.ic:68 #9 0x000055e55fef37c1 in trx_undo_get_undo_rec_low (roll_ptr=0, is_temp=false, heap=0x7fc8541b4690) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2172 #10 0x000055e55fef38bd in trx_undo_get_undo_rec (roll_ptr=0, is_temp=false, heap=0x7fc8541b4690, trx_id=1598, name=..., undo_rec=0x7fc8b00935c0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2208 #11 0x000055e55fef3bb7 in trx_undo_prev_version_build (index_rec=0x7fc8b16b00fe "", index_mtr=0x7fc8b0093f10, rec=0x7fc8541b4288 "", index=0x7fc8540f95a8, offsets=0x7fc8541b4328, heap=0x7fc8541b4690, old_vers=0x7fc8b00936b0, v_heap=0x0, vrow=0x0, v_status=0) at /data/src/10.3/storage/innobase/trx/trx0rec.cc:2294 #12 0x000055e55fea98a6 in row_vers_old_has_index_entry (also_curr=0, rec=0x7fc8b16b00fe "", mtr=0x7fc8b0093f10, index=0x7fc8541dcc08, ientry=0x7fc8540b90e8, roll_ptr=0, trx_id=0) at /data/src/10.3/storage/innobase/row/row0vers.cc:1032 #13 0x000055e5600a0935 in row_undo_mod_del_mark_or_remove_sec_low (node=0x7fc8541dda38, thr=0x7fc8541bd750, index=0x7fc8541dcc08, entry=0x7fc8540b90e8, mode=2) at /data/src/10.3/storage/innobase/row/row0umod.cc:510 #14 0x000055e5600a0c8b in row_undo_mod_del_mark_or_remove_sec (node=0x7fc8541dda38, thr=0x7fc8541bd750, index=0x7fc8541dcc08, entry=0x7fc8540b90e8) at /data/src/10.3/storage/innobase/row/row0umod.cc:582 #15 0x000055e5600a20ed in row_undo_mod_upd_exist_sec (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0umod.cc:1062 #16 0x000055e5600a29d9 in row_undo_mod (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0umod.cc:1280 #17 0x000055e55fe92869 in row_undo (node=0x7fc8541dda38, thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0undo.cc:307 #18 0x000055e55fe929e5 in row_undo_step (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/row/row0undo.cc:355 #19 0x000055e55fde3366 in que_thr_step (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1053 #20 0x000055e55fde35b5 in que_run_threads_low (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1117 #21 0x000055e55fde37a6 in que_run_threads (thr=0x7fc8541bd750) at /data/src/10.3/storage/innobase/que/que0que.cc:1157 #22 0x000055e55fef6072 in trx_rollback_to_savepoint_low (trx=0x7fc8b1b93958, savept=0x0) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:112 #23 0x000055e55fef650f in trx_rollback_for_mysql_low (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:183 #24 0x000055e55fef6892 in trx_rollback_low (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:215 #25 0x000055e55fef6d88 in trx_rollback_for_mysql (trx=0x7fc8b1b93958) at /data/src/10.3/storage/innobase/trx/trx0roll.cc:294 #26 0x000055e55fcd6c87 in innobase_rollback (hton=0x55e561c50320, thd=0x7fc854000b00, rollback_trx=false) at /data/src/10.3/storage/innobase/handler/ha_innodb.cc:4684 #27 0x000055e55f9a82c3 in ha_rollback_trans (thd=0x7fc854000b00, all=false) at /data/src/10.3/sql/handler.cc:1717 #28 0x000055e55f8340b3 in trans_rollback_stmt (thd=0x7fc854000b00) at /data/src/10.3/sql/transaction.cc:582 #29 0x000055e55f6c76e9 in mysql_execute_command (thd=0x7fc854000b00) at /data/src/10.3/sql/sql_parse.cc:6293 #30 0x000055e55f6cbff8 in mysql_parse (thd=0x7fc854000b00, rawbuf=0x7fc854014c58 "UPDATE t1 SET col1 = 3 LIMIT 2", length=30, parser_state=0x7fc8b00965e0, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:7977 #31 0x000055e55f6b983f in dispatch_command (command=COM_QUERY, thd=0x7fc854000b00, packet=0x7fc854120081 "UPDATE t1 SET col1 = 3 LIMIT 2", packet_length=30, is_com_multi=false, is_next_command=false) at /data/src/10.3/sql/sql_parse.cc:1825 #32 0x000055e55f6b827f in do_command (thd=0x7fc854000b00) at /data/src/10.3/sql/sql_parse.cc:1370 #33 0x000055e55f81fb74 in do_handle_one_connection (connect=0x55e5621c3f60) at /data/src/10.3/sql/sql_connect.cc:1402 #34 0x000055e55f81f901 in handle_one_connection (arg=0x55e5621c3f60) at /data/src/10.3/sql/sql_connect.cc:1308 #35 0x000055e55fcbdc50 in pfs_spawn_thread (arg=0x55e562266b80) at /data/src/10.3/storage/perfschema/pfs.cc:1862 #36 0x00007fc8b9966494 in start_thread (arg=0x7fc8b0097700) at pthread_create.c:333 #37 0x00007fc8b7d4c93f in clone () from /lib/x86_64-linux-gnu/libc.so.6

          By luck, the following reduced version of the test seems to crash every time:

          --source include/have_innodb.inc
          CREATE TABLE t1 (a INT, b TEXT, c INT, FULLTEXT KEY ftidx (b)) ENGINE = InnoDB;
          INSERT INTO t1 (a,b) VALUES (2,REPEAT('2',8193)), (2,REPEAT('2',8193));
          ALTER TABLE t1 DROP COLUMN a, CHANGE COLUMN c a INT, ADD UNIQUE KEY (a);
          --error ER_DUP_ENTRY
          UPDATE t1 SET a = 3;
          DROP TABLE t1;
          

          marko Marko Mäkelä added a comment - By luck, the following reduced version of the test seems to crash every time: --source include/have_innodb.inc CREATE TABLE t1 (a INT , b TEXT, c INT , FULLTEXT KEY ftidx (b)) ENGINE = InnoDB; INSERT INTO t1 (a,b) VALUES (2,REPEAT( '2' ,8193)), (2,REPEAT( '2' ,8193)); ALTER TABLE t1 DROP COLUMN a, CHANGE COLUMN c a INT , ADD UNIQUE KEY (a); --error ER_DUP_ENTRY UPDATE t1 SET a = 3; DROP TABLE t1;

          A little simpler test:

          --source include/have_innodb.inc
          CREATE TABLE t1 (a INT UNIQUE, b TEXT, FULLTEXT KEY ftidx (b)) ENGINE = InnoDB;
          INSERT INTO t1 VALUES (NULL,REPEAT('2',8193));
          ALTER TABLE t1 FORCE, ALGORITHM=COPY;
          BEGIN;
          UPDATE t1 SET a = 0;
          ROLLBACK;
          DROP TABLE t1;
          

          This should be roughly equivalent to the above, with the difference that the undo log would contain 1 record instead of 2 during the rollback. For me, it crashes only at about 80% probability, which makes debugging challenging. It does not crash on 10.2 or bb-10.2-ext.

          marko Marko Mäkelä added a comment - A little simpler test: --source include/have_innodb.inc CREATE TABLE t1 (a INT UNIQUE , b TEXT, FULLTEXT KEY ftidx (b)) ENGINE = InnoDB; INSERT INTO t1 VALUES ( NULL ,REPEAT( '2' ,8193)); ALTER TABLE t1 FORCE , ALGORITHM=COPY; BEGIN ; UPDATE t1 SET a = 0; ROLLBACK ; DROP TABLE t1; This should be roughly equivalent to the above, with the difference that the undo log would contain 1 record instead of 2 during the rollback. For me, it crashes only at about 80% probability, which makes debugging challenging. It does not crash on 10.2 or bb-10.2-ext.

          In the stack traces, the roll_ptr=0 is simply outright wrong. The undo log page number component of the DB_ROLL_PTR can never possibly be 0.
          In a locally produced core dump, the rec in trx_undo_prev_version_build() looks corrupted: DB_TRX_ID=208 but DB_ROLL_PTR=0. The 208 is wrong, because trx_sys.m_max_trx_id=247 and trx->id=246.
          This rec is not located in a buffer pool page; it is a possibly patched copy of a buffer page record. In the clustered index root page we have the following:

          00000060: 0200 1b69 6e66 696d 756d 0002 000b 0000  ...infimum......
          00000070: 7375 7072 656d 756d 0200 0010 fff2 0000  supremum........
          00000080: 0000 0205 0000 0000 00f6 3a00 0001 5801  ..........:...X.
          00000090: 1080 0000 0000 0000 0000 0000 0100 0000  ................
          

          The record header starts at 0x78, and the payload starts at 0x7e: (DB_ROW_ID=0x205, DB_TRX_ID=246, DB_ROLL_PTR=…, a=0,b=NULL,FTS_DOC_ID=1).
          It looks like when the old version of rec was constructed, the DB_TRX_ID,DB_ROLL_PTR were not initialized correctly. The old version of record should have been the same as the latest one, only a=NULL instead of a=0. Everything else in the record looks OK except the system fields:

          (gdb) p/x offsets[1]@4+offsets[1]
          $1 = {0x6, 0x61a0001da926, 0x61800006a108, 0x8000000000000006, 0x6, 0xc, 0x13, 
            0x8000000000000013, 0x8000000000000013, 0x1b}
          (gdb) p/x *rec@0x1b
          $2 = {0x0, 0x0, 0x0, 0x0, 0x2, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0xd0, 
            0x0 <repeats 14 times>, 0x1}
          (gdb) p/x rec[-6]@6
          $3 = {0x3, 0xe, 0x0, 0x10, 0xbe, 0xbe}
          

          Note: in the memory copy of the record, the fixed-length record header bytes (the last 5 bytes) can be garbage. A tell-tale sign is the two 0xbe garbage bytes instead of a valid next-record pointer.
          The latest DB_ROLL_PTR carries the following components:

          • offset=0x110
          • page=0x158
          • rseg_id=0x3a
          • is_insert=0

          In the undo log page, we indeed do have that garbage DB_TRX_ID=0xd0 starting at 0x110. The complete undo page dump follows:

          000000 00 00 00 00 00 00 01 58 00 00 00 00 00 00 00 00
          000010 00 00 00 00 00 00 00 00 00 02 00 00 00 00 00 00
          000020 00 00 00 00 00 00 00 00 01 10 01 44 ff ff ff ff
          000030 00 00 ff ff ff ff 00 00 00 01 00 56 00 00 00 00
          000040 00 00 01 42 0f f2 00 00 00 01 00 00 01 58 00 2c
          000050 00 00 01 58 00 2c 00 00 00 00 00 00 00 f6 00 00
          000060 00 00 00 00 00 00 00 01 01 10 00 00 00 00 00 00
          000070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
          *
          000110 01 44 0c 00 50 00 00 00 00 00 d0 00 00 00 00 00
          000120 06 00 00 00 00 02 05 01 03 f0 ff ff ff ff 00 14
          000130 05 08 00 00 00 00 00 00 00 01 00 06 00 00 00 00
          000140 02 05 01 10 00 00 00 00 00 00 00 00 00 00 00 00
          000150 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
          *
          004000
          

          I will have to find out how we wrote that garbage to the undo log record.
          Note: the first two bytes 0x0144 are the next-record pointer. The actual undo log record type is TRX_UNDO_UPD_EXIST_REC=12. Everything looks valid, except for the old values of the system columns.

          marko Marko Mäkelä added a comment - In the stack traces, the roll_ptr=0 is simply outright wrong. The undo log page number component of the DB_ROLL_PTR can never possibly be 0. In a locally produced core dump, the rec in trx_undo_prev_version_build() looks corrupted: DB_TRX_ID=208 but DB_ROLL_PTR=0 . The 208 is wrong, because trx_sys.m_max_trx_id=247 and trx->id=246 . This rec is not located in a buffer pool page; it is a possibly patched copy of a buffer page record. In the clustered index root page we have the following: 00000060: 0200 1b69 6e66 696d 756d 0002 000b 0000 ...infimum...... 00000070: 7375 7072 656d 756d 0200 0010 fff2 0000 supremum........ 00000080: 0000 0205 0000 0000 00f6 3a00 0001 5801 ..........:...X. 00000090: 1080 0000 0000 0000 0000 0000 0100 0000 ................ The record header starts at 0x78, and the payload starts at 0x7e: (DB_ROW_ID=0x205, DB_TRX_ID=246, DB_ROLL_PTR=…, a=0,b=NULL,FTS_DOC_ID=1). It looks like when the old version of rec was constructed, the DB_TRX_ID,DB_ROLL_PTR were not initialized correctly. The old version of record should have been the same as the latest one, only a=NULL instead of a=0. Everything else in the record looks OK except the system fields: (gdb) p/x offsets[1]@4+offsets[1] $1 = {0x6, 0x61a0001da926, 0x61800006a108, 0x8000000000000006, 0x6, 0xc, 0x13, 0x8000000000000013, 0x8000000000000013, 0x1b} (gdb) p/x *rec@0x1b $2 = {0x0, 0x0, 0x0, 0x0, 0x2, 0x5, 0x0, 0x0, 0x0, 0x0, 0x0, 0xd0, 0x0 <repeats 14 times>, 0x1} (gdb) p/x rec[-6]@6 $3 = {0x3, 0xe, 0x0, 0x10, 0xbe, 0xbe} Note: in the memory copy of the record, the fixed-length record header bytes (the last 5 bytes) can be garbage. A tell-tale sign is the two 0xbe garbage bytes instead of a valid next-record pointer. The latest DB_ROLL_PTR carries the following components: offset=0x110 page=0x158 rseg_id=0x3a is_insert=0 In the undo log page, we indeed do have that garbage DB_TRX_ID=0xd0 starting at 0x110 . The complete undo page dump follows: 000000 00 00 00 00 00 00 01 58 00 00 00 00 00 00 00 00 000010 00 00 00 00 00 00 00 00 00 02 00 00 00 00 00 00 000020 00 00 00 00 00 00 00 00 01 10 01 44 ff ff ff ff 000030 00 00 ff ff ff ff 00 00 00 01 00 56 00 00 00 00 000040 00 00 01 42 0f f2 00 00 00 01 00 00 01 58 00 2c 000050 00 00 01 58 00 2c 00 00 00 00 00 00 00 f6 00 00 000060 00 00 00 00 00 00 00 01 01 10 00 00 00 00 00 00 000070 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 000110 01 44 0c 00 50 00 00 00 00 00 d0 00 00 00 00 00 000120 06 00 00 00 00 02 05 01 03 f0 ff ff ff ff 00 14 000130 05 08 00 00 00 00 00 00 00 01 00 06 00 00 00 00 000140 02 05 01 10 00 00 00 00 00 00 00 00 00 00 00 00 000150 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 004000 I will have to find out how we wrote that garbage to the undo log record. Note: the first two bytes 0x0144 are the next-record pointer. The actual undo log record type is TRX_UNDO_UPD_EXIST_REC=12 . Everything looks valid, except for the old values of the system columns.

          Just in case, I will backport an applicable subset of this fix to 10.2, in order to prevent any potential corruption due to MDEV-11415 writing DB_ROLL_PTR=0 in ALGORITHM=COPY operations.

          marko Marko Mäkelä added a comment - Just in case, I will backport an applicable subset of this fix to 10.2, in order to prevent any potential corruption due to MDEV-11415 writing DB_ROLL_PTR=0 in ALGORITHM=COPY operations.

          People

            marko Marko Mäkelä
            elenst Elena Stepanova
            Votes:
            0 Vote for this issue
            Watchers:
            2 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.