Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-27115

10.4.22 segfault at SELECT RELEASE_LOCK() in ull_get_key (bad MDL_ticket)

Details

    • Bug
    • Status: Closed (View Workflow)
    • Major
    • Resolution: Fixed
    • 10.4.22
    • 10.4.28
    • Galera
    • None

    Description

      This morning: a crash in ull_get_key because $rax (return value from MDL_ticket::get_key) held a bad pointer.

      This is the first time I've seen this particular crash. And this is on the first nodes we have running with 10.4.22.

      Versions:

      galera-4 - 26.4.9-bionic
      galera-arbitrator-4 - 26.4.9-bionic
      libmariadb3:amd64 - 1:10.4.22+maria~bionic
      mariadb-backup - 1:10.4.22+maria~bionic
      mariadb-client-10.4 - 1:10.4.22+maria~bionic
      mariadb-client-core-10.4 - 1:10.4.22+maria~bionic
      mariadb-common - 1:10.4.22+maria~bionic
      mariadb-server - 1:10.4.22+maria~bionic
      mariadb-server-10.4 - 1:10.4.22+maria~bionic
      mariadb-server-core-10.4 - 1:10.4.22+maria~bionic
      

      Short backtrace:

      (gdb) bt
      #0  ull_get_key (ptr=<optimized out>, length=0x7fdfb66c5938, not_used=<optimized out>) at ./sql/item_func.cc:4002
      #1  0x000055657d7b3413 in my_hash_key (first=1 '\001', length=0x7fdfb66c5938, record=<optimized out>, hash=0x7fde94002e58) at ./mysys/hash.c:196
      #2  hashcmp (pos=0x7fde9431ea38, length=43, key=0x7fdfb66c59a8 "\bb28527f1559cc629795224a389d14a07ba41cb80", hash=0x7fde94002e58) at ./mysys/hash.c:371
      #3  my_hash_first_from_hash_value (hash=hash@entry=0x7fde94002e58, hash_value=<optimized out>, key=key@entry=0x7fdfb66c59a8 "\bb28527f1559cc629795224a389d14a07ba41cb80", length=43, current_record=current_record@entry=0x7fdfb66c598c)
          at ./mysys/hash.c:288
      #4  0x000055657d7b3538 in my_hash_first (hash=hash@entry=0x7fde94002e58, key=key@entry=0x7fdfb66c59a8 "\bb28527f1559cc629795224a389d14a07ba41cb80", length=<optimized out>, current_record=current_record@entry=0x7fdfb66c598c)
          at ./mysys/hash.c:262
      #5  0x000055657d7b3551 in my_hash_search (hash=hash@entry=0x7fde94002e58, key=key@entry=0x7fdfb66c59a8 "\bb28527f1559cc629795224a389d14a07ba41cb80", length=<optimized out>) at ./mysys/hash.c:235
      #6  0x000055657d2afc4f in Item_func_release_lock::val_int (this=0x7fde94010800) at ./sql/item_func.cc:4243
      

      Segfault due to invalid memory access:

      (gdb) disassemble 
      Dump of assembler code for function ull_get_key(unsigned char const*, unsigned long*, char):
         0x000055657d2ab4e0 <+0>:	push   %rbp
         0x000055657d2ab4e1 <+1>:	mov    %rsp,%rbp
         0x000055657d2ab4e4 <+4>:	push   %rbx
         0x000055657d2ab4e5 <+5>:	mov    %rsi,%rbx
         0x000055657d2ab4e8 <+8>:	sub    $0x8,%rsp
         0x000055657d2ab4ec <+12>:	mov    (%rdi),%rdi
         0x000055657d2ab4ef <+15>:	callq  0x55657d122be0 <MDL_ticket::get_key() const>
      => 0x000055657d2ab4f4 <+20>:	movzwl (%rax),%edx
         0x000055657d2ab4f7 <+23>:	add    $0x8,%rax
      

      (gdb) x $rax 
      0x7fde00707063:	Cannot access memory at address 0x7fde00707063
      

      (That's a "cpp" in there.)

      /** Extract a hash key from User_level_lock. */
       
      uchar *ull_get_key(const uchar *ptr, size_t *length,
                         my_bool not_used __attribute__((unused)))
      {
        User_level_lock *ull = (User_level_lock*) ptr;
        MDL_key *key = ull->lock->get_key();
        *length= key->length();
        return (uchar*) key->ptr();
      }
      

      class User_level_lock
      {
      public:
        MDL_ticket *lock;
        int refs;
      };
      

      So, ull might be good or bad, but the MDL_ticket lock is pointing to something that is not a lock (anymore).

      (gdb) disassemble  0x55657d122be0
      Dump of assembler code for function MDL_ticket::get_key() const:
         0x000055657d122be0 <+0>:	push   %rbp
         0x000055657d122be1 <+1>:	mov    0x38(%rdi),%rax
         0x000055657d122be5 <+5>:	mov    %rsp,%rbp
         0x000055657d122be8 <+8>:	pop    %rbp
         0x000055657d122be9 <+9>:	retq   
      End of assembler dump.
      

      So, I would conclude that ull (ptr at entry) is 0x00007fde94311008 (0x8(%r10),%rdi in my_hash_first_from_hash_value) and ull->lock is then 0x00007fde9425bc10:

      (gdb) x/g $r10+8
      0x7fde9431ea40:	0x00007fde94311008
      (gdb) x 0x00007fde94311008
      0x7fde94311008:	0x00007fde9425bc10
      

      Examining that memory:

      (gdb) print *(User_level_lock*)0x00007fde94311008
      $9 = {lock = 0x7fde9425bc10, refs = 1}
       
      (gdb) print *((User_level_lock*)0x00007fde94311008)->lock
      $11 = {<MDL_wait_for_subgraph> = {_vptr.MDL_wait_for_subgraph = 0x7fde9404dd40},
             next_in_context = 0x622f746f62646c69, prev_in_context = 0x2f746f62646c6975,
             next_in_lock = 0x61672f646c697562, prev_in_lock = 0x6372732f6172656c, 
             m_type = 1920169775, m_ctx = 0x2e72656469766f72, m_lock = 0x7fde00707063}
      

      That last one is definitely garbage:

      (gdb) print sizeof(MDL_ticket)
      $12 = 64
       
      (gdb) x/8gx 0x7fde9425bc10
      0x7fde9425bc10:	0x00007fde9404dd40	0x622f746f62646c69
      0x7fde9425bc20:	0x2f746f62646c6975	0x61672f646c697562
      0x7fde9425bc30:	0x6372732f6172656c	0x705f70657273772f
      0x7fde9425bc40:	0x2e72656469766f72	0x00007fde00707063
       
      (gdb) print (char*)0x7fde9425bc10+8
      $13 = 0x7fde9425bc18 "ildbot/buildbot/build/galera/src/wsrep_provider.cpp"
      

      So, when calling get_key on the lock 0x7fde9425bc10 we got:

      MDL_key *MDL_ticket::get_key() const
      {
              return &m_lock->key;
      }
      

      That is:

      (gdb) print &((MDL_ticket*)0x7fde9425bc10)->m_lock.key
      $15 = (MDL_key *) 0x7fde00707063
      

      Assumptions:

      • ull->refs looks okay, so ULL is possibly still good;
      • ull->lock (MDL_ticket) is received in Item_func_get_lock::val_int at one point;
      • now the MDL_ticket memory is overwritten.

      Relevant SQL:

      SELECT RELEASE_LOCK('b28527f1559cc629795224a389d14a07ba41cb80')
      

      Full backtrace attached.

      Is there anything I can get you? I have the core dump, so if you're interested in other threads, I can take a peek.

      Cheers,
      Walter Doekes
      OSSO B.V.

      Attachments

        Issue Links

          Activity

            wdoekes Walter Doekes added a comment -

            Another crash, this time not in ull_get_key, but when mutating I_P_List:

            #0  I_P_List<MDL_ticket, I_P_List_adapter<MDL_ticket, &MDL_ticket::next_in_context, &MDL_ticket::prev_in_context>, I_P_List_null_counter, I_P_List_no_push_back<MDL_ticket> >::remove (this=<optimized out>, a=0x7f9f484545a0) at ./sql/sql_plist.h:129
            #1  MDL_context::release_lock (this=<optimized out>, duration=<optimized out>, ticket=0x7f9f484545a0) at ./sql/mdl.cc:2851
            #2  0x000055b3350a02c2 in MDL_context::release_lock (this=<optimized out>, ticket=<optimized out>) at ./sql/mdl.cc:2869
            #3  0x000055b33522cc7c in Item_func_release_lock::val_int (this=0x7f9f48011050) at ./sql/item_func.cc:4256
            

            ^- but, again at Item_func_release_lock::val_int

            #4  0x000055b335103af9 in Type_handler::Item_send_long (this=<optimized out>, item=0x7f9f48011050, protocol=0x7f9f48001158, buf=<optimized out>) at ./sql/sql_type.cc:7102
            #5  0x000055b334ef7b18 in Protocol::send_result_set_row (this=this@entry=0x7f9f48001158, row_items=row_items@entry=0x7f9f48010b20) at ./sql/protocol.cc:1034
            #6  0x000055b334f63413 in select_send::send_data (this=0x7f9f48011aa8, items=...) at ./sql/sql_class.cc:3070
            #7  0x000055b3350097ec in JOIN::exec_inner (this=this@entry=0x7f9f48011ad0) at ./sql/sql_select.cc:4402
            #8  0x000055b3350099e3 in JOIN::exec (this=this@entry=0x7f9f48011ad0) at ./sql/sql_select.cc:4316
            #9  0x000055b335007c0d in mysql_select (thd=thd@entry=0x7f9f48000c08, tables=0x0, wild_num=0, fields=..., conds=<optimized out>, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7f9f48011aa8, unit=0x7f9f48004978, select_lex=0x7f9f480109d8) at ./sql/sql_select.cc:4755
            #10 0x000055b335008601 in handle_select (thd=thd@entry=0x7f9f48000c08, lex=lex@entry=0x7f9f480048b8, result=result@entry=0x7f9f48011aa8, setup_tables_done_option=setup_tables_done_option@entry=0) at ./sql/sql_select.cc:436
            #11 0x000055b334fa4521 in execute_sqlcom_select (thd=thd@entry=0x7f9f48000c08, all_tables=0x0) at ./sql/sql_parse.cc:6449
            #12 0x000055b334facf68 in mysql_execute_command (thd=thd@entry=0x7f9f48000c08) at ./sql/sql_parse.cc:3963
            #13 0x000055b334fb39ea in mysql_parse (thd=thd@entry=0x7f9f48000c08, rawbuf=rawbuf@entry=0x7f9f480108f0 "SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd')", length=length@entry=63, parser_state=parser_state@entry=0x7fa032a594e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at ./sql/sql_parse.cc:7995
            #14 0x000055b334fb421a in wsrep_mysql_parse (thd=0x7f9f48000c08, rawbuf=0x7f9f480108f0 "SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd')", length=63, parser_state=0x7fa032a594e0, is_com_multi=<optimized out>, is_next_command=<optimized out>) at ./sql/sql_parse.cc:7799
            #15 0x000055b334fb6ab6 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x7f9f48000c08, packet=packet@entry=0x7f9f4838e219 "", packet_length=packet_length@entry=63, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at ./sql/sql_parse.cc:1843
            #16 0x000055b334fb74ba in do_command (thd=0x7f9f48000c08) at ./sql/sql_parse.cc:1373
            #17 0x000055b33509704e in do_handle_one_connection (connect=connect@entry=0x55b3437ba018) at ./sql/sql_connect.cc:1420
            #18 0x000055b33509710d in handle_one_connection (arg=arg@entry=0x55b3437ba018) at ./sql/sql_connect.cc:1316
            #19 0x000055b3357000aa in pfs_spawn_thread (arg=0x55b343796038) at ./storage/perfschema/pfs.cc:1869
            #20 0x00007fa3f09276db in start_thread (arg=0x7fa032a5a700) at pthread_create.c:463
            #21 0x00007fa3efd2571f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
            

            Same SQL:

            Query (0x7f9f480108f0): SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd')
            

            Crash at:

               0x000055b3350a028b <+43>:	je     0x55b3350a0295 <MDL_context::release_lock(enum_mdl_duration, MDL_ticket*)+53>
               0x000055b3350a028d <+45>:	mov    0x10(%rbx),%rdx
               0x000055b3350a0291 <+49>:	mov    %rdx,0x10(%rax)
               0x000055b3350a0295 <+53>:	mov    0x10(%rbx),%rdx
               0x000055b3350a0299 <+57>:	mov    %rbx,%rdi
            => 0x000055b3350a029c <+60>:	mov    %rax,(%rdx)
               0x000055b3350a029f <+63>:	add    $0x8,%rsp
               0x000055b3350a02a3 <+67>:	pop    %rbx
               0x000055b3350a02a4 <+68>:	pop    %rbp
               0x000055b3350a02a5 <+69>:	jmpq   0x55b33509e770 <MDL_ticket::destroy(MDL_ticket*)>
            

            (gdb) print/x $rax
            $4 = 0x7f9f482b3800
            (gdb) print/x $rdx
            $5 = 0x0
            

            (gdb) x/8ugx $rbx
            0x7f9f484545a0:	0x00007f9f48568f60	0x00007f9f482b3800
            0x7f9f484545b0:	0x0000000000000000	0x0000000000000000
            0x7f9f484545c0:	0x00007f9f44321040	0x0000000000000007
            0x7f9f484545d0:	0x00007f9f48000d28	0x00007f9f44320e38
            

            Let me know if I can get you some values/memory.

            wdoekes Walter Doekes added a comment - Another crash, this time not in ull_get_key, but when mutating I_P_List: #0 I_P_List<MDL_ticket, I_P_List_adapter<MDL_ticket, &MDL_ticket::next_in_context, &MDL_ticket::prev_in_context>, I_P_List_null_counter, I_P_List_no_push_back<MDL_ticket> >::remove (this=<optimized out>, a=0x7f9f484545a0) at ./sql/sql_plist.h:129 #1 MDL_context::release_lock (this=<optimized out>, duration=<optimized out>, ticket=0x7f9f484545a0) at ./sql/mdl.cc:2851 #2 0x000055b3350a02c2 in MDL_context::release_lock (this=<optimized out>, ticket=<optimized out>) at ./sql/mdl.cc:2869 #3 0x000055b33522cc7c in Item_func_release_lock::val_int (this=0x7f9f48011050) at ./sql/item_func.cc:4256 ^- but, again at Item_func_release_lock::val_int #4 0x000055b335103af9 in Type_handler::Item_send_long (this=<optimized out>, item=0x7f9f48011050, protocol=0x7f9f48001158, buf=<optimized out>) at ./sql/sql_type.cc:7102 #5 0x000055b334ef7b18 in Protocol::send_result_set_row (this=this@entry=0x7f9f48001158, row_items=row_items@entry=0x7f9f48010b20) at ./sql/protocol.cc:1034 #6 0x000055b334f63413 in select_send::send_data (this=0x7f9f48011aa8, items=...) at ./sql/sql_class.cc:3070 #7 0x000055b3350097ec in JOIN::exec_inner (this=this@entry=0x7f9f48011ad0) at ./sql/sql_select.cc:4402 #8 0x000055b3350099e3 in JOIN::exec (this=this@entry=0x7f9f48011ad0) at ./sql/sql_select.cc:4316 #9 0x000055b335007c0d in mysql_select (thd=thd@entry=0x7f9f48000c08, tables=0x0, wild_num=0, fields=..., conds=<optimized out>, og_num=0, order=0x0, group=0x0, having=0x0, proc_param=0x0, select_options=2147748608, result=0x7f9f48011aa8, unit=0x7f9f48004978, select_lex=0x7f9f480109d8) at ./sql/sql_select.cc:4755 #10 0x000055b335008601 in handle_select (thd=thd@entry=0x7f9f48000c08, lex=lex@entry=0x7f9f480048b8, result=result@entry=0x7f9f48011aa8, setup_tables_done_option=setup_tables_done_option@entry=0) at ./sql/sql_select.cc:436 #11 0x000055b334fa4521 in execute_sqlcom_select (thd=thd@entry=0x7f9f48000c08, all_tables=0x0) at ./sql/sql_parse.cc:6449 #12 0x000055b334facf68 in mysql_execute_command (thd=thd@entry=0x7f9f48000c08) at ./sql/sql_parse.cc:3963 #13 0x000055b334fb39ea in mysql_parse (thd=thd@entry=0x7f9f48000c08, rawbuf=rawbuf@entry=0x7f9f480108f0 "SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd')", length=length@entry=63, parser_state=parser_state@entry=0x7fa032a594e0, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at ./sql/sql_parse.cc:7995 #14 0x000055b334fb421a in wsrep_mysql_parse (thd=0x7f9f48000c08, rawbuf=0x7f9f480108f0 "SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd')", length=63, parser_state=0x7fa032a594e0, is_com_multi=<optimized out>, is_next_command=<optimized out>) at ./sql/sql_parse.cc:7799 #15 0x000055b334fb6ab6 in dispatch_command (command=command@entry=COM_QUERY, thd=thd@entry=0x7f9f48000c08, packet=packet@entry=0x7f9f4838e219 "", packet_length=packet_length@entry=63, is_com_multi=is_com_multi@entry=false, is_next_command=is_next_command@entry=false) at ./sql/sql_parse.cc:1843 #16 0x000055b334fb74ba in do_command (thd=0x7f9f48000c08) at ./sql/sql_parse.cc:1373 #17 0x000055b33509704e in do_handle_one_connection (connect=connect@entry=0x55b3437ba018) at ./sql/sql_connect.cc:1420 #18 0x000055b33509710d in handle_one_connection (arg=arg@entry=0x55b3437ba018) at ./sql/sql_connect.cc:1316 #19 0x000055b3357000aa in pfs_spawn_thread (arg=0x55b343796038) at ./storage/perfschema/pfs.cc:1869 #20 0x00007fa3f09276db in start_thread (arg=0x7fa032a5a700) at pthread_create.c:463 #21 0x00007fa3efd2571f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Same SQL: Query (0x7f9f480108f0): SELECT RELEASE_LOCK('c125edaa6c363d3a95705e48a0b1f659d141e3dd') Crash at: 0x000055b3350a028b <+43>: je 0x55b3350a0295 <MDL_context::release_lock(enum_mdl_duration, MDL_ticket*)+53> 0x000055b3350a028d <+45>: mov 0x10(%rbx),%rdx 0x000055b3350a0291 <+49>: mov %rdx,0x10(%rax) 0x000055b3350a0295 <+53>: mov 0x10(%rbx),%rdx 0x000055b3350a0299 <+57>: mov %rbx,%rdi => 0x000055b3350a029c <+60>: mov %rax,(%rdx) 0x000055b3350a029f <+63>: add $0x8,%rsp 0x000055b3350a02a3 <+67>: pop %rbx 0x000055b3350a02a4 <+68>: pop %rbp 0x000055b3350a02a5 <+69>: jmpq 0x55b33509e770 <MDL_ticket::destroy(MDL_ticket*)> (gdb) print/x $rax $4 = 0x7f9f482b3800 (gdb) print/x $rdx $5 = 0x0 (gdb) x/8ugx $rbx 0x7f9f484545a0: 0x00007f9f48568f60 0x00007f9f482b3800 0x7f9f484545b0: 0x0000000000000000 0x0000000000000000 0x7f9f484545c0: 0x00007f9f44321040 0x0000000000000007 0x7f9f484545d0: 0x00007f9f48000d28 0x00007f9f44320e38 Let me know if I can get you some values/memory.
            wdoekes Walter Doekes added a comment -

            I'm under the impression that this was caused by:

            https://jira.mariadb.org/browse/MDEV-26803?focusedCommentId=207322&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-207322

            Feel free to close and I'll reopen/create-new if it resurfaces.

            wdoekes Walter Doekes added a comment - I'm under the impression that this was caused by: https://jira.mariadb.org/browse/MDEV-26803?focusedCommentId=207322&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-207322 Feel free to close and I'll reopen/create-new if it resurfaces.

            People

              jplindst Jan Lindström (Inactive)
              wdoekes Walter Doekes
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.