
MDEV-37736: Conflict between ALTER TABLE…ANALYZE PARTITION and the background calculation of InnoDB statistics


Details

    • Can result in hang or crash

    Description

      This deadlock/crash was found during testing:

      Commit: origin/MDEV-37482 e0a4a1dc3cfa262266f38871869027c8359e9e4f

      # 2025-09-23T08:33:35 [965079] Thread 4 (Thread 0x7f9abfdf2640 (LWP 997119)):
      # 2025-09-23T08:33:35 [965079] #0  0x00007f9aecd0fc9b in sched_yield () at ../sysdeps/unix/syscall-template.S:120
      # 2025-09-23T08:33:35 [965079] #1  0x0000556f151d4ee2 in __gthread_yield () at /usr/include/x86_64-linux-gnu/c++/11/bits/gthr-default.h:693
      # 2025-09-23T08:33:35 [965079] #2  std::this_thread::yield () at /usr/include/c++/11/bits/std_thread.h:329
      # 2025-09-23T08:33:35 [965079] #3  purge_sys_t::wait_FTS (this=this@entry=0x556f16a4f140 <purge_sys>, also_sys=also_sys@entry=false) at /data/Server/MDEV-37482A/storage/innobase/trx/trx0purge.cc:1080
      # 2025-09-23T08:33:35 [965079] #4  0x0000556f151d7352 in purge_sys_t::close_and_reopen (this=this@entry=0x556f16a4f140 <purge_sys>, id=<optimized out>, thd=thd@entry=0x556f17edf958, mdl=mdl@entry=0x7f9abfdf19a8) at /data/Server/MDEV-37482A/storage/innobase/trx/trx0purge.cc:1188
      # 2025-09-23T08:33:35 [965079] #5  0x0000556f151dad5e in trx_purge_attach_undo_recs (thd=thd@entry=0x556f17edf958, n_work_items=n_work_items@entry=0x7f9abfdf1ac8) at /data/Server/MDEV-37482A/storage/innobase/trx/trx0purge.cc:1270
      # 2025-09-23T08:33:35 [965079] #6  0x0000556f151db560 in trx_purge (n_tasks=<optimized out>, n_tasks@entry=4, history_size=375) at /data/Server/MDEV-37482A/storage/innobase/trx/trx0purge.cc:1388
      # 2025-09-23T08:33:35 [965079] #7  0x0000556f151c47ee in purge_coordinator_state::do_purge (this=this@entry=0x556f16a4e3a0 <purge_state>) at /data/Server/MDEV-37482A/storage/innobase/srv/srv0srv.cc:1423
      # 2025-09-23T08:33:35 [965079] #8  0x0000556f151c3e8c in purge_coordinator_callback () at /data/Server/MDEV-37482A/storage/innobase/srv/srv0srv.cc:1507
      # 2025-09-23T08:33:35 [965079] #9  0x0000556f153d26b8 in tpool::task_group::execute (this=0x556f16a4e1c0 <purge_coordinator_task_group>, t=t@entry=0x556f16a4e120 <purge_coordinator_task>) at /data/Server/MDEV-37482A/tpool/task_group.cc:73
      # 2025-09-23T08:33:35 [965079] #10 0x0000556f153d2a8b in tpool::task::execute (this=0x556f16a4e120 <purge_coordinator_task>) at /data/Server/MDEV-37482A/tpool/task.cc:32
      # 2025-09-23T08:33:35 [965079] #11 0x0000556f153cefbd in tpool::thread_pool_generic::worker_main (this=0x556f17b236b0, thread_var=0x556f17b23b20) at /data/Server/MDEV-37482A/tpool/tpool_generic.cc:529
      # 2025-09-23T08:33:35 [965079] #12 0x0000556f153cf215 in std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__t=<optimized out>, __f=<optimized out>) at /usr/include/c++/11/bits/invoke.h:74
      # 2025-09-23T08:33:35 [965079] #13 std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> (__fn=<optimized out>) at /usr/include/c++/11/bits/invoke.h:96
      # 2025-09-23T08:33:35 [965079] #14 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul> (this=<optimized out>) at /usr/include/c++/11/bits/std_thread.h:259
      # 2025-09-23T08:33:35 [965079] #15 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator() (this=<optimized out>) at /usr/include/c++/11/bits/std_thread.h:266
      # 2025-09-23T08:33:35 [965079] #16 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run (this=<optimized out>) at /usr/include/c++/11/bits/std_thread.h:211
      # 2025-09-23T08:33:35 [965079] #17 0x00007f9aed015253 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
      # 2025-09-23T08:33:35 [965079] #18 0x00007f9aecc9bac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
      # 2025-09-23T08:33:35 [965079] #19 0x00007f9aecd2d850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
      
      

      As per Marko's analysis:
      This hang is unrelated to these changes; it is a conflict between ALTER TABLE…ANALYZE PARTITION and the background calculation of InnoDB statistics:

      Thread 22 (Thread 0x7f9abcdec640 (LWP 999238)):
      #10 0x0000556f152d7ee6 in dict_acquire_mdl_shared<false> (table=table@entry=0x7f9a68060a88, thd=thd@entry=0x7f9a14001a98, mdl=mdl@entry=0x7f9abcdeba48, table_op=table_op@entry=DICT_TABLE_OP_NORMAL) at /data/Server/MDEV-37482A/storage/innobase/dict/dict0dict.cc:803
      #11 0x0000556f152cdb25 in dict_table_open_on_id (table_id=505, dict_locked=dict_locked@entry=false, table_op=table_op@entry=DICT_TABLE_OP_NORMAL, thd=thd@entry=0x7f9a14001a98, mdl=mdl@entry=0x7f9abcdeba48) at /data/Server/MDEV-37482A/storage/innobase/dict/dict0dict.cc:835
      #12 0x0000556f1531c0e1 in dict_stats_process_entry_from_recalc_pool (thd=0x7f9a14001a98) at /data/Server/MDEV-37482A/storage/innobase/dict/dict0stats_bg.cc:302
      #13 0x0000556f1531d6d8 in dict_stats_func () at /data/Server/MDEV-37482A/storage/innobase/dict/dict0stats_bg.cc:385
      


      This background task is waiting for MDL on the name test.a in order to compute statistics for test/a#P#p1 (partition p1 of that table). That is blocking ALTER TABLE a ANALYZE PARTITION p0,p1,p2 in another thread:

      Thread 36 (Thread 0x7f9acc5b0640 (LWP 980101)):
      #0  __futex_abstimed_wait_common64 (private=0, cancel=true, abstime=0x0, op=393, expected=0, futex_word=0x556f16a54628 <recalc_pool_cond+40>) at ./nptl/futex-internal.c:57
      #1  __futex_abstimed_wait_common (cancel=true, private=0, abstime=0x0, clockid=0, expected=0, futex_word=0x556f16a54628 <recalc_pool_cond+40>) at ./nptl/futex-internal.c:87
      #2  __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x556f16a54628 <recalc_pool_cond+40>, expected=expected@entry=0, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=0) at ./nptl/futex-internal.c:139
      #3  0x00007f9aecc9aa41 in __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x556f16a54668 <recalc_pool_mutex+40>, cond=0x556f16a54600 <recalc_pool_cond>) at ./nptl/pthread_cond_wait.c:503
      #4  ___pthread_cond_wait (cond=cond@entry=0x556f16a54600 <recalc_pool_cond>, mutex=mutex@entry=0x556f16a54668 <recalc_pool_mutex+40>) at ./nptl/pthread_cond_wait.c:627
      #5  0x0000556f15440bb4 in safe_cond_wait (cond=0x556f16a54600 <recalc_pool_cond>, mp=0x556f16a54640 <recalc_pool_mutex>, file=0x556f159678b8 "/data/Server/MDEV-37482A/storage/innobase/dict/dict0stats_bg.cc", line=225) at /data/Server/MDEV-37482A/mysys/thr_mutex.c:489
      #6  0x0000556f15319d88 in dict_stats_recalc_pool_del (id=<optimized out>, have_mdl_exclusive=have_mdl_exclusive@entry=false) at /data/Server/MDEV-37482A/storage/innobase/dict/dict0stats_bg.cc:225
      #7  0x0000556f14f7969f in ha_innobase::info_low (this=0x7f9a880597e8, flag=flag@entry=28, is_analyze=is_analyze@entry=true) at /data/Server/MDEV-37482A/storage/innobase/handler/ha_innodb.cc:14884
      #8  0x0000556f14f7a587 in ha_innobase::analyze (this=<optimized out>) at /data/Server/MDEV-37482A/storage/innobase/handler/ha_innodb.cc:15221
      #9  0x0000556f14c055d4 in handler::ha_analyze (this=0x7f9a880597e8, thd=0x7f9a88000d58, check_opt=check_opt@entry=0x7f9a880063e8) at /data/Server/MDEV-37482A/sql/handler.cc:5650
      #10 0x0000556f14f03606 in ha_partition::handle_opt_part (this=this@entry=0x7f9a88058688, thd=thd@entry=0x7f9a88000d58, check_opt=check_opt@entry=0x7f9a880063e8, part_id=part_id@entry=1, flag=flag@entry=2) at /data/Server/MDEV-37482A/sql/ha_partition.cc:1411
      #11 0x0000556f14f03752 in ha_partition::handle_opt_partitions (this=this@entry=0x7f9a88058688, thd=0x7f9a88000d58, check_opt=0x7f9a880063e8, flag=flag@entry=2) at /data/Server/MDEV-37482A/sql/ha_partition.cc:1591
      #12 0x0000556f14f039d1 in ha_partition::analyze (this=0x7f9a88058688, thd=<optimized out>, check_opt=<optimized out>) at /data/Server/MDEV-37482A/sql/ha_partition.cc:1291
      #13 0x0000556f14c055d4 in handler::ha_analyze (this=0x7f9a88058688, thd=0x7f9a88000d58, check_opt=0x7f9a880063e8) at /data/Server/MDEV-37482A/sql/handler.cc:5650
      #14 0x0000556f14a3d9d7 in mysql_admin_table (thd=thd@entry=0x7f9a88000d58, tables=tables@entry=0x7f9a88015a28, check_opt=check_opt@entry=0x7f9a880063e8, operator_name=operator_name@entry=0x556f15f463d0 <msg_analyze>, lock_type=lock_type@entry=TL_READ_NO_INSERT, org_open_for_modify=org_open_for_modify@entry=true, no_errors_from_open=<optimized out>, extra_open_options=<optimized out>, prepare_func=<optimized out>, operator_func=<optimized out>, view_operator_func=<optimized out>, is_cmd_replicated=<optimized out>) at /data/Server/MDEV-37482A/sql/sql_admin.cc:939
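
      On a live server in this state, the metadata lock situation can also be inspected from a third connection. This is only a diagnostic sketch and assumes the metadata_lock_info plugin is available; the background statistics thread may not show up in the processlist, so the stack traces above remain the primary evidence:

      INSTALL SONAME 'metadata_lock_info';  -- once per server, if not already installed
      -- list the metadata locks currently held, including the MDL on test.a
      SELECT * FROM information_schema.METADATA_LOCK_INFO;
      -- client sessions queued behind it typically show "Waiting for table metadata lock"
      SHOW FULL PROCESSLIST;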
      

      This in turn is likely to block the purge subsystem and many other threads. Purge was attempting to acquire MDL for a table that is known by table_id=0x1f8. The id of the partition test/a#P#p1 would be 0x1f9. I was able to look up that table in dict_sys.table_id_hash:

      (gdb) print ((dict_table_t*)0x7f9a68060a88).id
      $151 = 0x1f9
      (gdb) print ((dict_table_t*)0x7f9a68060a88).name
      $152 = {m_name = 0x7f9a68063550 "test/a#P#p1"}
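
      For reference, 0x1f9 is 505, i.e. the table_id=505 being opened in dict_table_open_on_id() in Thread 22 is exactly this partition. On a server that is still responsive, the id-to-name mapping can also be read from SQL; a sketch, assuming the PROCESS privilege:

      -- each partition is a separate InnoDB table with its own TABLE_ID
      SELECT TABLE_ID, HEX(TABLE_ID) AS TABLE_ID_HEX, NAME
        FROM information_schema.INNODB_SYS_TABLES
       WHERE NAME LIKE 'test/a#P#%';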
      

      A core dump is present on pluto:
      /data/results/1758612370/001355
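
      For context, a sketch of the kind of workload that exercises this conflict. The original failure was found during testing and this is not a deterministic reproducer; the schema, the row counts and the use of the SEQUENCE engine are assumptions, and only the table and partition names are taken from the traces above:

      -- Session 1: a partitioned InnoDB table with enough churn that the background
      -- statistics thread keeps re-queuing its partitions (innodb_stats_auto_recalc=ON
      -- is the default, and recalculation is triggered once roughly 10% of the rows change)
      CREATE TABLE test.a (id INT PRIMARY KEY, val INT) ENGINE=InnoDB
        PARTITION BY HASH (id) PARTITIONS 3;            -- default partition names p0, p1, p2
      INSERT INTO test.a SELECT seq, seq FROM seq_1_to_100000;
      UPDATE test.a SET val = val + 1;                  -- repeat in a loop

      -- Session 2: concurrently, also in a loop
      ALTER TABLE test.a ANALYZE PARTITION p0, p1, p2;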
