Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-25365

Server hangs or crashes in Query_cache::move_by_type upon FLUSH QUERY CACHE

    XMLWordPrintable

Details

    • Bug
    • Status: Confirmed (View Workflow)
    • Major
    • Resolution: Unresolved
    • 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 10.10
    • 10.4, 10.5, 10.6
    • Query Cache
    • None

    Description

      SET @qcache= @@global.query_cache_type;
      SET GLOBAL query_cache_type= ON;
      SET query_cache_type= ON;
       
      CREATE TABLE t1 (a INT);
      CREATE TABLE t2 (b INT);
      CREATE TABLE t3 (c INT);
      CREATE TABLE t4 (d INT);
      CREATE VIEW v4 AS SELECT * FROM t4;
      CREATE TABLE t5 (e INT);
      CREATE VIEW v5 AS SELECT * FROM t5;
      CREATE TABLE t6 (f INT);
      CREATE VIEW v6 AS SELECT * FROM t6;
      CREATE TABLE t7 (g INT);
       
      # In the next 3 queries the exact tables or views seem to be important
      # (but not the length of the query)
       
      SELECT * FROM t1, t2;
      SELECT * FROM t2;
      SELECT t7.* FROM t3 t3a, t3 t3b, t3 t3c, t3 t3d, t3 t3e, v4 v4a, v4 v4b, v4 v4c, v4 v4d, v5, v6, t6, t7;
       
      # This most likely correlates with the first SELECT,
      # to kick it out from the cache but to keep everything else
       
      ALTER TABLE t1 FORCE;
       
      # Here the length of the query and the presence of (at least) two tables
      # seems to be important, while the exact tables are not
      # The comment compensates for the length
       
      SELECT * FROM t5, t6 /* This query needs to be of a certain length, so I am trying to make the comment at least 175 symbols long. Now I am out of ideas what to write, so.............................. */;
       
      # This is the query cache magic which causes the hang or crash
       
      FLUSH QUERY CACHE;
      RESET QUERY CACHE;
      FLUSH QUERY CACHE;
       
      # Cleanup
      DROP VIEW v4;
      DROP VIEW v5;
      DROP VIEW v6;
      DROP TABLE t1, t2, t3, t4, t5, t6, t7;
      SET GLOBAL query_cache_type= @qcache;
      

      The test case above hangs on the second FLUSH QUERY CACHE (or rather endlessly loops with 100% CPU usage). Stack trace from the running server:

      10.2 c32edd75 debug

      Thread 6 (Thread 0x7f1586858700 (LWP 957641)):
      #0  0x000055880d13dc40 in Query_cache::move_by_type (this=0x55880e4cdb00 <query_cache>, border=0x7f15868568f0, before=0x7f15868568f8, gap=0x7f1586856900, block=0x7f15741930d0) at /data/src/10.2/sql/sql_cache.cc:4349
      #1  0x000055880d13d561 in Query_cache::pack_cache (this=0x55880e4cdb00 <query_cache>) at /data/src/10.2/sql/sql_cache.cc:4253
      #2  0x000055880d1389be in Query_cache::pack (this=0x55880e4cdb00 <query_cache>, thd=0x7f1574000d90, join_limit=524288, iteration_limit=2) at /data/src/10.2/sql/sql_cache.cc:2490
      #3  0x000055880d31d71a in reload_acl_and_cache (thd=0x7f1574000d90, options=131072, tables=0x0, write_to_binlog=0x7f1586856d10) at /data/src/10.2/sql/sql_reload.cc:211
      #4  0x000055880d197a45 in mysql_execute_command (thd=0x7f1574000d90) at /data/src/10.2/sql/sql_parse.cc:5201
      #5  0x000055880d19f43b in mysql_parse (thd=0x7f1574000d90, rawbuf=0x7f15740126f8 "FLUSH QUERY CACHE", length=17, parser_state=0x7f15868575f0, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:7794
      #6  0x000055880d18d664 in dispatch_command (command=COM_QUERY, thd=0x7f1574000d90, packet=0x7f1574008b51 "FLUSH QUERY CACHE", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:1827
      #7  0x000055880d18c15f in do_command (thd=0x7f1574000d90) at /data/src/10.2/sql/sql_parse.cc:1381
      #8  0x000055880d2e6c94 in do_handle_one_connection (connect=0x55881033e120) at /data/src/10.2/sql/sql_connect.cc:1336
      #9  0x000055880d2e69f9 in handle_one_connection (arg=0x55881033e120) at /data/src/10.2/sql/sql_connect.cc:1241
      #10 0x000055880db10f3a in pfs_spawn_thread (arg=0x558810321590) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #11 0x00007f158cc0e609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #12 0x00007f158c7e8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
       
      Thread 5 (Thread 0x7f15868a3700 (LWP 957638)):
      #0  futex_wait_cancelable (private=<optimized out>, expected=0, futex_word=0x55880e4d21c8 <COND_manager+40>) at ../sysdeps/nptl/futex-internal.h:183
      #1  __pthread_cond_wait_common (abstime=0x0, clockid=0, mutex=0x55880e4d2108 <LOCK_manager+40>, cond=0x55880e4d21a0 <COND_manager>) at pthread_cond_wait.c:508
      #2  __pthread_cond_wait (cond=0x55880e4d21a0 <COND_manager>, mutex=0x55880e4d2108 <LOCK_manager+40>) at pthread_cond_wait.c:638
      #3  0x000055880db7453b in safe_cond_wait (cond=0x55880e4d21a0 <COND_manager>, mp=0x55880e4d20e0 <LOCK_manager>, file=0x55880dc03ab0 "/data/src/10.2/include/mysql/psi/mysql_thread.h", line=1145) at /data/src/10.2/mysys/thr_mutex.c:491
      #4  0x000055880d188485 in inline_mysql_cond_wait (that=0x55880e4d21a0 <COND_manager>, mutex=0x55880e4d20e0 <LOCK_manager>, src_file=0x55880dc03be0 "/data/src/10.2/sql/sql_manager.cc", src_line=102) at /data/src/10.2/include/mysql/psi/mysql_thread.h:1145
      #5  0x000055880d188929 in handle_manager (arg=0x0) at /data/src/10.2/sql/sql_manager.cc:102
      #6  0x000055880db10f3a in pfs_spawn_thread (arg=0x558810322fe0) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #7  0x00007f158cc0e609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #8  0x00007f158c7e8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
       
      Thread 4 (Thread 0x7f15868ee700 (LWP 957637)):
      #0  0x00007f158c70d322 in __GI___sigtimedwait (set=set@entry=0x7f15868ede10, info=info@entry=0x7f15868edd20, timeout=timeout@entry=0x0) at ../sysdeps/unix/sysv/linux/sigtimedwait.c:29
      #1  0x00007f158cc19f6c in __sigwait (set=0x7f15868ede10, sig=0x7f15868edde0) at ../sysdeps/unix/sysv/linux/sigwait.c:28
      #2  0x000055880d099d26 in signal_hand (arg=0x0) at /data/src/10.2/sql/mysqld.cc:3557
      #3  0x000055880db10f3a in pfs_spawn_thread (arg=0x558810317600) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #4  0x00007f158cc0e609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #5  0x00007f158c7e8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
       
      Thread 3 (Thread 0x7f15870ef700 (LWP 957636)):
      #0  futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f15870eede0, clockid=<optimized out>, expected=0, futex_word=0x55880ed4f708 <COND_checkpoint+40>) at ../sysdeps/nptl/futex-internal.h:320
      #1  __pthread_cond_wait_common (abstime=0x7f15870eede0, clockid=<optimized out>, mutex=0x55880ed4f648 <LOCK_checkpoint+40>, cond=0x55880ed4f6e0 <COND_checkpoint>) at pthread_cond_wait.c:520
      #2  __pthread_cond_timedwait (cond=0x55880ed4f6e0 <COND_checkpoint>, mutex=0x55880ed4f648 <LOCK_checkpoint+40>, abstime=0x7f15870eede0) at pthread_cond_wait.c:656
      #3  0x000055880db748fd in safe_cond_timedwait (cond=0x55880ed4f6e0 <COND_checkpoint>, mp=0x55880ed4f620 <LOCK_checkpoint>, abstime=0x7f15870eede0, file=0x55880de756e0 "/data/src/10.2/include/mysql/psi/mysql_thread.h", line=1182) at /data/src/10.2/mysys/thr_mutex.c:545
      #4  0x000055880d9ecdc2 in inline_mysql_cond_timedwait (that=0x55880ed4f6e0 <COND_checkpoint>, mutex=0x55880ed4f620 <LOCK_checkpoint>, abstime=0x7f15870eede0, src_file=0x55880de75710 "/data/src/10.2/storage/maria/ma_servicethread.c", src_line=115) at /data/src/10.2/include/mysql/psi/mysql_thread.h:1182
      #5  0x000055880d9ed2d0 in my_service_thread_sleep (control=0x55880e30b9c0 <checkpoint_control>, sleep_time=29000000000) at /data/src/10.2/storage/maria/ma_servicethread.c:115
      #6  0x000055880d9e1bb4 in ma_checkpoint_background (arg=0x1e) at /data/src/10.2/storage/maria/ma_checkpoint.c:709
      #7  0x000055880db10f3a in pfs_spawn_thread (arg=0x5588102e4b80) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #8  0x00007f158cc0e609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #9  0x00007f158c7e8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
       
      Thread 2 (Thread 0x7f1588459700 (LWP 957635)):
      #0  futex_abstimed_wait_cancelable (private=<optimized out>, abstime=0x7f1588458e90, clockid=<optimized out>, expected=0, futex_word=0x55880ed60d68 <COND_timer+40>) at ../sysdeps/nptl/futex-internal.h:320
      #1  __pthread_cond_wait_common (abstime=0x7f1588458e90, clockid=<optimized out>, mutex=0x55880ed60ca8 <LOCK_timer+40>, cond=0x55880ed60d40 <COND_timer>) at pthread_cond_wait.c:520
      #2  __pthread_cond_timedwait (cond=0x55880ed60d40 <COND_timer>, mutex=0x55880ed60ca8 <LOCK_timer+40>, abstime=0x7f1588458e90) at pthread_cond_wait.c:656
      #3  0x000055880db748fd in safe_cond_timedwait (cond=0x55880ed60d40 <COND_timer>, mp=0x55880ed60c80 <LOCK_timer>, abstime=0x7f1588458e90, file=0x55880deb0a70 "/data/src/10.2/include/mysql/psi/mysql_thread.h", line=1182) at /data/src/10.2/mysys/thr_mutex.c:545
      #4  0x000055880db758a6 in inline_mysql_cond_timedwait (that=0x55880ed60d40 <COND_timer>, mutex=0x55880ed60c80 <LOCK_timer>, abstime=0x7f1588458e90, src_file=0x55880deb0aa0 "/data/src/10.2/mysys/thr_timer.c", src_line=292) at /data/src/10.2/include/mysql/psi/mysql_thread.h:1182
      #5  0x000055880db763d9 in timer_handler (arg=0x0) at /data/src/10.2/mysys/thr_timer.c:292
      #6  0x000055880db10f3a in pfs_spawn_thread (arg=0x5588101d9a20) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #7  0x00007f158cc0e609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #8  0x00007f158c7e8293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
       
      Thread 1 (Thread 0x7f158c6c1740 (LWP 957634)):
      #0  0x00007f158c7dbaff in __GI___poll (fds=0x7ffdd2f1ab30, nfds=2, timeout=-1) at ../sysdeps/unix/sysv/linux/poll.c:29
      #1  0x000055880d09fc0a in handle_connections_sockets () at /data/src/10.2/sql/mysqld.cc:6724
      #2  0x000055880d09eeca in mysqld_main (argc=128, argv=0x5588100ba040) at /data/src/10.2/sql/mysqld.cc:6192
      #3  0x000055880d09343d in main (argc=6, argv=0x7ffdd2f1ae38) at /data/src/10.2/sql/main.cc:25
      [Inferior 1 (process 957634) detached]
      

      Consequent snapshots look pretty much the same, except the address:

      < #0  0x000055880d13dc1e in Query_cache::move_by_type (this=0x55880e4cdb00 <query_cache>, border=0x7f15868568f0, before=0x7f15868568f8, gap=0x7f1586856900, block=0x7f15741930d0) at /data/src/10.2/sql/sql_cache.cc:4349
      ---
      > #0  0x000055880d13dc2e in Query_cache::move_by_type (this=0x55880e4cdb00 <query_cache>, border=0x7f15868568f0, before=0x7f15868568f8, gap=0x7f1586856900, block=0x7f15741930d0) at /data/src/10.2/sql/sql_cache.cc:4350
      

      The hang currently reproducible for me every time on all of 10.2-10.6, debug, release, ASAN builds, current and older ones, back at least to 10.2.6 and probably to EOL-ed versions as well.
      However, given the high sensitivity of the test case to the exact structure and/or the length of queries, it won't be too surprising if it doesn't happen on some other machines or builds. I wrote everything I could notice regarding the dependencies; some observations can be false.

      Some less unfortunate variations of the test case can also crash instead of hanging, in the same Query_cache::move_by_type with minor variations of the test case. The crashing effect is not reliable though – it may crash on one version/build but hang on another.

      Example of a crash:

      10.2 c32edd75 debug

      #3  <signal handler called>
      #4  __memmove_avx_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:384
      #5  0x0000562afc9acac0 in Query_cache::move_by_type (this=0x562afdd3cb00 <query_cache>, border=0x7fca753438f0, before=0x7fca753438f8, gap=0x7fca75343900, block=0x7fca76f48100) at /data/src/10.2/sql/sql_cache.cc:4329
      #6  0x0000562afc9ac561 in Query_cache::pack_cache (this=0x562afdd3cb00 <query_cache>) at /data/src/10.2/sql/sql_cache.cc:4253
      #7  0x0000562afc9a79be in Query_cache::pack (this=0x562afdd3cb00 <query_cache>, thd=0x7fca64000d90, join_limit=524288, iteration_limit=2) at /data/src/10.2/sql/sql_cache.cc:2490
      #8  0x0000562afcb8c71a in reload_acl_and_cache (thd=0x7fca64000d90, options=131072, tables=0x0, write_to_binlog=0x7fca75343d10) at /data/src/10.2/sql/sql_reload.cc:211
      #9  0x0000562afca06a45 in mysql_execute_command (thd=0x7fca64000d90) at /data/src/10.2/sql/sql_parse.cc:5201
      #10 0x0000562afca0e43b in mysql_parse (thd=0x7fca64000d90, rawbuf=0x7fca640126f8 "FLUSH QUERY CACHE", length=17, parser_state=0x7fca753445f0, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:7794
      #11 0x0000562afc9fc664 in dispatch_command (command=COM_QUERY, thd=0x7fca64000d90, packet=0x7fca64008b51 "FLUSH QUERY CACHE", packet_length=17, is_com_multi=false, is_next_command=false) at /data/src/10.2/sql/sql_parse.cc:1827
      #12 0x0000562afc9fb15f in do_command (thd=0x7fca64000d90) at /data/src/10.2/sql/sql_parse.cc:1381
      #13 0x0000562afcb55c94 in do_handle_one_connection (connect=0x562aff153120) at /data/src/10.2/sql/sql_connect.cc:1336
      #14 0x0000562afcb559f9 in handle_one_connection (arg=0x562aff153120) at /data/src/10.2/sql/sql_connect.cc:1241
      #15 0x0000562afd37ff3a in pfs_spawn_thread (arg=0x562aff136590) at /data/src/10.2/storage/perfschema/pfs.cc:1869
      #16 0x00007fca7b6fb609 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #17 0x00007fca7b2d5293 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      

      Attachments

        Issue Links

          Activity

            People

              sanja Oleksandr Byelkin
              elenst Elena Stepanova
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.