Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-28739

Trying to lock uninitialized mutex or hang upon shutdown after using Spider with query_cache

Details

    Description

      --source plugin/spider/spider/include/init_spider.inc
       
      set global query_cache_type= on;
      set spider_same_server_link = on;
       
      eval create server s foreign data wrapper mysql options (host "127.0.0.1", database "test", user "root", port $MASTER_MYPORT);
      CREATE TABLE t (a INT);
      CREATE TABLE t_SPIDER (a INT) ENGINE=SPIDER COMMENT="wrapper 'mysql', srv 's', table 't'";
      SELECT * FROM t_SPIDER;
       
      --source include/restart_mysqld.inc
       
      DROP TABLE t_SPIDER;
      DROP TABLE t;
       
      --source plugin/spider/spider/include/deinit_spider.inc
      

      10.3 91d5fffa

      2022-06-03  1:50:08 0 [Note] Event Scheduler: Purging the queue. 0 events
      safe_mutex: Trying to lock uninitialized mutex at /data/src/10.3/sql/sql_cache.cc, line 723
      220603  1:50:08 [ERROR] mysqld got signal 6 ;
       
      #3  <signal handler called>
      #4  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
      #5  0x00007faa86c1e537 in __GI_abort () at abort.c:79
      #6  0x000055f9a2c5e92c in safe_mutex_lock (mp=0x55f9a36ee2c0 <query_cache+160>, my_flags=0, file=0x55f9a2ce7ea8 "/data/src/10.3/sql/sql_cache.cc", line=723) at /data/src/10.3/mysys/thr_mutex.c:248
      #7  0x000055f9a2040152 in inline_mysql_mutex_lock (that=0x55f9a36ee2c0 <query_cache+160>, src_file=0x55f9a2ce7ea8 "/data/src/10.3/sql/sql_cache.cc", src_line=723) at /data/src/10.3/include/mysql/psi/mysql_thread.h:717
      #8  0x000055f9a20411f8 in Query_cache::lock (this=0x55f9a36ee220 <query_cache>, thd=0x7faa0c001fe0) at /data/src/10.3/sql/sql_cache.cc:723
      #9  0x000055f9a20480d9 in Query_cache::invalidate_table (this=0x55f9a36ee220 <query_cache>, thd=0x7faa0c001fe0, key=0x7faa829062b0 "mysql", key_length=23) at /data/src/10.3/sql/sql_cache.cc:3312
      #10 0x000055f9a2045cf5 in Query_cache::invalidate_by_MyISAM_filename (this=0x55f9a36ee220 <query_cache>, filename=0x7faa0c00c4b8 "./mysql/spider_table_sts") at /data/src/10.3/sql/sql_cache.cc:2432
      #11 0x000055f9a2042629 in query_cache_invalidate_by_MyISAM_filename (filename=0x7faa0c00c4b8 "./mysql/spider_table_sts") at /data/src/10.3/sql/sql_cache.cc:1249
      #12 0x000055f9a2b6da25 in mi_update_status (param=0x7faa0c0090b0) at /data/src/10.3/storage/myisam/mi_locking.c:338
      #13 0x000055f9a2b6db74 in mi_update_status_with_lock (info=0x7faa0c0090b0) at /data/src/10.3/storage/myisam/mi_locking.c:374
      #14 0x000055f9a2b6cc96 in mi_lock_database (info=0x7faa0c0090b0, lock_type=2) at /data/src/10.3/storage/myisam/mi_locking.c:70
      #15 0x000055f9a2b46dac in ha_myisam::external_lock (this=0x7faa0c030f68, thd=0x7faa0c001fe0, lock_type=2) at /data/src/10.3/storage/myisam/ha_myisam.cc:2106
      #16 0x000055f9a23f5859 in handler::ha_external_lock (this=0x7faa0c030f68, thd=0x7faa0c001fe0, lock_type=2) at /data/src/10.3/sql/handler.cc:6418
      #17 0x000055f9a2520301 in unlock_external (thd=0x7faa0c001fe0, table=0x55f9a5af56b8, count=1) at /data/src/10.3/sql/lock.cc:708
      #18 0x000055f9a251f708 in mysql_unlock_tables (thd=0x7faa0c001fe0, sql_lock=0x55f9a5af5688, free_lock=false) at /data/src/10.3/sql/lock.cc:429
      #19 0x000055f9a251f65f in mysql_unlock_tables (thd=0x7faa0c001fe0, sql_lock=0x55f9a5af5688) at /data/src/10.3/sql/lock.cc:413
      #20 0x000055f9a20270b3 in close_thread_tables (thd=0x7faa0c001fe0) at /data/src/10.3/sql/sql_base.cc:863
      #21 0x00007faa80a71ebe in spider_sys_close_table (thd=0x7faa0c001fe0, open_tables_backup=0x7faa82906910) at /data/src/10.3/storage/spider/spd_sys_table.cc:407
      #22 0x00007faa80a71c51 in spider_close_sys_table (thd=0x7faa0c001fe0, table=0x7faa0c02fea0, open_tables_backup=0x7faa82906910, need_lock=false) at /data/src/10.3/storage/spider/spd_sys_table.cc:352
      #23 0x00007faa80a7ce98 in spider_sys_insert_or_update_table_sts (thd=0x7faa0c001fe0, name=0x7faa70191728 "./test/t_SPIDER", name_length=15, data_file_length=0x7faa700c4cf0, max_data_file_length=0x7faa700c4cf8, index_file_length=0x7faa700c4d00, records=0x7faa700c4d08, mean_rec_length=0x7faa700c4d10, check_time=0x7faa700c4d18, create_time=0x7faa700c4d20, update_time=0x7faa700c4d28, need_lock=false) at /data/src/10.3/storage/spider/spd_sys_table.cc:2950
      #24 0x00007faa80af5142 in spider_free_share (share=0x7faa700c3b40) at /data/src/10.3/storage/spider/spd_table.cc:5779
      #25 0x00007faa80b2b055 in ha_spider::close (this=0x7faa700c1e48) at /data/src/10.3/storage/spider/ha_spider.cc:815
      #26 0x000055f9a23eacea in handler::ha_close (this=0x7faa700c1e48) at /data/src/10.3/sql/handler.cc:2844
      #27 0x000055f9a21e76ab in closefrm (table=0x7faa700c11e0) at /data/src/10.3/sql/table.cc:3790
      #28 0x000055f9a230a044 in intern_close_table (table=0x7faa700c11e0) at /data/src/10.3/sql/table_cache.cc:222
      #29 0x000055f9a230a4b2 in tc_purge (mark_flushed=true) at /data/src/10.3/sql/table_cache.cc:335
      #30 0x000055f9a2025f26 in close_cached_tables (thd=0x0, tables=0x0, wait_for_refresh=false, timeout=31536000) at /data/src/10.3/sql/sql_base.cc:377
      #31 0x000055f9a230bbe0 in tdc_start_shutdown () at /data/src/10.3/sql/table_cache.cc:660
      #32 0x000055f9a1f8d188 in clean_up (print_message=true) at /data/src/10.3/sql/mysqld.cc:2241
      #33 0x000055f9a1f8cdb0 in unireg_end () at /data/src/10.3/sql/mysqld.cc:2116
      #34 0x000055f9a1f8ccae in kill_server (sig_ptr=0x0) at /data/src/10.3/sql/mysqld.cc:2043
      #35 0x000055f9a1f8ccec in kill_server_thread (arg=0x7faa80ceade0) at /data/src/10.3/sql/mysqld.cc:2066
      #36 0x000055f9a2be854a in pfs_spawn_thread (arg=0x55f9a5c19850) at /data/src/10.3/storage/perfschema/pfs.cc:1869
      #37 0x00007faa86dc6ea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
      #38 0x00007faa86cf6def in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
      

      Reproducible on 10.3, 10.5+. On some reason couldn't reproduce on 10.4.
      Non-debug builds sometimes hang.

      The problem is particularly non-artificial for 10.3, because Debian installation of 10.3 includes a config file with query cache enabled.

      Attachments

        Issue Links

          Activity

            ycp Yuchen Pei added a comment - - edited

            The cause is that spider updates its sts system tables during the
            close() method, which is called a bit too late, after the call to
            query_cache_destroy() which destroys the structure_guard_mutex.

            10.10

            // Inside clean_up():
              query_cache_destroy();
            //  [... 4 lines elided]
              tdc_start_shutdown();

            However, to begin with, it is not clear whether the sts and crd
            tables need to be updated at all when freeing a spider_share.
            Removing the call to spider_sys_insert_or_update_table_sts() and
            spider_sys_insert_or_update_table_crd() from spider_free_share()
            fixes the test, and does not introduce any regressions in existing
            tests either, see the following commit

            6931635fa0f upstream/bb-10.10-mdev-28739 MDEV-28739 [demo] Commenting out spider sts and crd table updates fixes the problem

            In fact, spider_free_share() is the only place where the spider sts
            and crd tables are updated. sts tables, for example, are used to
            populate the spider handler's stat field. If the sts table does not
            have info about a remote table, spider will make a "show table
            status" to obtain this piece of information.

            Apart from failure paths of spider_open_share() etc.,
            spider_free_share() is only called in ha_spider::close(). So the sts
            and crd tables are only updated at ha_spider::close(). The freshness
            of the data is thus questionable. It remains to be seen whether
            spider checks for the freshness before querying these tables. In any
            case, this ticket is now about the usefulness of the spider sts and
            crd system tables, so I will group it with other relevant tickets
            about these tables under the label spider-sts-crd
            (<https://jira.mariadb.org/issues/?jql=labels%20%3D%20spider-sts-crd>).

            ycp Yuchen Pei added a comment - - edited The cause is that spider updates its sts system tables during the close() method, which is called a bit too late, after the call to query_cache_destroy() which destroys the structure_guard_mutex . 10.10 // Inside clean_up(): query_cache_destroy(); // [... 4 lines elided] tdc_start_shutdown(); However, to begin with, it is not clear whether the sts and crd tables need to be updated at all when freeing a spider_share. Removing the call to spider_sys_insert_or_update_table_sts() and spider_sys_insert_or_update_table_crd() from spider_free_share() fixes the test, and does not introduce any regressions in existing tests either, see the following commit 6931635fa0f upstream/bb-10.10-mdev-28739 MDEV-28739 [demo] Commenting out spider sts and crd table updates fixes the problem In fact, spider_free_share() is the only place where the spider sts and crd tables are updated. sts tables, for example, are used to populate the spider handler's stat field. If the sts table does not have info about a remote table, spider will make a "show table status" to obtain this piece of information. Apart from failure paths of spider_open_share() etc., spider_free_share() is only called in ha_spider::close(). So the sts and crd tables are only updated at ha_spider::close(). The freshness of the data is thus questionable. It remains to be seen whether spider checks for the freshness before querying these tables. In any case, this ticket is now about the usefulness of the spider sts and crd system tables, so I will group it with other relevant tickets about these tables under the label spider-sts-crd (< https://jira.mariadb.org/issues/?jql=labels%20%3D%20spider-sts-crd >).
            ycp Yuchen Pei added a comment -

            Draft commit, will send to review tomorrow if nothing new occurs:

            cb8df270bb3 MDEV-28739 MDEV-29421 Remove updating spider sts/crd tables from spider_free_share()

            We are moving the spider system tables altogether, since the only
            reference to the update function is removed. See also the commit message.

            ycp Yuchen Pei added a comment - Draft commit, will send to review tomorrow if nothing new occurs: cb8df270bb3 MDEV-28739 MDEV-29421 Remove updating spider sts/crd tables from spider_free_share() We are moving the spider system tables altogether, since the only reference to the update function is removed. See also the commit message.
            ycp Yuchen Pei added a comment -

            Hi holyfoot, ptal thanks (based on 11.0)

            c29972b314690bc94b890a2aaa6f2898c7880432
            MDEV-28739 MDEV-29421 Remove spider persistent table stats
             
            We remove the call to update spider persistent table stats (sts/crd)
            in spider_free_share(). This prevents spider from opening and closing
            further tables during close(), which fixes the following issues:
             
            MDEV-28739: ha_spider::close() is called during tdc_start_shutdown(),
            which is called after query_cache_destroy(). Closing the sts/crd Aria
            tables will trigger a call to Query_cache::invalidate_table(), which
            will attempt to use the query cache mutex structure_guard_mutex
            destroyed previously.
             
            MDEV-29421: during ha_spider::close(), spider_free_share() could
            trigger another spider_free_share() through updating sts/crd table,
            because open_table() calls tc_add_table(), which could trigger another
            ha_spider::close()...
             
            Since spider sts/crd system tables are only updated here, there's no
            use for these tables any more, and we remove all uses of these tables
            too.
             
            The removal should not cause any performance issue, as in memory
            spider table stats are only updated based on a time
            interval (spider_sts_interval and spider_crd_interval), which defaults
            to 10 seconds. It should not affect accuracy either, due to the
            infrequency of server restart. And inaccurate stats are not a problem
            for optimizer anyway.
             
            To be on the safe side, we defer the removal of the spider sts/crd
            tables themselves to future.
            

            Here's a 10.4 version:

            87bf3f002e6 MDEV-28739 MDEV-29421 Remove spider persistent table stats

            ycp Yuchen Pei added a comment - Hi holyfoot , ptal thanks (based on 11.0) c29972b314690bc94b890a2aaa6f2898c7880432 MDEV-28739 MDEV-29421 Remove spider persistent table stats   We remove the call to update spider persistent table stats (sts/crd) in spider_free_share(). This prevents spider from opening and closing further tables during close(), which fixes the following issues:   MDEV-28739: ha_spider::close() is called during tdc_start_shutdown(), which is called after query_cache_destroy(). Closing the sts/crd Aria tables will trigger a call to Query_cache::invalidate_table(), which will attempt to use the query cache mutex structure_guard_mutex destroyed previously.   MDEV-29421: during ha_spider::close(), spider_free_share() could trigger another spider_free_share() through updating sts/crd table, because open_table() calls tc_add_table(), which could trigger another ha_spider::close()...   Since spider sts/crd system tables are only updated here, there's no use for these tables any more, and we remove all uses of these tables too.   The removal should not cause any performance issue, as in memory spider table stats are only updated based on a time interval (spider_sts_interval and spider_crd_interval), which defaults to 10 seconds. It should not affect accuracy either, due to the infrequency of server restart. And inaccurate stats are not a problem for optimizer anyway.   To be on the safe side, we defer the removal of the spider sts/crd tables themselves to future. Here's a 10.4 version: 87bf3f002e6 MDEV-28739 MDEV-29421 Remove spider persistent table stats

            ok to push.

            holyfoot Alexey Botchkov added a comment - ok to push.
            ycp Yuchen Pei added a comment - - edited

            pushed bdfd93d30c1bba2a8932477f16f6280ee665d818 to 10.4

            Just noticed the comment in the description that it was not reproducible on 10.4 at reporting. Just to note that I could reproduce in 10.4 which is why I had a 10.4 fix.

            Conflict resolution (old commit, make sure to add add_suppression in the test mdev_27575):

            • 10.4->10.5: 1a97c706f072a55e27f9580e7bf6dc8727f4316d
            • 10.5->10.6: ad7620a25cf7f8f517c9f4373d9c1d00853a6141
            • 10.6->10.11: 7ba4f323c6990d1825a2754bf8577f16f5d4cf40
            • 10.11->11.0: e3ce81b1e90c73b1e21c8751c78572893e55b456
            • ES23.08: 57ab808c0cf34baa34a60df08368b1f4ae9e640e
            ycp Yuchen Pei added a comment - - edited pushed bdfd93d30c1bba2a8932477f16f6280ee665d818 to 10.4 Just noticed the comment in the description that it was not reproducible on 10.4 at reporting. Just to note that I could reproduce in 10.4 which is why I had a 10.4 fix. Conflict resolution (old commit, make sure to add add_suppression in the test mdev_27575): 10.4->10.5: 1a97c706f072a55e27f9580e7bf6dc8727f4316d 10.5->10.6: ad7620a25cf7f8f517c9f4373d9c1d00853a6141 10.6->10.11: 7ba4f323c6990d1825a2754bf8577f16f5d4cf40 10.11->11.0: e3ce81b1e90c73b1e21c8751c78572893e55b456 ES23.08: 57ab808c0cf34baa34a60df08368b1f4ae9e640e

            People

              ycp Yuchen Pei
              elenst Elena Stepanova
              Votes:
              0 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.