Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-24317

Data race in LOGGER::init_error_log at sql/log.cc:1443 and in LOGGER::error_log_print at sql/log.cc:1181

Details

    Description

      Simply starting the server (both optimized and debug) generates:

      • The sql/log.cc:1443 race, once at startup and once at shutdown
      • The sql/log.cc:1181 race, at startup

      10.5.9 1435f35bdabd078f0c4e744ab4bdfd8d4acca3ea (Optimized)

      WARNING: ThreadSanitizer: data race (pid=1100047)
        Write of size 8 at 0x55786da8fee0 by main thread (mutexes: write M1285):
          #0 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1443 (mariadbd+0xf13ac7)
          #1 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1432 (mariadbd+0xf13ac7)
          #2 LOGGER::set_handlers(unsigned long long, unsigned long long, unsigned long long) /test/10.5_opt_san/sql/log.cc:1611 (mariadbd+0xf13ac7)
          #3 init_server_components /test/10.5_opt_san/sql/mysqld.cc:5066 (mariadbd+0x7206d7)
          #4 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
          #5 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
        Previous read of size 8 at 0x55786da8fee0 by thread T5:
          #0 LOGGER::error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:1181 (mariadbd+0xf106b2)
          #1 error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:6818 (mariadbd+0xf106b2)
          #2 sql_print_information_v(char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:9167 (mariadbd+0xf106b2)
          #3 sql_print_information(char const*, ...) /test/10.5_opt_san/sql/log.cc:9156 (mariadbd+0xf106b2)
          #4 ib::info::~info() /test/10.5_opt_san/storage/innobase/ut/ut0ut.cc:543 (mariadbd+0x67f3c7)
          #5 buf_load_status /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:147 (mariadbd+0x68d3d3)
          #6 buf_load /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:501 (mariadbd+0x15c20b6)
          #7 buf_dump_load_func /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:758 (mariadbd+0x15c31e1)
          #8 tpool::task::execute() /test/10.5_opt_san/tpool/task.cc:52 (mariadbd+0x16f48df)
          #9 tpool::thread_pool_generic::worker_main(tpool::worker_data*) /test/10.5_opt_san/tpool/tpool_generic.cc:546 (mariadbd+0x16f1b54)
          #10 void std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(std::__invoke_memfun_deref, void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:73 (mariadbd+0x16f2a47)
          #11 std::__invoke_result<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>::type std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:95 (mariadbd+0x16f2a47)
          #12 void std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul>(std::_Index_tuple<0ul, 1ul, 2ul>) /usr/include/c++/9/thread:244 (mariadbd+0x16f2a47)
          #13 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator()() /usr/include/c++/9/thread:251 (mariadbd+0x16f2a47)
          #14 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run() /usr/include/c++/9/thread:195 (mariadbd+0x16f2a47)
          #15 <null> <null> (libstdc++.so.6+0xd6d83)
       
        Location is global 'logger' of size 192 at 0x55786da8fe80 (mariadbd+0x0000022beee0)
        Mutex M1285 (0x55786da8fe80) created at:
          #0 pthread_rwlock_init <null> (libtsan.so.0+0x3699f)
          #1 inline_mysql_rwlock_init /test/10.5_opt_san/include/mysql/psi/mysql_thread.h:870 (mariadbd+0xf12918)
          #2 LOGGER::init_base() /test/10.5_opt_san/sql/log.cc:1236 (mariadbd+0xf12918)
          #3 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5396 (mariadbd+0x72560e)
          #4 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
        Thread T5 (tid=1100058, running) created by main thread at:
          #0 pthread_create <null> (libtsan.so.0+0x5ea99)
          #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xd7048)
          #2 tpool::thread_pool_generic::maybe_wake_or_create_thread() /test/10.5_opt_san/tpool/tpool_generic.cc:802 (mariadbd+0x16f2787)
          #3 tpool::thread_pool_generic::submit_task(tpool::task*) /test/10.5_opt_san/tpool/tpool_generic.cc:821 (mariadbd+0x16f2787)
          #4 buf_flush_lists(unsigned long, unsigned long) /test/10.5_opt_san/storage/innobase/buf/buf0flu.cc:1533 (mariadbd+0x15ca505)
          #5 buf_flush_sync() /test/10.5_opt_san/storage/innobase/buf/buf0flu.cc:2289 (mariadbd+0x15cbfe8)
          #6 recv_sys_t::apply(bool) /test/10.5_opt_san/storage/innobase/log/log0recv.cc:2731 (mariadbd+0x13f4379)
          #7 srv_start(bool) /test/10.5_opt_san/storage/innobase/srv/srv0start.cc:1490 (mariadbd+0x673e74)
          #8 innodb_init /test/10.5_opt_san/storage/innobase/handler/ha_innodb.cc:3922 (mariadbd+0x1360400)
          #9 ha_initialize_handlerton(st_plugin_int*) /test/10.5_opt_san/sql/handler.cc:645 (mariadbd+0xcf9547)
          #10 plugin_initialize /test/10.5_opt_san/sql/sql_plugin.cc:1459 (mariadbd+0x8d3f74)
          #11 plugin_init(int*, char**, int) /test/10.5_opt_san/sql/sql_plugin.cc:1751 (mariadbd+0x8d5cc3)
          #12 init_server_components /test/10.5_opt_san/sql/mysqld.cc:4913 (mariadbd+0x72043d)
          #13 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
          #14 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
      SUMMARY: ThreadSanitizer: data race /test/10.5_opt_san/sql/log.cc:1443 in LOGGER::init_error_log(unsigned long long)
      

      void LOGGER::init_error_log(ulonglong error_log_printer)
      {
        if (error_log_printer & LOG_NONE)
        {
          error_log_handler_list[0]= 0;
          return;
        }
       
        switch (error_log_printer) {
        case LOG_FILE:
          error_log_handler_list[0]= file_log_handler;
          error_log_handler_list[1]= 0;                   //<---------------------- Here
          break;
          /* these two are disabled for now */
        case LOG_TABLE:
          DBUG_ASSERT(0);
          break;
        case LOG_TABLE|LOG_FILE:
          DBUG_ASSERT(0);
          break;
        }
      }
      

      Secondary race at sql/log.cc:1181 which may or may not be related.

      WARNING: ThreadSanitizer: data race (pid=1301692)
        Read of size 8 at 0x56342cab5ed8 by thread T8:
          #0 LOGGER::error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:1181 (mariadbd+0xf10663)
          #1 error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:6818 (mariadbd+0xf10663)
          #2 sql_print_information_v(char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:9167 (mariadbd+0xf10663)
          #3 sql_print_information(char const*, ...) /test/10.5_opt_san/sql/log.cc:9156 (mariadbd+0xf10663)
          #4 ib::info::~info() /test/10.5_opt_san/storage/innobase/ut/ut0ut.cc:543 (mariadbd+0x67f3c7)
          #5 buf_load_status /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:147 (mariadbd+0x68d3d3)
          #6 buf_load /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:717 (mariadbd+0x15c2d98)
          #7 buf_dump_load_func /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:758 (mariadbd+0x15c31e1)
          #8 tpool::task::execute() /test/10.5_opt_san/tpool/task.cc:52 (mariadbd+0x16f48df)
          #9 tpool::thread_pool_generic::worker_main(tpool::worker_data*) /test/10.5_opt_san/tpool/tpool_generic.cc:546 (mariadbd+0x16f1b54)
          #10 void std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(std::__invoke_memfun_deref, void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:73 (mariadbd+0x16f2a47)
          #11 std::__invoke_result<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>::type std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:95 (mariadbd+0x16f2a47)
          #12 void std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul>(std::_Index_tuple<0ul, 1ul, 2ul>) /usr/include/c++/9/thread:244 (mariadbd+0x16f2a47)
          #13 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator()() /usr/include/c++/9/thread:251 (mariadbd+0x16f2a47)
          #14 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run() /usr/include/c++/9/thread:195 (mariadbd+0x16f2a47)
          #15 <null> <null> (libstdc++.so.6+0xd6d83)
       
        Previous write of size 8 at 0x56342cab5ed8 by main thread (mutexes: write M1285):
          #0 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1442 (mariadbd+0xf13ab8)
          #1 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1432 (mariadbd+0xf13ab8)
          #2 LOGGER::set_handlers(unsigned long long, unsigned long long, unsigned long long) /test/10.5_opt_san/sql/log.cc:1611 (mariadbd+0xf13ab8)
          #3 init_server_components /test/10.5_opt_san/sql/mysqld.cc:5066 (mariadbd+0x7206d7)
          #4 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
          #5 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
        Location is global 'logger' of size 192 at 0x56342cab5e80 (mariadbd+0x0000022beed8)
       
        Mutex M1285 (0x56342cab5e80) created at:
          #0 pthread_rwlock_init <null> (libtsan.so.0+0x3699f)
          #1 inline_mysql_rwlock_init /test/10.5_opt_san/include/mysql/psi/mysql_thread.h:870 (mariadbd+0xf12918)
          #2 LOGGER::init_base() /test/10.5_opt_san/sql/log.cc:1236 (mariadbd+0xf12918)
          #3 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5396 (mariadbd+0x72560e)
          #4 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
        Thread T8 (tid=1302015, running) created by main thread at:
          #0 pthread_create <null> (libtsan.so.0+0x5ea99)
          #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xd7048)
          #2 tpool::thread_pool_generic::maybe_wake_or_create_thread() /test/10.5_opt_san/tpool/tpool_generic.cc:802 (mariadbd+0x16f2787)
          #3 tpool::thread_pool_generic::submit_task(tpool::task*) /test/10.5_opt_san/tpool/tpool_generic.cc:821 (mariadbd+0x16f2787)
          #4 buf_do_load_dump /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:812 (mariadbd+0x15c115c)
          #5 buf_load_at_startup() /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:806 (mariadbd+0x15c115c)
          #6 srv_start(bool) /test/10.5_opt_san/storage/innobase/srv/srv0start.cc:1927 (mariadbd+0x6752ad)
          #7 innodb_init /test/10.5_opt_san/storage/innobase/handler/ha_innodb.cc:3922 (mariadbd+0x1360400)
          #8 ha_initialize_handlerton(st_plugin_int*) /test/10.5_opt_san/sql/handler.cc:645 (mariadbd+0xcf9547)
          #9 plugin_initialize /test/10.5_opt_san/sql/sql_plugin.cc:1459 (mariadbd+0x8d3f74)
          #10 plugin_init(int*, char**, int) /test/10.5_opt_san/sql/sql_plugin.cc:1751 (mariadbd+0x8d5cc3)
          #11 init_server_components /test/10.5_opt_san/sql/mysqld.cc:4913 (mariadbd+0x72043d)
          #12 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
          #13 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
       
      SUMMARY: ThreadSanitizer: data race /test/10.5_opt_san/sql/log.cc:1181 in LOGGER::error_log_print(loglevel, char const*, __va_list_tag*)
      

      bool LOGGER::error_log_print(enum loglevel level, const char *format,
                                   va_list args)
      {
        bool error= FALSE;
        Log_event_handler **current_handler;
        THD *thd= current_thd;
       
        if (likely(thd))
          thd->error_printed_to_log= 1;
       
        /* currently we don't need locking here as there is no error_log table */
        for (current_handler= error_log_handler_list ; *current_handler ;)                //<--------------- Here
          error= (*current_handler++)->log_error(level, format, args) || error;
       
        return error;
      }
      

      Setup:

      Compiled with GCC >=7.5.0 (I use GCC 9.3.0) and:
          -DWITH_TSAN=ON -DWSREP_LIB_WITH_TSAN=ON -DMUTEXTYPE=sys
      Set before execution:
          export TSAN_OPTIONS=suppress_equal_stacks=1:suppress_equal_addresses=1:history_size=7:verbosity=1:exitcode=0
      

      Bug confirmed present in:
      MariaDB: 10.3.28 (dbg), 10.3.28 (opt), 10.4.18 (dbg), 10.4.18 (opt), 10.5.9 (dbg), 10.5.9 (opt), 10.6.0 (dbg), 10.6.0 (opt)

      Bug confirmed not present in:
      MariaDB: 10.1.49 (dbg), 10.1.49 (opt), 10.2.37 (dbg), 10.2.37 (opt)

      Attachments

        Activity

          kevg Eugene Kosov (Inactive) added a comment - - edited

          Probably, because of the bug server can loose one error log message at the very beginning.

          My patch would look like this:

          diff --git a/sql/log.cc b/sql/log.cc
          index 20fa37b4299..b47c4ce32d5 100644
          --- a/sql/log.cc
          +++ b/sql/log.cc
          @@ -1174,21 +1174,15 @@ void Log_to_file_event_handler::flush()
           bool LOGGER::error_log_print(enum loglevel level, const char *format,
                                        va_list args)
           {
          -  bool error= FALSE;
          -  Log_event_handler **current_handler;
             THD *thd= current_thd;
           
             if (likely(thd))
               thd->error_printed_to_log= 1;
           
          -  /* currently we don't need locking here as there is no error_log table */
          -  for (current_handler= error_log_handler_list ; *current_handler ;)
          -    error= (*current_handler++)->log_error(level, format, args) || error;
          -
          -  return error;
          +  return (*error_log_handler.load(std::memory_order_relaxed))
          +      .log_error(level, format, args);
           }
           
          -
           void LOGGER::cleanup_base()
           {
             DBUG_ASSERT(inited == 1);
          @@ -1442,14 +1436,13 @@ void LOGGER::init_error_log(ulonglong error_log_printer)
           {
             if (error_log_printer & LOG_NONE)
             {
          -    error_log_handler_list[0]= 0;
          +    error_log_handler= nullptr;
               return;
             }
           
             switch (error_log_printer) {
             case LOG_FILE:
          -    error_log_handler_list[0]= file_log_handler;
          -    error_log_handler_list[1]= 0;
          +    error_log_handler= file_log_handler;
               break;
               /* these two are disabled for now */
             case LOG_TABLE:
          diff --git a/sql/log.h b/sql/log.h
          index f2d620ef7fc..4f43e52ef22 100644
          --- a/sql/log.h
          +++ b/sql/log.h
          @@ -21,6 +21,8 @@
           #include "wsrep_mysqld.h"
           #include "rpl_constants.h"
           
          +#include <atomic>
          +
           class Relay_log_info;
           
           class Format_description_log_event;
          @@ -1047,8 +1049,8 @@ class LOGGER
             Log_to_csv_event_handler *table_log_handler;
             Log_to_file_event_handler *file_log_handler;
           
          +  std::atomic<Log_event_handler *> error_log_handler{nullptr};
             /* NULL-terminated arrays of log handlers */
          -  Log_event_handler *error_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];
             Log_event_handler *slow_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];
             Log_event_handler *general_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];
          

          kevg Eugene Kosov (Inactive) added a comment - - edited Probably, because of the bug server can loose one error log message at the very beginning. My patch would look like this: diff --git a/sql/log.cc b/sql/log.cc index 20fa37b4299..b47c4ce32d5 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -1174,21 +1174,15 @@ void Log_to_file_event_handler::flush() bool LOGGER::error_log_print(enum loglevel level, const char *format, va_list args) { - bool error= FALSE; - Log_event_handler **current_handler; THD *thd= current_thd; if (likely(thd)) thd->error_printed_to_log= 1; - /* currently we don't need locking here as there is no error_log table */ - for (current_handler= error_log_handler_list ; *current_handler ;) - error= (*current_handler++)->log_error(level, format, args) || error; - - return error; + return (*error_log_handler.load(std::memory_order_relaxed)) + .log_error(level, format, args); } - void LOGGER::cleanup_base() { DBUG_ASSERT(inited == 1); @@ -1442,14 +1436,13 @@ void LOGGER::init_error_log(ulonglong error_log_printer) { if (error_log_printer & LOG_NONE) { - error_log_handler_list[0]= 0; + error_log_handler= nullptr; return; } switch (error_log_printer) { case LOG_FILE: - error_log_handler_list[0]= file_log_handler; - error_log_handler_list[1]= 0; + error_log_handler= file_log_handler; break; /* these two are disabled for now */ case LOG_TABLE: diff --git a/sql/log.h b/sql/log.h index f2d620ef7fc..4f43e52ef22 100644 --- a/sql/log.h +++ b/sql/log.h @@ -21,6 +21,8 @@ #include "wsrep_mysqld.h" #include "rpl_constants.h" +#include <atomic> + class Relay_log_info; class Format_description_log_event; @@ -1047,8 +1049,8 @@ class LOGGER Log_to_csv_event_handler *table_log_handler; Log_to_file_event_handler *file_log_handler; + std::atomic<Log_event_handler *> error_log_handler{nullptr}; /* NULL-terminated arrays of log handlers */ - Log_event_handler *error_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; Log_event_handler *slow_log_handler_list[MAX_LOG_HANDLERS_NUM + 1]; Log_event_handler *general_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];

          There's no reason to use atomic load for something that never changes run-time.

          Better initialize FILE handlers before storage engines and TABLE handlers after. Or initialize all handlers before storage engines (TABLE handler doesn't actually need the engine until it tries to write to the log). Or initialize error log before engines and slow/general logs after.

          serg Sergei Golubchik added a comment - There's no reason to use atomic load for something that never changes run-time. Better initialize FILE handlers before storage engines and TABLE handlers after. Or initialize all handlers before storage engines (TABLE handler doesn't actually need the engine until it tries to write to the log). Or initialize error log before engines and slow/general logs after.

          People

            serg Sergei Golubchik
            Roel Roel Van de Paar
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.