[MDEV-24317] Data race in LOGGER::init_error_log at sql/log.cc:1443 and in LOGGER::error_log_print at sql/log.cc:1181 Created: 2020-12-01  Updated: 2022-04-21  Resolved: 2022-04-12

Status: Closed
Project: MariaDB Server
Component/s: Tests
Affects Version/s: 10.3, 10.4, 10.5, 10.6
Fix Version/s: 10.3.35, 10.4.25, 10.5.16, 10.6.8, 10.7.4

Type: Bug Priority: Minor
Reporter: Roel Van de Paar Assignee: Sergei Golubchik
Resolution: Fixed Votes: 0
Labels: affects-tests, not-10.1, not-10.2, tsan


 Description   

Simply starting the server (both optimized and debug) generates:

  • The sql/log.cc:1443 race, once at startup and once at shutdown
  • The sql/log.cc:1181 race, at startup

10.5.9 1435f35bdabd078f0c4e744ab4bdfd8d4acca3ea (Optimized)

WARNING: ThreadSanitizer: data race (pid=1100047)
  Write of size 8 at 0x55786da8fee0 by main thread (mutexes: write M1285):
    #0 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1443 (mariadbd+0xf13ac7)
    #1 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1432 (mariadbd+0xf13ac7)
    #2 LOGGER::set_handlers(unsigned long long, unsigned long long, unsigned long long) /test/10.5_opt_san/sql/log.cc:1611 (mariadbd+0xf13ac7)
    #3 init_server_components /test/10.5_opt_san/sql/mysqld.cc:5066 (mariadbd+0x7206d7)
    #4 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
    #5 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
  Previous read of size 8 at 0x55786da8fee0 by thread T5:
    #0 LOGGER::error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:1181 (mariadbd+0xf106b2)
    #1 error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:6818 (mariadbd+0xf106b2)
    #2 sql_print_information_v(char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:9167 (mariadbd+0xf106b2)
    #3 sql_print_information(char const*, ...) /test/10.5_opt_san/sql/log.cc:9156 (mariadbd+0xf106b2)
    #4 ib::info::~info() /test/10.5_opt_san/storage/innobase/ut/ut0ut.cc:543 (mariadbd+0x67f3c7)
    #5 buf_load_status /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:147 (mariadbd+0x68d3d3)
    #6 buf_load /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:501 (mariadbd+0x15c20b6)
    #7 buf_dump_load_func /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:758 (mariadbd+0x15c31e1)
    #8 tpool::task::execute() /test/10.5_opt_san/tpool/task.cc:52 (mariadbd+0x16f48df)
    #9 tpool::thread_pool_generic::worker_main(tpool::worker_data*) /test/10.5_opt_san/tpool/tpool_generic.cc:546 (mariadbd+0x16f1b54)
    #10 void std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(std::__invoke_memfun_deref, void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:73 (mariadbd+0x16f2a47)
    #11 std::__invoke_result<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>::type std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:95 (mariadbd+0x16f2a47)
    #12 void std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul>(std::_Index_tuple<0ul, 1ul, 2ul>) /usr/include/c++/9/thread:244 (mariadbd+0x16f2a47)
    #13 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator()() /usr/include/c++/9/thread:251 (mariadbd+0x16f2a47)
    #14 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run() /usr/include/c++/9/thread:195 (mariadbd+0x16f2a47)
    #15 <null> <null> (libstdc++.so.6+0xd6d83)
 
  Location is global 'logger' of size 192 at 0x55786da8fe80 (mariadbd+0x0000022beee0)
  Mutex M1285 (0x55786da8fe80) created at:
    #0 pthread_rwlock_init <null> (libtsan.so.0+0x3699f)
    #1 inline_mysql_rwlock_init /test/10.5_opt_san/include/mysql/psi/mysql_thread.h:870 (mariadbd+0xf12918)
    #2 LOGGER::init_base() /test/10.5_opt_san/sql/log.cc:1236 (mariadbd+0xf12918)
    #3 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5396 (mariadbd+0x72560e)
    #4 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
  Thread T5 (tid=1100058, running) created by main thread at:
    #0 pthread_create <null> (libtsan.so.0+0x5ea99)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xd7048)
    #2 tpool::thread_pool_generic::maybe_wake_or_create_thread() /test/10.5_opt_san/tpool/tpool_generic.cc:802 (mariadbd+0x16f2787)
    #3 tpool::thread_pool_generic::submit_task(tpool::task*) /test/10.5_opt_san/tpool/tpool_generic.cc:821 (mariadbd+0x16f2787)
    #4 buf_flush_lists(unsigned long, unsigned long) /test/10.5_opt_san/storage/innobase/buf/buf0flu.cc:1533 (mariadbd+0x15ca505)
    #5 buf_flush_sync() /test/10.5_opt_san/storage/innobase/buf/buf0flu.cc:2289 (mariadbd+0x15cbfe8)
    #6 recv_sys_t::apply(bool) /test/10.5_opt_san/storage/innobase/log/log0recv.cc:2731 (mariadbd+0x13f4379)
    #7 srv_start(bool) /test/10.5_opt_san/storage/innobase/srv/srv0start.cc:1490 (mariadbd+0x673e74)
    #8 innodb_init /test/10.5_opt_san/storage/innobase/handler/ha_innodb.cc:3922 (mariadbd+0x1360400)
    #9 ha_initialize_handlerton(st_plugin_int*) /test/10.5_opt_san/sql/handler.cc:645 (mariadbd+0xcf9547)
    #10 plugin_initialize /test/10.5_opt_san/sql/sql_plugin.cc:1459 (mariadbd+0x8d3f74)
    #11 plugin_init(int*, char**, int) /test/10.5_opt_san/sql/sql_plugin.cc:1751 (mariadbd+0x8d5cc3)
    #12 init_server_components /test/10.5_opt_san/sql/mysqld.cc:4913 (mariadbd+0x72043d)
    #13 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
    #14 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
SUMMARY: ThreadSanitizer: data race /test/10.5_opt_san/sql/log.cc:1443 in LOGGER::init_error_log(unsigned long long)

void LOGGER::init_error_log(ulonglong error_log_printer)
{
  if (error_log_printer & LOG_NONE)
  {
    error_log_handler_list[0]= 0;
    return;
  }
 
  switch (error_log_printer) {
  case LOG_FILE:
    error_log_handler_list[0]= file_log_handler;
    error_log_handler_list[1]= 0;                   //<---------------------- Here
    break;
    /* these two are disabled for now */
  case LOG_TABLE:
    DBUG_ASSERT(0);
    break;
  case LOG_TABLE|LOG_FILE:
    DBUG_ASSERT(0);
    break;
  }
}

Secondary race at sql/log.cc:1181 which may or may not be related.

WARNING: ThreadSanitizer: data race (pid=1301692)
  Read of size 8 at 0x56342cab5ed8 by thread T8:
    #0 LOGGER::error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:1181 (mariadbd+0xf10663)
    #1 error_log_print(loglevel, char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:6818 (mariadbd+0xf10663)
    #2 sql_print_information_v(char const*, __va_list_tag*) /test/10.5_opt_san/sql/log.cc:9167 (mariadbd+0xf10663)
    #3 sql_print_information(char const*, ...) /test/10.5_opt_san/sql/log.cc:9156 (mariadbd+0xf10663)
    #4 ib::info::~info() /test/10.5_opt_san/storage/innobase/ut/ut0ut.cc:543 (mariadbd+0x67f3c7)
    #5 buf_load_status /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:147 (mariadbd+0x68d3d3)
    #6 buf_load /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:717 (mariadbd+0x15c2d98)
    #7 buf_dump_load_func /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:758 (mariadbd+0x15c31e1)
    #8 tpool::task::execute() /test/10.5_opt_san/tpool/task.cc:52 (mariadbd+0x16f48df)
    #9 tpool::thread_pool_generic::worker_main(tpool::worker_data*) /test/10.5_opt_san/tpool/tpool_generic.cc:546 (mariadbd+0x16f1b54)
    #10 void std::__invoke_impl<void, void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(std::__invoke_memfun_deref, void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:73 (mariadbd+0x16f2a47)
    #11 std::__invoke_result<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>::type std::__invoke<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*>(void (tpool::thread_pool_generic::*&&)(tpool::worker_data*), tpool::thread_pool_generic*&&, tpool::worker_data*&&) /usr/include/c++/9/bits/invoke.h:95 (mariadbd+0x16f2a47)
    #12 void std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::_M_invoke<0ul, 1ul, 2ul>(std::_Index_tuple<0ul, 1ul, 2ul>) /usr/include/c++/9/thread:244 (mariadbd+0x16f2a47)
    #13 std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> >::operator()() /usr/include/c++/9/thread:251 (mariadbd+0x16f2a47)
    #14 std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (tpool::thread_pool_generic::*)(tpool::worker_data*), tpool::thread_pool_generic*, tpool::worker_data*> > >::_M_run() /usr/include/c++/9/thread:195 (mariadbd+0x16f2a47)
    #15 <null> <null> (libstdc++.so.6+0xd6d83)
 
  Previous write of size 8 at 0x56342cab5ed8 by main thread (mutexes: write M1285):
    #0 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1442 (mariadbd+0xf13ab8)
    #1 LOGGER::init_error_log(unsigned long long) /test/10.5_opt_san/sql/log.cc:1432 (mariadbd+0xf13ab8)
    #2 LOGGER::set_handlers(unsigned long long, unsigned long long, unsigned long long) /test/10.5_opt_san/sql/log.cc:1611 (mariadbd+0xf13ab8)
    #3 init_server_components /test/10.5_opt_san/sql/mysqld.cc:5066 (mariadbd+0x7206d7)
    #4 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
    #5 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
  Location is global 'logger' of size 192 at 0x56342cab5e80 (mariadbd+0x0000022beed8)
 
  Mutex M1285 (0x56342cab5e80) created at:
    #0 pthread_rwlock_init <null> (libtsan.so.0+0x3699f)
    #1 inline_mysql_rwlock_init /test/10.5_opt_san/include/mysql/psi/mysql_thread.h:870 (mariadbd+0xf12918)
    #2 LOGGER::init_base() /test/10.5_opt_san/sql/log.cc:1236 (mariadbd+0xf12918)
    #3 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5396 (mariadbd+0x72560e)
    #4 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
  Thread T8 (tid=1302015, running) created by main thread at:
    #0 pthread_create <null> (libtsan.so.0+0x5ea99)
    #1 std::thread::_M_start_thread(std::unique_ptr<std::thread::_State, std::default_delete<std::thread::_State> >, void (*)()) <null> (libstdc++.so.6+0xd7048)
    #2 tpool::thread_pool_generic::maybe_wake_or_create_thread() /test/10.5_opt_san/tpool/tpool_generic.cc:802 (mariadbd+0x16f2787)
    #3 tpool::thread_pool_generic::submit_task(tpool::task*) /test/10.5_opt_san/tpool/tpool_generic.cc:821 (mariadbd+0x16f2787)
    #4 buf_do_load_dump /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:812 (mariadbd+0x15c115c)
    #5 buf_load_at_startup() /test/10.5_opt_san/storage/innobase/buf/buf0dump.cc:806 (mariadbd+0x15c115c)
    #6 srv_start(bool) /test/10.5_opt_san/storage/innobase/srv/srv0start.cc:1927 (mariadbd+0x6752ad)
    #7 innodb_init /test/10.5_opt_san/storage/innobase/handler/ha_innodb.cc:3922 (mariadbd+0x1360400)
    #8 ha_initialize_handlerton(st_plugin_int*) /test/10.5_opt_san/sql/handler.cc:645 (mariadbd+0xcf9547)
    #9 plugin_initialize /test/10.5_opt_san/sql/sql_plugin.cc:1459 (mariadbd+0x8d3f74)
    #10 plugin_init(int*, char**, int) /test/10.5_opt_san/sql/sql_plugin.cc:1751 (mariadbd+0x8d5cc3)
    #11 init_server_components /test/10.5_opt_san/sql/mysqld.cc:4913 (mariadbd+0x72043d)
    #12 mysqld_main(int, char**) /test/10.5_opt_san/sql/mysqld.cc:5496 (mariadbd+0x725bf4)
    #13 main /test/10.5_opt_san/sql/main.cc:25 (mariadbd+0x6b8455)
 
SUMMARY: ThreadSanitizer: data race /test/10.5_opt_san/sql/log.cc:1181 in LOGGER::error_log_print(loglevel, char const*, __va_list_tag*)

bool LOGGER::error_log_print(enum loglevel level, const char *format,
                             va_list args)
{
  bool error= FALSE;
  Log_event_handler **current_handler;
  THD *thd= current_thd;
 
  if (likely(thd))
    thd->error_printed_to_log= 1;
 
  /* currently we don't need locking here as there is no error_log table */
  for (current_handler= error_log_handler_list ; *current_handler ;)                //<--------------- Here
    error= (*current_handler++)->log_error(level, format, args) || error;
 
  return error;
}

Setup:

Compiled with GCC >=7.5.0 (I use GCC 9.3.0) and:
    -DWITH_TSAN=ON -DWSREP_LIB_WITH_TSAN=ON -DMUTEXTYPE=sys
Set before execution:
    export TSAN_OPTIONS=suppress_equal_stacks=1:suppress_equal_addresses=1:history_size=7:verbosity=1:exitcode=0

Bug confirmed present in:
MariaDB: 10.3.28 (dbg), 10.3.28 (opt), 10.4.18 (dbg), 10.4.18 (opt), 10.5.9 (dbg), 10.5.9 (opt), 10.6.0 (dbg), 10.6.0 (opt)

Bug confirmed not present in:
MariaDB: 10.1.49 (dbg), 10.1.49 (opt), 10.2.37 (dbg), 10.2.37 (opt)



 Comments   
Comment by Eugene Kosov (Inactive) [ 2020-12-02 ]

Probably, because of the bug server can loose one error log message at the very beginning.

My patch would look like this:

diff --git a/sql/log.cc b/sql/log.cc
index 20fa37b4299..b47c4ce32d5 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1174,21 +1174,15 @@ void Log_to_file_event_handler::flush()
 bool LOGGER::error_log_print(enum loglevel level, const char *format,
                              va_list args)
 {
-  bool error= FALSE;
-  Log_event_handler **current_handler;
   THD *thd= current_thd;
 
   if (likely(thd))
     thd->error_printed_to_log= 1;
 
-  /* currently we don't need locking here as there is no error_log table */
-  for (current_handler= error_log_handler_list ; *current_handler ;)
-    error= (*current_handler++)->log_error(level, format, args) || error;
-
-  return error;
+  return (*error_log_handler.load(std::memory_order_relaxed))
+      .log_error(level, format, args);
 }
 
-
 void LOGGER::cleanup_base()
 {
   DBUG_ASSERT(inited == 1);
@@ -1442,14 +1436,13 @@ void LOGGER::init_error_log(ulonglong error_log_printer)
 {
   if (error_log_printer & LOG_NONE)
   {
-    error_log_handler_list[0]= 0;
+    error_log_handler= nullptr;
     return;
   }
 
   switch (error_log_printer) {
   case LOG_FILE:
-    error_log_handler_list[0]= file_log_handler;
-    error_log_handler_list[1]= 0;
+    error_log_handler= file_log_handler;
     break;
     /* these two are disabled for now */
   case LOG_TABLE:
diff --git a/sql/log.h b/sql/log.h
index f2d620ef7fc..4f43e52ef22 100644
--- a/sql/log.h
+++ b/sql/log.h
@@ -21,6 +21,8 @@
 #include "wsrep_mysqld.h"
 #include "rpl_constants.h"
 
+#include <atomic>
+
 class Relay_log_info;
 
 class Format_description_log_event;
@@ -1047,8 +1049,8 @@ class LOGGER
   Log_to_csv_event_handler *table_log_handler;
   Log_to_file_event_handler *file_log_handler;
 
+  std::atomic<Log_event_handler *> error_log_handler{nullptr};
   /* NULL-terminated arrays of log handlers */
-  Log_event_handler *error_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];
   Log_event_handler *slow_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];
   Log_event_handler *general_log_handler_list[MAX_LOG_HANDLERS_NUM + 1];

Comment by Sergei Golubchik [ 2020-12-21 ]

There's no reason to use atomic load for something that never changes run-time.

Better initialize FILE handlers before storage engines and TABLE handlers after. Or initialize all handlers before storage engines (TABLE handler doesn't actually need the engine until it tries to write to the log). Or initialize error log before engines and slow/general logs after.

Generated at Thu Feb 08 09:29:04 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.