Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-35829

galera node crash with race condition

Details

    • Bug
    • Status: Open (View Workflow)
    • Major
    • Resolution: Unresolved
    • 10.11.10
    • 10.11
    • Galera
    • None
    • 3 nodes with load balancer; Hetzner Cloud Server CCX43 (CPU 16, RAM 64GB, Disk 200GB); Debian 12; MariaDB 10.11.10; Galera 4 26.4.20; happens with wsrep using either rsync or mariabackup

    Description

      Race condition with 100% CPU usage, always with the same message (the affected InnoDB table changes from crash to crash):

      2025-01-04  5:00:19 0 [ERROR] InnoDB: ibuf cursor restoration fails! ibuf record inserted to page [page id: space=14819, page number=396] in file ./fm@002dcz@002dprod/wp_gdmaq_log_email.ibd
      2025-01-04  5:00:19 0 [ERROR] InnoDB: Submit a detailed bug report to https://jira.mariadb.org/
      PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
      0: len 4; hex 000039e3; asc   9 ;;
      1: len 1; hex 00; asc  ;;
      2: len 4; hex 0000018c; asc     ;;
      3: len 16; hex 00010101860800088000860800088000; asc                 ;;
      4: len 8; hex 00000000000413d1; asc         ;;
      5: len 8; hex 00000000000a5853; asc       XS;;
      PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
      0: len 4; hex 000039e3; asc   9 ;;
      1: len 1; hex 00; asc  ;;
      2: len 4; hex 0000018c; asc     ;;
      3: len 16; hex 00010101860800088000860800088000; asc                 ;;
      4: len 8; hex 00000000000413d1; asc         ;;
      5: len 8; hex 00000000000a5853; asc       XS;;
      DATA TUPLE: 3 fields;
      0: len 4; hex 000039e3; asc   9 ;;
      1: len 1; hex 00; asc  ;;
      2: len 4; hex 0000018c; asc     ;;
      

      ----------------------------------------------------------
      60-galera.cnf
      ---------------------------------------------------------------------------------------------------------

      [mysqld]
      wsrep_provider              = /usr/lib/galera/libgalera_smm.so
      wsrep_on                    = ON
      wsrep_cluster_name          = Forum Prod Galera Cluster
      wsrep_cluster_address       = gcomm://10.1.1.2,10.1.1.3,10.1.1.4
      wsrep_node_name             = DB03
      wsrep_node_address          = 10.1.1.4
      wsrep_provider_options      = "gmcast.listen_addr=tcp://10.1.1.4:4567; gcache.size=10G; gcache.page_size=10G"
      binlog_format               = ROW
      default_storage_engine      = InnoDB
      wsrep_slave_threads         = 16
      wsrep_load_data_splitting   = ON
      innodb_autoinc_lock_mode    = 2
       
      bind-address = 10.0.0.15,10.1.1.4
      

      ---------------------------------------------------
      50-server.cnf
      ------------------------------------------------------------------------------------------

      # These groups are read by MariaDB server.
      # Use it for options that only the server (but not clients) should see
      [server]
      # this is only for the mysqld standalone daemon
      [mysqld]
      #
      # * Basic Settings
      #
      pid-file                = /run/mysqld/mysqld.pid
      basedir                 = /usr
      skip-name-resolve
      #
      # * Fine Tuning
      #
      thread_cache_size = 100
      table_cache = 2500
      join_buffer_size = 64M
      tmp_table_size = 64M
      max_heap_table_size = 64M
      table_definition_cache = 3500
      #
      # * Logging and Replication
      #
      # Both location gets rotated by the cronjob.
      # Be aware that this log type is a performance killer.
      # Recommend only changing this at runtime for short testing periods if needed!
      # general_log_file       = /var/log/mysql/mysql.log
      # general_log            = 1
       
      # When running under systemd, error logging goes via stdout/stderr to journald
      # and when running legacy init error logging goes to syslog due to
      # /etc/mysql/conf.d/mariadb.conf.d/50-mysqld_safe.cnf
      # Enable this if you want to have error logging into a separate file
      log_error = /var/log/mysql/error.log
      # Enable the slow query log to see queries with especially long duration
      log_slow_query_file    = /var/log/mysql/mariadb-slow.log
      log_slow_query_time    = 10
      log_slow_verbosity     = query_plan,explain
      # log-queries-not-using-indexes
      # log_slow_min_examined_row_limit = 1000
       
      # The following can be used as easy to replay backup logs or for replication.
      # note: if you are setting up a replication slave, see README.Debian about
      #       other settings you may need to change.
      expire_logs_days        = 10
      #
      # * SSL/TLS
      #
      ssl-ca = /etc/mysql/certs/ca-cert.pem
      ssl-key = /etc/mysql/certs/server-key.pem
      ssl-cert = /etc/mysql/certs/server-cert.pem
      proxy-protocol-networks=10.0.0.0/24,localhost
      require-secure-transport = off
      #
      # * Character sets
      #
      character-set-server  = utf8mb4
      collation-server      = utf8mb4_general_ci
      #
      # * InnoDB
      #
      innodb_flush_log_at_trx_commit=0
      innodb_flush_method=O_DIRECT
      innodb_doublewrite=1
      innodb_autoextend_increment=64
      innodb_file_per_table=1
      innodb_thread_concurrency=0
      innodb_buffer_pool_size=46G
      innodb_buffer_pool_instances=10
      innodb_log_file_size=8G
      innodb_io_capacity = 2000
      innodb_io_capacity_max = 5000
      innodb_checksum_algorithm = crc32
      innodb_log_compressed_pages=OFF
      innodb_change_buffering=all
      innodb_autoinc_lock_mode=2
      [mariadb]
      key_buffer_size        = 128M
      net_buffer_length      = 1048576
      max_allowed_packet     = 1024M
      query_cache_type=0
      query_cache_size=0
      performance_schema=on
      wait_timeout=300
      proxy-protocol-networks=10.0.0.0/24,localhost
      # Hetzner recommendations
      max_connect_errors    = 4294967295
      max_connections       = 100000
      

      Attachments

        Issue Links

          Activity

            hriesz Hansjuergen Riess created issue -
            hriesz Hansjuergen Riess made changes -
            Field Original Value New Value
            Environment Hetzner Cloud Server CCX43 CPU 16 RAM 64GB Disk 200GB Debian 12 mariadb 10.11.10 galera 4 26.4.20 3 nodes with loadbalancer Hetzner Cloud Server CCX43 CPU 16 RAM 64GB Disk 200GB Debian 12 mariadb 10.11.10 galera 4 26.4.20
            hriesz Hansjuergen Riess made changes -
            Environment 3 nodes with loadbalancer Hetzner Cloud Server CCX43 CPU 16 RAM 64GB Disk 200GB Debian 12 mariadb 10.11.10 galera 4 26.4.20 3 nodes with loadbalancer Hetzner Cloud Server CCX43 CPU 16 RAM 64GB Disk 200GB Debian 12 mariadb 10.11.10 galera 4 26.4.20 happens with ws-rep rsync and mariabackup
            serg Sergei Golubchik made changes -
            Description Race condition CPU 100% with always the same message (from crash to crash innodb table changes):

            2025-01-04 5:00:19 0 [ERROR] InnoDB: ibuf cursor restoration fails! ibuf record inserted to page [page id: space=14819, page number=396] in file ./fm@002dcz@002dprod/wp_gdmaq_log_email.ibd
            2025-01-04 5:00:19 0 [ERROR] InnoDB: Submit a detailed bug report to https://jira.mariadb.org/
            PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;
            3: len 16; hex 00010101860800088000860800088000; asc ;;
            4: len 8; hex 00000000000413d1; asc ;;
            5: len 8; hex 00000000000a5853; asc XS;;
            PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;
            3: len 16; hex 00010101860800088000860800088000; asc ;;
            4: len 8; hex 00000000000413d1; asc ;;
            5: len 8; hex 00000000000a5853; asc XS;;
            DATA TUPLE: 3 fields;
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;

            ----------------------------------------------------------
            60-galera-cnf
            ---------------------------------------------------------------------------------------------------------
            [mysqld]
            wsrep_provider = /usr/lib/galera/libgalera_smm.so
            wsrep_on = ON
            wsrep_cluster_name = Forum Prod Galera Cluster
            wsrep_cluster_address = gcomm://10.1.1.2,10.1.1.3,10.1.1.4
            wsrep_node_name = DB03
            wsrep_node_address = 10.1.1.4
            wsrep_provider_options = "gmcast.listen_addr=tcp://10.1.1.4:4567; gcache.size=10G; gcache.page_size=10G"
            binlog_format = ROW
            default_storage_engine = InnoDB
            wsrep_slave_threads = 16
            wsrep_load_data_splitting = ON
            innodb_autoinc_lock_mode = 2

            bind-address = 10.0.0.15,10.1.1.4
            ---------------------------------------------------
            50-server.cnf
            ------------------------------------------------------------------------------------------
            # These groups are read by MariaDB server.
            # Use it for options that only the server (but not clients) should see
            [server]
            # this is only for the mysqld standalone daemon
            [mysqld]
            #
            # * Basic Settings
            #
            pid-file = /run/mysqld/mysqld.pid
            basedir = /usr
            skip-name-resolve
            #
            # * Fine Tuning
            #
            thread_cache_size = 100
            table_cache = 2500
            join_buffer_size = 64M
            tmp_table_size = 64M
            max_heap_table_size = 64M
            table_definition_cache = 3500
            #
            # * Logging and Replication
            #
            # Both location gets rotated by the cronjob.
            # Be aware that this log type is a performance killer.
            # Recommend only changing this at runtime for short testing periods if needed!
            # general_log_file = /var/log/mysql/mysql.log
            # general_log = 1

            # When running under systemd, error logging goes via stdout/stderr to journald
            # and when running legacy init error logging goes to syslog due to
            # /etc/mysql/conf.d/mariadb.conf.d/50-mysqld_safe.cnf
            # Enable this if you want to have error logging into a separate file
            log_error = /var/log/mysql/error.log
            # Enable the slow query log to see queries with especially long duration
            log_slow_query_file = /var/log/mysql/mariadb-slow.log
            log_slow_query_time = 10
            log_slow_verbosity = query_plan,explain
            # log-queries-not-using-indexes
            # log_slow_min_examined_row_limit = 1000

            # The following can be used as easy to replay backup logs or for replication.
            # note: if you are setting up a replication slave, see README.Debian about
            # other settings you may need to change.
            expire_logs_days = 10
            #
            # * SSL/TLS
            #
            ssl-ca = /etc/mysql/certs/ca-cert.pem
            ssl-key = /etc/mysql/certs/server-key.pem
            ssl-cert = /etc/mysql/certs/server-cert.pem
            proxy-protocol-networks=10.0.0.0/24,localhost
            require-secure-transport = off
            #
            # * Character sets
            #
            character-set-server = utf8mb4
            collation-server = utf8mb4_general_ci
            #
            # * InnoDB
            #
            innodb_flush_log_at_trx_commit=0
            innodb_flush_method=O_DIRECT
            innodb_doublewrite=1
            innodb_autoextend_increment=64
            innodb_file_per_table=1
            innodb_thread_concurrency=0
            innodb_buffer_pool_size=46G
            innodb_buffer_pool_instances=10
            innodb_log_file_size=8G
            innodb_io_capacity = 2000
            innodb_io_capacity_max = 5000
            innodb_checksum_algorithm = crc32
            innodb_log_compressed_pages=OFF
            innodb_change_buffering=all
            innodb_autoinc_lock_mode=2
            [mariadb]
            key_buffer_size = 128M
            net_buffer_length = 1048576
            max_allowed_packet = 1024M
            query_cache_type=0
            query_cache_size=0
            performance_schema=on
            wait_timeout=300
            proxy-protocol-networks=10.0.0.0/24,localhost
            # Hetzner recommendations
            max_connect_errors = 4294967295
            max_connections = 100000


            Race condition CPU 100% with always the same message (from crash to crash innodb table changes):
            {noformat}
            2025-01-04 5:00:19 0 [ERROR] InnoDB: ibuf cursor restoration fails! ibuf record inserted to page [page id: space=14819, page number=396] in file ./fm@002dcz@002dprod/wp_gdmaq_log_email.ibd
            2025-01-04 5:00:19 0 [ERROR] InnoDB: Submit a detailed bug report to https://jira.mariadb.org/
            PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;
            3: len 16; hex 00010101860800088000860800088000; asc ;;
            4: len 8; hex 00000000000413d1; asc ;;
            5: len 8; hex 00000000000a5853; asc XS;;
            PHYSICAL RECORD: n_fields 6; 1-byte offsets; info bits 0
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;
            3: len 16; hex 00010101860800088000860800088000; asc ;;
            4: len 8; hex 00000000000413d1; asc ;;
            5: len 8; hex 00000000000a5853; asc XS;;
            DATA TUPLE: 3 fields;
            0: len 4; hex 000039e3; asc 9 ;;
            1: len 1; hex 00; asc ;;
            2: len 4; hex 0000018c; asc ;;
            {noformat}
            ----------------------------------------------------------
            60-galera-cnf
            ---------------------------------------------------------------------------------------------------------
            {noformat}
            [mysqld]
            wsrep_provider = /usr/lib/galera/libgalera_smm.so
            wsrep_on = ON
            wsrep_cluster_name = Forum Prod Galera Cluster
            wsrep_cluster_address = gcomm://10.1.1.2,10.1.1.3,10.1.1.4
            wsrep_node_name = DB03
            wsrep_node_address = 10.1.1.4
            wsrep_provider_options = "gmcast.listen_addr=tcp://10.1.1.4:4567; gcache.size=10G; gcache.page_size=10G"
            binlog_format = ROW
            default_storage_engine = InnoDB
            wsrep_slave_threads = 16
            wsrep_load_data_splitting = ON
            innodb_autoinc_lock_mode = 2

            bind-address = 10.0.0.15,10.1.1.4
            {noformat}
            ---------------------------------------------------
            50-server.cnf
            ------------------------------------------------------------------------------------------
            {noformat}
            # These groups are read by MariaDB server.
            # Use it for options that only the server (but not clients) should see
            [server]
            # this is only for the mysqld standalone daemon
            [mysqld]
            #
            # * Basic Settings
            #
            pid-file = /run/mysqld/mysqld.pid
            basedir = /usr
            skip-name-resolve
            #
            # * Fine Tuning
            #
            thread_cache_size = 100
            table_cache = 2500
            join_buffer_size = 64M
            tmp_table_size = 64M
            max_heap_table_size = 64M
            table_definition_cache = 3500
            #
            # * Logging and Replication
            #
            # Both location gets rotated by the cronjob.
            # Be aware that this log type is a performance killer.
            # Recommend only changing this at runtime for short testing periods if needed!
            # general_log_file = /var/log/mysql/mysql.log
            # general_log = 1

            # When running under systemd, error logging goes via stdout/stderr to journald
            # and when running legacy init error logging goes to syslog due to
            # /etc/mysql/conf.d/mariadb.conf.d/50-mysqld_safe.cnf
            # Enable this if you want to have error logging into a separate file
            log_error = /var/log/mysql/error.log
            # Enable the slow query log to see queries with especially long duration
            log_slow_query_file = /var/log/mysql/mariadb-slow.log
            log_slow_query_time = 10
            log_slow_verbosity = query_plan,explain
            # log-queries-not-using-indexes
            # log_slow_min_examined_row_limit = 1000

            # The following can be used as easy to replay backup logs or for replication.
            # note: if you are setting up a replication slave, see README.Debian about
            # other settings you may need to change.
            expire_logs_days = 10
            #
            # * SSL/TLS
            #
            ssl-ca = /etc/mysql/certs/ca-cert.pem
            ssl-key = /etc/mysql/certs/server-key.pem
            ssl-cert = /etc/mysql/certs/server-cert.pem
            proxy-protocol-networks=10.0.0.0/24,localhost
            require-secure-transport = off
            #
            # * Character sets
            #
            character-set-server = utf8mb4
            collation-server = utf8mb4_general_ci
            #
            # * InnoDB
            #
            innodb_flush_log_at_trx_commit=0
            innodb_flush_method=O_DIRECT
            innodb_doublewrite=1
            innodb_autoextend_increment=64
            innodb_file_per_table=1
            innodb_thread_concurrency=0
            innodb_buffer_pool_size=46G
            innodb_buffer_pool_instances=10
            innodb_log_file_size=8G
            innodb_io_capacity = 2000
            innodb_io_capacity_max = 5000
            innodb_checksum_algorithm = crc32
            innodb_log_compressed_pages=OFF
            innodb_change_buffering=all
            innodb_autoinc_lock_mode=2
            [mariadb]
            key_buffer_size = 128M
            net_buffer_length = 1048576
            max_allowed_packet = 1024M
            query_cache_type=0
            query_cache_size=0
            performance_schema=on
            wait_timeout=300
            proxy-protocol-networks=10.0.0.0/24,localhost
            # Hetzner recommendations
            max_connect_errors = 4294967295
            max_connections = 100000
            {noformat}
            serg Sergei Golubchik made changes -
            Assignee Jan Lindström [ JIRAUSER53125 ]
            janlindstrom Jan Lindström made changes -
            Status Open [ 1 ] Needs Feedback [ 10501 ]
            hriesz Hansjuergen Riess made changes -
            Comment [ Or the other way round: the index gets corrupted when it is updated.
            The crashed node cannot be restarted; it immediately shows the race condition again.
            Only recreating the node works (delete everything in /var/lib/mysql except /var/lib/mysql/mysql, then run systemctl start mariadb) ]
            janlindstrom Jan Lindström made changes -
            Status Needs Feedback [ 10501 ] Open [ 1 ]
            janlindstrom Jan Lindström made changes -
            serg Sergei Golubchik made changes -
            Fix Version/s 10.11 [ 27614 ]

            People

              janlindstrom Jan Lindström
              hriesz Hansjuergen Riess
              Votes:
              1 Vote for this issue
              Watchers:
              5 Start watching this issue

              Dates

                Created:
                Updated:

                Git Integration

                  Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.