In an environment running Galera Cluster with 6 MariaDB nodes, 1 arbitrator node, some replicas and a ProxySQL, after a network issue that triggered a state transfer on two nodes,
for some reason, almost all transactions hung in one of the following states:
- "starting" state on the commit statement or on "" (an empty statement).
- "acquiring total order isolation" on the "KILL CONNECTION" statement (The "KILL CONNECTION" was requested by the ProxySQL)
We tried to restart the service, but it hung while stopping. ProxySQL detected this node as down and switched the traffic to another node.
Looking at the backtrace, it seems that we have a kind of "pthread_cond_wait() deadlock" executed by lock.wait() in the enter() function of the commit monitor during the commit order critical section.
Unfortunately, we didn't find a way to reproduce the problem.
{"report":{"fcp":933.8999998569489,"ttfb":274,"pageVisibility":"visible","entityId":113664,"key":"jira.project.issue.view-issue","isInitial":true,"threshold":1000,"elementTimings":{},"userDeviceMemory":8,"userDeviceProcessors":64,"apdex":0.5,"journeyId":"80933a0f-e85f-4d5e-bfb0-08d8a3532fd5","navigationType":0,"readyForUser":1059.0999999046326,"redirectCount":0,"resourceLoadedEnd":640.7000000476837,"resourceLoadedStart":291.89999985694885,"resourceTiming":[{"duration":42.80000019073486,"initiatorType":"link","name":"https://jira.mariadb.org/s/2c21342762a6a02add1c328bed317ffd-CDN/lu2cib/820016/12ta74/0a8bac35585be7fc6c9cc5a0464cd4cf/_/download/contextbatch/css/_super/batch.css","startTime":291.89999985694885,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":291.89999985694885,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":334.7000000476837,"responseStart":0,"secureConnectionStart":0},{"duration":43,"initiatorType":"link","name":"https://jira.mariadb.org/s/7ebd35e77e471bc30ff0eba799ebc151-CDN/lu2cib/820016/12ta74/494e4c556ecbb29f90a3d3b4f09cb99c/_/download/contextbatch/css/jira.browse.project,project.issue.navigator,jira.view.issue,jira.general,jira.global,atl.general,-_super/batch.css?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&slack-enabled=true&whisper-enabled=true","startTime":292.09999990463257,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":292.09999990463257,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":335.09999990463257,"responseStart":0,"secureConnectionStart":0},{"duration":224.5,"initiatorType":"script","name":"https://jira.mariadb.org/s/0917945aaa57108d00c5076fea35e069-CDN/lu2cib/820016/12ta74/0a8bac35585be7fc6c9cc5a0464cd4cf/_/download/contextbatch/js/_super/batch.js?locale=en","startTime":292.39999985694885,"connectEnd":292.39999985694885,"connectStart":292.39999985694885,"domainLookupEnd":292.39999985694885,"doma
inLookupStart":292.39999985694885,"fetchStart":292.39999985694885,"redirectEnd":0,"redirectStart":0,"requestStart":333,"responseEnd":516.8999998569489,"responseStart":348.39999985694885,"secureConnectionStart":292.39999985694885},{"duration":348.10000014305115,"initiatorType":"script","name":"https://jira.mariadb.org/s/2d8175ec2fa4c816e8023260bd8c1786-CDN/lu2cib/820016/12ta74/494e4c556ecbb29f90a3d3b4f09cb99c/_/download/contextbatch/js/jira.browse.project,project.issue.navigator,jira.view.issue,jira.general,jira.global,atl.general,-_super/batch.js?agile_global_admin_condition=true&jag=true&jira.create.linked.issue=true&locale=en&slack-enabled=true&whisper-enabled=true","startTime":292.59999990463257,"connectEnd":292.59999990463257,"connectStart":292.59999990463257,"domainLookupEnd":292.59999990463257,"domainLookupStart":292.59999990463257,"fetchStart":292.59999990463257,"redirectEnd":0,"redirectStart":0,"requestStart":341.7999999523163,"responseEnd":640.7000000476837,"responseStart":358.09999990463257,"secureConnectionStart":292.59999990463257},{"duration":82.79999995231628,"initiatorType":"script","name":"https://jira.mariadb.org/s/a9324d6758d385eb45c462685ad88f1d-CDN/lu2cib/820016/12ta74/c92c0caa9a024ae85b0ebdbed7fb4bd7/_/download/contextbatch/js/atl.global,-_super/batch.js?locale=en","startTime":292.7999999523163,"connectEnd":292.7999999523163,"connectStart":292.7999999523163,"domainLookupEnd":292.7999999523163,"domainLookupStart":292.7999999523163,"fetchStart":292.7999999523163,"redirectEnd":0,"redirectStart":0,"requestStart":344.2999999523163,"responseEnd":375.59999990463257,"responseStart":367.39999985694885,"secureConnectionStart":292.7999999523163},{"duration":83.30000019073486,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2cib/820016/12ta74/1.0/_/download/batch/jira.webresources:calendar-en/jira.webresources:calendar-en.js","startTime":292.89999985694885,"connectEnd":292.89999985694885,"connectStart":292.8
9999985694885,"domainLookupEnd":292.89999985694885,"domainLookupStart":292.89999985694885,"fetchStart":292.89999985694885,"redirectEnd":0,"redirectStart":0,"requestStart":344.59999990463257,"responseEnd":376.2000000476837,"responseStart":369,"secureConnectionStart":292.89999985694885},{"duration":83.70000004768372,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2cib/820016/12ta74/1.0/_/download/batch/jira.webresources:calendar-localisation-moment/jira.webresources:calendar-localisation-moment.js","startTime":293.09999990463257,"connectEnd":293.09999990463257,"connectStart":293.09999990463257,"domainLookupEnd":293.09999990463257,"domainLookupStart":293.09999990463257,"fetchStart":293.09999990463257,"redirectEnd":0,"redirectStart":0,"requestStart":344.7000000476837,"responseEnd":376.7999999523163,"responseStart":369.39999985694885,"secureConnectionStart":293.09999990463257},{"duration":47.59999990463257,"initiatorType":"link","name":"https://jira.mariadb.org/s/b04b06a02d1959df322d9cded3aeecc1-CDN/lu2cib/820016/12ta74/a2ff6aa845ffc9a1d22fe23d9ee791fc/_/download/contextbatch/css/jira.global.look-and-feel,-_super/batch.css","startTime":293.2999999523163,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":293.2999999523163,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":340.89999985694885,"responseStart":0,"secureConnectionStart":0},{"duration":92.5,"initiatorType":"script","name":"https://jira.mariadb.org/rest/api/1.0/shortcuts/820016/47140b6e0a9bc2e4913da06536125810/shortcuts.js?context=issuenavigation&context=issueaction","startTime":293.5,"connectEnd":293.5,"connectStart":293.5,"domainLookupEnd":293.5,"domainLookupStart":293.5,"fetchStart":293.5,"redirectEnd":0,"redirectStart":0,"requestStart":365.5,"responseEnd":386,"responseStart":382.2000000476837,"secureConnectionStart":293.5},{"duration":49.89999985694885,"initiatorType":"link","name":"https://jira.mariadb.org/s
/3ac36323ba5e4eb0af2aa7ac7211b4bb-CDN/lu2cib/820016/12ta74/d176f0986478cc64f24226b3d20c140d/_/download/contextbatch/css/com.atlassian.jira.projects.sidebar.init,-_super,-project.issue.navigator,-jira.view.issue/batch.css?jira.create.linked.issue=true","startTime":293.7000000476837,"connectEnd":0,"connectStart":0,"domainLookupEnd":0,"domainLookupStart":0,"fetchStart":293.7000000476837,"redirectEnd":0,"redirectStart":0,"requestStart":0,"responseEnd":343.59999990463257,"responseStart":0,"secureConnectionStart":0},{"duration":99.70000004768372,"initiatorType":"script","name":"https://jira.mariadb.org/s/5d5e8fe91fbc506585e83ea3b62ccc4b-CDN/lu2cib/820016/12ta74/d176f0986478cc64f24226b3d20c140d/_/download/contextbatch/js/com.atlassian.jira.projects.sidebar.init,-_super,-project.issue.navigator,-jira.view.issue/batch.js?jira.create.linked.issue=true&locale=en","startTime":293.89999985694885,"connectEnd":293.89999985694885,"connectStart":293.89999985694885,"domainLookupEnd":293.89999985694885,"domainLookupStart":293.89999985694885,"fetchStart":293.89999985694885,"redirectEnd":0,"redirectStart":0,"requestStart":365.7000000476837,"responseEnd":393.59999990463257,"responseStart":382.89999985694885,"secureConnectionStart":293.89999985694885},{"duration":337.39999985694885,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2cib/820016/12ta74/1.0/_/download/batch/jira.webresources:bigpipe-js/jira.webresources:bigpipe-js.js","startTime":299.7000000476837,"connectEnd":299.7000000476837,"connectStart":299.7000000476837,"domainLookupEnd":299.7000000476837,"domainLookupStart":299.7000000476837,"fetchStart":299.7000000476837,"redirectEnd":0,"redirectStart":0,"requestStart":394.59999990463257,"responseEnd":637.0999999046326,"responseStart":630.2999999523163,"secureConnectionStart":299.7000000476837},{"duration":338.5,"initiatorType":"script","name":"https://jira.mariadb.org/s/d41d8cd98f00b204e9800998ecf8427e-CDN/lu2cib/820016/12ta74/1.0/_/d
ownload/batch/jira.webresources:bigpipe-init/jira.webresources:bigpipe-init.js","startTime":299.7999999523163,"connectEnd":299.7999999523163,"connectStart":299.7999999523163,"domainLookupEnd":299.7999999523163,"domainLookupStart":299.7999999523163,"fetchStart":299.7999999523163,"redirectEnd":0,"redirectStart":0,"requestStart":416.89999985694885,"responseEnd":638.2999999523163,"responseStart":632.3999998569489,"secureConnectionStart":299.7999999523163},{"duration":117,"initiatorType":"xmlhttprequest","name":"https://jira.mariadb.org/rest/webResources/1.0/resources","startTime":672,"connectEnd":672,"connectStart":672,"domainLookupEnd":672,"domainLookupStart":672,"fetchStart":672,"redirectEnd":0,"redirectStart":0,"requestStart":753.2999999523163,"responseEnd":789,"responseStart":787.7000000476837,"secureConnectionStart":672}],"fetchStart":0,"domainLookupStart":0,"domainLookupEnd":0,"connectStart":0,"connectEnd":0,"requestStart":53,"responseStart":274,"responseEnd":291,"domLoading":279,"domInteractive":1149,"domContentLoadedEventStart":1149,"domContentLoadedEventEnd":1198,"domComplete":1631,"loadEventStart":1631,"loadEventEnd":1631,"userAgent":"Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)","marks":[{"name":"bigPipe.sidebar-id.start","time":1127.2999999523163},{"name":"bigPipe.sidebar-id.end","time":1128.0999999046326},{"name":"bigPipe.activity-panel-pipe-id.start","time":1128.2000000476837},{"name":"bigPipe.activity-panel-pipe-id.end","time":1129.7999999523163},{"name":"activityTabFullyLoaded","time":1242.3999998569489}],"measures":[],"correlationId":"91c1dc5c55418c","effectiveType":"4g","downlink":9.7,"rtt":0,"serverDuration":118,"dbReadsTimeInMs":21,"dbConnsTimeInMs":34,"applicationHash":"9d11dbea5f4be3d4cc21f03a88dd11d8c8687422","experiments":[]}}
I see that it has been previously claimed that this bug does not affect MariaDB Server 10.6 or later. Please clarify what should be done on merge to 10.6. If it is anything other than a null-merge (discarding the changes), we need to review and test the 10.6 version as well.
Am I right that this is basically yet another attempt at fixing MDEV-23328?