# -*- coding: utf-8 -*-
#
# Daniel Nilsson (dnilsson@arroweurope.com)
#
# Simplified code to reproduce the MDEV-24307 crash
# https://jira.mariadb.org/browse/MDEV-24307
#
"""Reproduce MariaDB MDEV-24307 via InnoDB tablespace export/import.

The script has three commands:

* ``init``    - create (or truncate) the sample tables and fill them with data
* ``backup``  - ``FLUSH TABLES ... FOR EXPORT`` and copy the ``.ibd`` files away
* ``restore`` - ``DISCARD TABLESPACE``, copy the ``.ibd`` files back and
  ``IMPORT TABLESPACE``; this is the phase where the crash is observed
"""
import argparse
import os
import shutil
import uuid
from getpass import getpass

import MySQLdb

# Number of sample tables, their common name prefix and the rows per table.
TABLE_COUNT = 20
TABLE_NAME = "mdev24307"
DATA_SIZE = 50000


class PWProxy(object):
    """Stand-in for a ``pwd`` entry on platforms without the ``pwd`` module."""

    pw_uid = 0
    pw_gid = 0


try:
    from pwd import getpwnam
except ImportError:
    def getpwnam(name):
        """Fallback used when ``pwd`` is unavailable; always returns uid/gid 0."""
        return PWProxy()


def _table_names():
    """Return the names of all sample tables, in creation order."""
    return ["{}_{}".format(TABLE_NAME, n) for n in range(TABLE_COUNT)]


def _init_tables(db):
    """Create the sample tables (or truncate existing ones) and fill them."""
    print("\ninit tables")
    tables = _table_names()

    with db.cursor() as cur:
        cur.execute("SHOW TABLES")
        existing = {row[0] for row in cur.fetchall()}
        # The first table is used as the marker for "already initialised".
        if tables[0] in existing:
            print("truncating tables")
            for table in tables:
                cur.execute("TRUNCATE {}".format(table))
                db.commit()
        else:
            print("creating tables")
            for table in tables:
                print(" -> {}".format(table))
                cur.execute("""
                    CREATE TABLE `{}` (
                        `id` INT NOT NULL AUTO_INCREMENT,
                        `data` varchar(40) CHARACTER SET utf8 DEFAULT NULL,
                        KEY dataIdx (data),
                        PRIMARY KEY (id)
                    ) ENGINE=InnoDB CHARSET=utf8 AUTO_INCREMENT=1;
                    """.format(table))
                db.commit()

    with db.cursor() as cur:
        print("filling table data")
        for table in tables:
            # DB-API executemany() takes a sequence of parameter sequences,
            # so every value is wrapped in a 1-tuple.
            rows = [(str(uuid.uuid4()),) for _ in range(DATA_SIZE)]
            cur.executemany(
                "INSERT INTO {} (data) VALUES (%s)".format(table), rows)
            db.commit()


def _backup_tables(db, dbfolder, schema, backup_dir):
    """Flush the tables for export and copy their .ibd files to backup_dir."""
    # This is the backup phase, that never breaks. In the author's case the
    # backup and restore run on two different but identical machines.
    print("\nbackup operation")
    print("locking tables")
    with db.cursor() as cur:
        cur.execute(
            "FLUSH TABLES {} FOR EXPORT".format(",".join(_table_names())))
        db.commit()

    try:
        print("copy table ibd files")
        for table in _table_names():
            filename = "{}.ibd".format(table)
            srcfile = os.path.join(dbfolder, schema, filename)
            dstfile = os.path.join(backup_dir, filename)
            shutil.copy(srcfile, dstfile)
            # NOTE(review): world-writable backup copies; presumably so any
            # user can move them to the restore machine - confirm.
            os.chmod(dstfile, 0o777)
            print("{}: ok".format(filename))
    finally:
        # Release the export locks even when a copy fails.
        print("unlock tables")
        with db.cursor() as cur:
            cur.execute("UNLOCK TABLES")
            db.commit()


def _target_owner(path):
    """Return (uid, gid) of path, or of the 'mysql' user if path is missing."""
    try:
        info = os.stat(path)
    except FileNotFoundError:
        entry = getpwnam('mysql')
        return entry.pw_uid, entry.pw_gid
    return info.st_uid, info.st_gid


def _restore_tables(db, dbfolder, schema, backup_dir, loops):
    """Discard each tablespace, copy the backup .ibd in place and import it."""
    #
    # ***********************************************************************
    # Below is the actual restore phase where the crash happens, not always
    # but sometimes. After the service has restarted, retrying the restore
    # with the same backup files usually works fine.
    # ***********************************************************************
    #
    print("\nrestore operation")
    tables = _table_names()
    for _ in range(loops):
        with db.cursor() as cur:
            print("locking tables")
            cur.execute("LOCK TABLES {};".format(",".join(
                "`{}` LOW_PRIORITY WRITE".format(table) for table in tables)))
            db.commit()

        for table in tables:
            print("\nprocessing: {}".format(table))
            filename = "{}.ibd".format(table)
            srcfile = os.path.join(backup_dir, filename)
            dstfile = os.path.join(dbfolder, schema, filename)

            # Get owner and group id of the original file. This is done
            # before the DISCARD statement below, as in the original script.
            uid, gid = _target_owner(dstfile)

            with db.cursor() as cur:
                print(" -> discarding tablespace")
                cur.execute(
                    "ALTER TABLE `{}` DISCARD TABLESPACE;".format(table))
                db.commit()

            print(" -> copy .ibd file in place")
            shutil.copy(srcfile, dstfile)

            print(" -> setting owner and permissions")
            if hasattr(os, 'chown'):
                os.chown(dstfile, uid, gid)
            os.chmod(dstfile, 0o660)

            with db.cursor() as cur:
                print(" -> importing tablespace")
                cur.execute(
                    "ALTER TABLE `{}` IMPORT TABLESPACE".format(table))

        # No try/finally here on purpose: if the import fails the error
        # should surface as-is rather than be followed by further statements.
        print("\nunlocking tables")
        with db.cursor() as cur:
            cur.execute("UNLOCK TABLES")
            db.commit()


def main():
    """Parse the command line, connect to the server and run the command.

    Raises:
        Exception: if ``backup`` or ``restore`` is requested without a valid
            database data folder (``-d``).
    """
    # The text is the parser *description*; passed positionally it would
    # become ``prog`` and garble the usage line.
    ap = argparse.ArgumentParser(
        description="Test script to replicate MariaDB MDEV-24307",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # The command is required: without it there is nothing to run.
    ap.add_argument('cmd', type=str, choices=['init', 'backup', 'restore'])
    ap.add_argument('--host', type=str, default='localhost', help="Host")
    ap.add_argument('-d', '--dbfolder', type=str,
                    help="Location of database files")
    ap.add_argument('-u', '--user', type=str, default='root', help="User")
    ap.add_argument('-p', '--port', type=int, default=3306, help="Port")
    ap.add_argument('-s', '--schema', type=str, default='tmp', help="Schema")
    ap.add_argument('-l', '--loop', type=int, default=1, help="Loops")
    ap.add_argument('-f', '--dir', type=str, default='.',
                    help="Backup file location")
    args = ap.parse_args()

    needs_dbfolder = args.cmd in ('backup', 'restore')
    if needs_dbfolder and (args.dbfolder is None
                           or not os.path.isdir(args.dbfolder)):
        raise Exception("you need a valid db data folder defined using -d option on this command")

    print("using schema: {}".format(args.schema))
    passwd = getpass("Password:")

    with MySQLdb.connect(host=args.host, user=args.user, passwd=passwd,
                         port=args.port, db=args.schema) as db:
        if args.cmd == 'init':
            _init_tables(db)
        elif args.cmd == 'backup':
            _backup_tables(db, args.dbfolder, args.schema, args.dir)
        elif args.cmd == 'restore':
            _restore_tables(db, args.dbfolder, args.schema, args.dir,
                            args.loop)

    print("all done!")


if __name__ == '__main__':
    main()