Uploaded image for project: 'MariaDB Server'
  1. MariaDB Server
  2. MDEV-14113

MS: Add linger option on TCP sockets

Details

    • 10.3.3-1

    Description

      From 04a3482b9c5772cd3a82a4aec51976cd2e3e686d Mon Sep 17 00:00:00 2001
      From: Shuode Li <shuodl@microsoft.com>
      Date: Tue, 19 Sep 2017 01:47:19 +0000
      Subject: [PATCH] Merged PR 55490: Add linger option on TCP sockets.
       
      Add linger option on TCP sockets.
       
      Related work items: #73276
      ---
       CMakeLists.txt                       |  5 +++++
       include/mysql_com.h                  |  4 ++++
       mysql-test/r/mysqld--help-win.result |  6 ++++++
       sql/mysqld.cc                        | 10 ++++++++++
       sql/mysqld.h                         |  5 +++++
       sql/sys_vars.cc                      | 15 +++++++++++++++
       tools/cmake.cmd                      |  4 ++++
       7 files changed, 49 insertions(+)
       
      diff --git a/CMakeLists.txt b/CMakeLists.txt
      index 90a31a48303..aa3620a07c8 100644
      --- a/CMakeLists.txt
      +++ b/CMakeLists.txt
      @@ -462,6 +462,11 @@ OPTION(DISABLE_AZURE_REPLICATION
       IF(DISABLE_AZURE_REPLICATION)
         ADD_DEFINITIONS(-DDISABLE_AZURE_REPLICATION)
       ENDIF()
      +OPTION(DISABLE_TCP_LINGER
      +  "disable tcp linger options" OFF)
      +IF(DISABLE_TCP_LINGER)
      +  ADD_DEFINITIONS(-DDISABLE_TCP_LINGER)
      +ENDIF()
       
       OPTION(DISABLE_USER_NAME_CHECK
         "disable user name check" OFF)
      diff --git a/include/mysql_com.h b/include/mysql_com.h
      index 8f603d07322..8b527e91e92 100644
      --- a/include/mysql_com.h
      +++ b/include/mysql_com.h
      @@ -309,6 +309,10 @@ enum enum_server_command
       #define NET_WRITE_TIMEOUT	60		/* Timeout on write */
       #define NET_WAIT_TIMEOUT	8*60*60		/* Wait for new query */
       
      +#ifndef DISABLE_TCP_LINGER
      +#define TCP_LINGER_TIMEOUT	10
      +#endif
      +
       #define ONLY_KILL_QUERY         1
       
       
      diff --git a/mysql-test/r/mysqld--help-win.result b/mysql-test/r/mysqld--help-win.result
      index aee358bc2b2..351ea82a1f3 100644
      --- a/mysql-test/r/mysqld--help-win.result
      +++ b/mysql-test/r/mysqld--help-win.result
      @@ -1026,6 +1026,10 @@ The following options may be given as the first argument:
        --tc-heuristic-recover=name
        Decision to use in heuristic recover process. Possible
        values are COMMIT or ROLLBACK.
      + --tcp-linger        The number of seconds the server tcp linger timeout
      + (Defaults to on; use --skip-tcp-linger to disable.)
      + --tcp-linger-timeout[=#]
      + The number of seconds the server tcp linger timeout
        --thread-cache-size=#
        How many threads we should keep in a cache for reuse
        --thread-handling=name
      @@ -1362,6 +1366,8 @@ sync-relay-log-info 10000
       sysdate-is-now FALSE
       table-open-cache-instances 1
       tc-heuristic-recover COMMIT
      +tcp-linger TRUE
      +tcp-linger-timeout 10
       thread-cache-size 9
       thread-handling one-thread-per-connection
       thread-stack 262144
      diff --git a/sql/mysqld.cc b/sql/mysqld.cc
      index 151ffc5fcd2..0ceb4d037f2 100644
      --- a/sql/mysqld.cc
      +++ b/sql/mysqld.cc
      @@ -588,6 +588,11 @@ ulong binlog_cache_use= 0, binlog_cache_disk_use= 0;
       ulong binlog_stmt_cache_use= 0, binlog_stmt_cache_disk_use= 0;
       ulong max_connections, max_connect_errors;
       
      +#ifndef DISABLE_TCP_LINGER
      +my_bool tcp_linger_onoff;
      +uint tcp_linger_timeout;
      +#endif
      +
       #ifndef DISABLE_SUPER_ACL_CONNECTION
       ulong max_super_acl_connections;    /* The variable define the max connection number of super user */
       #endif
      @@ -6499,6 +6504,11 @@ void handle_connections_sockets()
               sleep(1);       // Give other threads some time
             continue;
           }
      +
      +#ifndef DISABLE_TCP_LINGER
      +    struct linger ling = {tcp_linger_onoff, tcp_linger_timeout};
      +    mysql_socket_setsockopt(new_sock, SOL_SOCKET, SO_LINGER, (char *)&ling, sizeof(ling));
      +#endif
       
       #ifdef HAVE_LIBWRAP
           {
      diff --git a/sql/mysqld.h b/sql/mysqld.h
      index 9bd87bab9aa..b5f6b118f2d 100644
      --- a/sql/mysqld.h
      +++ b/sql/mysqld.h
      @@ -222,6 +222,11 @@ extern MYSQL_PLUGIN_IMPORT ulong max_connections;
       extern ulong max_digest_length;
       extern ulong max_connect_errors, connect_timeout;
       
      +#ifndef DISABLE_TCP_LINGER
      +extern my_bool tcp_linger_onoff;
      +extern uint tcp_linger_timeout;
      +#endif
      +
       #ifndef DISABLE_SUPER_ACL_CONNECTION
       extern ulong max_super_acl_connections;
       #endif
      diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
      index eba5e9005aa..1753fa016ef 100644
      --- a/sql/sys_vars.cc
      +++ b/sql/sys_vars.cc
      @@ -3205,6 +3205,21 @@ static Sys_var_ulong Sys_net_wait_timeout(
              VALID_RANGE(1, IF_WIN(INT_MAX32/1000, LONG_TIMEOUT)),
              DEFAULT(NET_WAIT_TIMEOUT), BLOCK_SIZE(1));
       
      +#ifndef DISABLE_TCP_LINGER
      +static Sys_var_mybool Sys_tcp_linger_onoff(
      +       "tcp_linger",
      +       "The number of seconds the server tcp linger timeout",
      +       GLOBAL_VAR(tcp_linger_onoff), CMD_LINE(OPT_ARG), DEFAULT(TRUE),
      +       NO_MUTEX_GUARD, NOT_IN_BINLOG);
      +
      +static Sys_var_uint Sys_tcp_linger_timout(
      +       "tcp_linger_timeout",
      +       "The number of seconds the server tcp linger timeout",
      +       GLOBAL_VAR(tcp_linger_timeout), CMD_LINE(OPT_ARG),
      +       VALID_RANGE(1, 65535U),
      +       DEFAULT(TCP_LINGER_TIMEOUT), BLOCK_SIZE(1));
      +#endif
      +
       static Sys_var_plugin Sys_default_storage_engine(
              "default_storage_engine", "The default storage engine for new tables",
              SESSION_VAR(table_plugin), NO_CMD_LINE,
      diff --git a/tools/cmake.cmd b/tools/cmake.cmd
      index 0bcbc9db4fc..a48f447d257 100644
      --- a/tools/cmake.cmd
      +++ b/tools/cmake.cmd
      @@ -68,6 +68,10 @@ IF NOT "x%plugin:dazurerepl=%"=="x%plugin%" (
           SET cmakeargs=%cmakeargs% -DDISABLE_AZURE_REPLICATION=ON
       )
       
      +IF NOT "x%plugin:etcplinger=%"=="x%plugin%" (
      +    SET cmakeargs=%cmakeargs% -DDISABLE_TCP_LINGER=ON
      +)
      +
       IF NOT "x%plugin:dusernamecheck=%"=="x%plugin%" (
           SET cmakeargs=%cmakeargs% -DDISABLE_USER_NAME_CHECK=ON
       )
      

      Attachments

        Activity

          speaking if TCP keepalive, we have an upcoming patch for it as well
          https://github.com/MariaDB/server/commit/44cfcce67f0c4a63989db406623bff2fe6faf40d

          wlad Vladislav Vaintroub added a comment - speaking if TCP keepalive, we have an upcoming patch for it as well https://github.com/MariaDB/server/commit/44cfcce67f0c4a63989db406623bff2fe6faf40d

          As for the userland send timeout. It's kinda different, an example:
          1) The sending was not blocked in the last send(2).
          2) The server close(2)'ed the socket after the last send(2) call. The data in the last send(2) was still in the TCP send buffer.
          3) If the client's ACK for the last piece of data was excessively dropped, or the extreme case that the client itself was crashed.
          4) Then the socket's (and inpcb in the BSD world) destruction would solely rely on the TCP retransmission timeout; as I said, it could be quite lengthy.

          Thanks,
          sephe

          Sepherosa Ziehau Sepherosa Ziehau added a comment - As for the userland send timeout. It's kinda different, an example: 1) The sending was not blocked in the last send(2). 2) The server close(2)'ed the socket after the last send(2) call. The data in the last send(2) was still in the TCP send buffer. 3) If the client's ACK for the last piece of data was excessively dropped, or the extreme case that the client itself was crashed. 4) Then the socket's (and inpcb in the BSD world) destruction would solely rely on the TCP retransmission timeout; as I said, it could be quite lengthy. Thanks, sephe

          Sepherosa Ziehau
          can you look at this patch to see if it will solve problems in most cases ?https://github.com/MariaDB/server/commit/8b09de139bf52777ba5e78821f3b64734bd22195

          It would use abortive close, (no linger), if connection ran into timeout. I think this is going to the most frequent cases, where server close connection first (usually client does it). In this case, server does not care about TCP retransmission (as discussed, this would happen when writing large result sets and client application reads at slower pace than server writes)

          There are some other cases where server would close the socket before client does it, e.g during connection on failed authentication. But then, server sends an error message, and it does somewhat care that the client receives it.

          The patch also cleans up socket timeout handling, so that Windows is consistent with another platforms (using nonblocking socket IO + waiting in poll/select on single socket, rather than setsockopt). This makes identifying timeouts easier.

          I also removed the superficial shutdown() before closesocket() in places where it was used, because it reportedly breaks SO_LINGER on Windows.

          wlad Vladislav Vaintroub added a comment - Sepherosa Ziehau can you look at this patch to see if it will solve problems in most cases ? https://github.com/MariaDB/server/commit/8b09de139bf52777ba5e78821f3b64734bd22195 It would use abortive close, (no linger), if connection ran into timeout. I think this is going to the most frequent cases, where server close connection first (usually client does it). In this case, server does not care about TCP retransmission (as discussed, this would happen when writing large result sets and client application reads at slower pace than server writes) There are some other cases where server would close the socket before client does it, e.g during connection on failed authentication. But then, server sends an error message, and it does somewhat care that the client receives it. The patch also cleans up socket timeout handling, so that Windows is consistent with another platforms (using nonblocking socket IO + waiting in poll/select on single socket, rather than setsockopt). This makes identifying timeouts easier. I also removed the superficial shutdown() before closesocket() in places where it was used, because it reportedly breaks SO_LINGER on Windows.

          This is definitely a step forward to the right direction! The diff looks good. Thank you very much!

          Sepherosa Ziehau Sepherosa Ziehau added a comment - This is definitely a step forward to the right direction! The diff looks good. Thank you very much!

          I pushed the mentioned patch into 10.3. Please let us know if there is anything else we should do WRT lingering.

          wlad Vladislav Vaintroub added a comment - I pushed the mentioned patch into 10.3. Please let us know if there is anything else we should do WRT lingering.

          People

            wlad Vladislav Vaintroub
            svoj Sergey Vojtovich
            Votes:
            0 Vote for this issue
            Watchers:
            4 Start watching this issue

            Dates

              Created:
              Updated:
              Resolved:

              Git Integration

                Error rendering 'com.xiplink.jira.git.jira_git_plugin:git-issue-webpanel'. Please contact your Jira administrators.