[MDEV-31068] Reuse duplicate case conversion code in ctype-utf8.c and ctype-ucs2.c Created: 2023-04-18  Updated: 2023-04-18  Resolved: 2023-04-18

Status: Closed
Project: MariaDB Server
Component/s: Character Sets
Fix Version/s: 11.1.1, 10.11.3, 11.0.2, 10.10.4

Type: Task Priority: Critical
Reporter: Alexander Barkov Assignee: Alexander Barkov
Resolution: Fixed Votes: 0
Labels: None

Issue Links:
Blocks
blocks MDEV-30577 Case folding for uca1400 collations i... Closed
Relates
relates to MDEV-31069 Reuse duplicate char-to-weight conver... Closed

 Description   

Files ctype-utf8.c and ctype-ucs2.c have very similar pieces of the code:

static inline void
my_tolower_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].tolower;
}
 
 
static inline void
my_toupper_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].toupper;
}

 
static inline void
my_tolower_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].tolower;
}
 
 
static inline void
my_toupper_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((*wc <= uni_plane->maxchar) && (page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].toupper;
}

static inline void
my_tolower_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
    *wc= page[*wc & 0xFF].tolower;
}
 
 
static inline void
my_toupper_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
    *wc= page[*wc & 0xFF].toupper;
}

static inline void
my_tolower_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
    *wc= page[*wc & 0xFF].tolower;
}
 
 
static inline void
my_toupper_utf8mb3(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *page;
  if ((page= uni_plane->page[(*wc >> 8) & 0xFF]))
    *wc= page[*wc & 0xFF].toupper;
}

static inline void
my_tolower_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page;
    if ((page= uni_plane->page[(*wc >> 8)]))
      *wc= page[*wc & 0xFF].tolower;
  }
}
 
 
static inline void
my_toupper_utf8mb4(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    MY_UNICASE_CHARACTER *page;
    if ((page= uni_plane->page[(*wc >> 8)]))
      *wc= page[*wc & 0xFF].toupper;
  }
}

In order to simplify the patch for MDEV-30577 lets move the repeatable code to a shared file ctype-unidata.h:

/* utf8mb3 and ucs2 share this code */
static inline void
my_tolower_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const MY_UNICASE_CHARACTER *page;
  DBUG_ASSERT(*wc <= uni_plane->maxchar);
  if ((page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].tolower;
}
 
 
static inline void
my_toupper_unicode_bmp(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const MY_UNICASE_CHARACTER *page;
  DBUG_ASSERT(*wc <= uni_plane->maxchar);
  if ((page= uni_plane->page[*wc >> 8]))
    *wc= page[*wc & 0xFF].toupper;
}

/* utf8mb4, utf16, utf32 share this code */
static inline void
my_tolower_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    const MY_UNICASE_CHARACTER *page;
    if ((page= uni_plane->page[(*wc >> 8)]))
      *wc= page[*wc & 0xFF].tolower;
  }
}
 
 
static inline void
my_toupper_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  if (*wc <= uni_plane->maxchar)
  {
    const MY_UNICASE_CHARACTER *page;
    if ((page= uni_plane->page[(*wc >> 8)]))
      *wc= page[*wc & 0xFF].toupper;
  }
}


Generated at Thu Feb 08 10:21:02 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.