[MDEV-31069] Reuse duplicate char-to-weight conversion code in ctype-utf8.c and ctype-ucs2.c Created: 2023-04-18  Updated: 2023-04-18  Resolved: 2023-04-18

Status: Closed
Project: MariaDB Server
Component/s: Character Sets
Fix Version/s: 11.1.1, 10.11.3, 11.0.2, 10.10.4

Type: Task Priority: Critical
Reporter: Alexander Barkov Assignee: Alexander Barkov
Resolution: Fixed Votes: 0
Labels: None

Issue Links:
Blocks
blocks MDEV-30577 Case folding for uca1400 collations i... Closed
Relates
relates to MDEV-31068 Reuse duplicate case conversion code ... Closed

 Description   

Files ctype-utf8.c and ctype-ucs2.c contain several nearly identical pieces of code:

/*
  Replace *wc with the "sort" value stored for it in uni_plane.

  - If *wc is greater than uni_plane->maxchar, *wc is set to
    MY_CS_REPLACEMENT_CHARACTER.
  - Otherwise the page is selected by the bits above the low 8 bits of *wc
    and the entry by the low 8 bits. If the page pointer is null,
    *wc is left unchanged.
*/
static inline void
my_tosort_utf16(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *row;

  /* Out-of-range code points are handled first. */
  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }

  row= uni_plane->page[*wc >> 8];
  if (row)
    *wc= row[*wc & 0xFF].sort;
}

/*
  Replace *wc with the "sort" value stored for it in uni_plane.

  - If *wc is greater than uni_plane->maxchar, *wc is set to
    MY_CS_REPLACEMENT_CHARACTER.
  - Otherwise the page is selected by the bits above the low 8 bits of *wc
    and the entry by the low 8 bits. If the page pointer is null,
    *wc is left unchanged.
*/
static inline void
my_tosort_utf32(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  MY_UNICASE_CHARACTER *row;

  /* Out-of-range code points are handled first. */
  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }

  row= uni_plane->page[*wc >> 8];
  if (row)
    *wc= row[*wc & 0xFF].sort;
}

/*
  Replace *wc with the "sort" value stored for it in uni_plane.

  The page index is (*wc >> 8) masked to 8 bits and the entry index is the
  low 8 bits of *wc. No comparison against uni_plane->maxchar is made here.
  If the page pointer is null, *wc is left unchanged.
*/
static inline void
my_tosort_ucs2(MY_UNICASE_INFO *uni_plane, my_wc_t *wc)
{
  const my_wc_t code= *wc;
  MY_UNICASE_CHARACTER *row= uni_plane->page[(code >> 8) & 0xFF];

  if (row)
    *wc= row[code & 0xFF].sort;
}

/*
  Replace *wc with a value looked up for it in uni_plane.

  - If *wc is greater than uni_plane->maxchar, *wc is set to
    MY_CS_REPLACEMENT_CHARACTER.
  - If the page pointer for *wc is null, *wc is left unchanged.
  - Otherwise *wc becomes the entry's "tolower" member when
    MY_CS_LOWER_SORT is set in flags, and its "sort" member when it is not.
*/
static inline void
my_tosort_unicode(MY_UNICASE_INFO *uni_plane, my_wc_t *wc, uint flags)
{
  MY_UNICASE_CHARACTER *row;

  /* Out-of-range code points are handled first. */
  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }

  /* No page for this code point: keep it as is. */
  if (!(row= uni_plane->page[*wc >> 8]))
    return;

  if (flags & MY_CS_LOWER_SORT)
    *wc= row[*wc & 0xFF].tolower;
  else
    *wc= row[*wc & 0xFF].sort;
}

In order to simplify the patch for MDEV-30577, let's move the repeated code to a shared file, ctype-unidata.h:

/*
  Replace *wc with the "sort" value stored for it in uni_plane.

  The function itself does not handle *wc > uni_plane->maxchar; that
  condition is only checked through DBUG_ASSERT, so callers are expected
  to pass an in-range value. If the page pointer for *wc is null,
  *wc is left unchanged.
*/
static inline void my_tosort_unicode_bmp(const MY_UNICASE_INFO *uni_plane,
                                         my_wc_t *wc)
{
  const MY_UNICASE_CHARACTER *row;

  DBUG_ASSERT(*wc <= uni_plane->maxchar);

  row= uni_plane->page[*wc >> 8];
  if (row)
    *wc= row[*wc & 0xFF].sort;
}

 

/*
  Replace *wc with the "sort" value stored for it in uni_plane.

  - If *wc is greater than uni_plane->maxchar, *wc is set to
    MY_CS_REPLACEMENT_CHARACTER.
  - Otherwise, if the page pointer for *wc is null, *wc is left unchanged.
*/
static inline void my_tosort_unicode(const MY_UNICASE_INFO *uni_plane,
                                     my_wc_t *wc)
{
  const MY_UNICASE_CHARACTER *row;

  /* Out-of-range code points are handled first. */
  if (*wc > uni_plane->maxchar)
  {
    *wc= MY_CS_REPLACEMENT_CHARACTER;
    return;
  }

  row= uni_plane->page[*wc >> 8];
  if (row)
    *wc= row[*wc & 0xFF].sort;
}

 


Generated at Thu Feb 08 10:21:03 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.