[MDEV-26751] Deficiencies in MTR coverage for JSON histograms Created: 2021-10-02  Updated: 2021-12-07  Resolved: 2021-12-03

Status: Closed
Project: MariaDB Server
Component/s: Optimizer, Tests
Affects Version/s: N/A
Fix Version/s: 10.8.0

Type: Bug Priority: Critical
Reporter: Elena Stepanova Assignee: Sergei Petrunia
Resolution: Fixed Votes: 0
Labels: None

Issue Links:
Problem/Incident
is caused by MDEV-21130 Histograms: use JSON as on-disk format Closed
is caused by MDEV-26519 JSON Histograms: improve histogram co... Closed

 Description   

Lines from the cumulative patch 0ff8976e..c548019b, not covered by default MTR test suites:

Line numbers of preview-10.7-MDEV-26519-json-histograms c548019b

Lines with zero coverage (70/1477):
 
===File sql/field.h:
   1864 : +  virtual bool pos_through_val_str() { return false;}
 
===File sql/opt_histogram_json.cc:
     32 : +    return true;
     51 : +    if (out->alloc(out->alloced_length()*2))
     52 : +      return true;
     53 : +  }
     65 : +    return true;
     85 : +    if (out->alloc(out->alloced_length()*2))
     86 : +      return true;
     87 : +  }
    168 : +      return true;
    184 : +      return true;
    205 : +      return true;
    263 : +        return 1; // OOM
    268 : +          return 1;
    291 : +          return 1;
    300 : +          return 1;
    309 : +          return 1;
    410 : +      err_pos= hist_array;
    411 : +      err= "JSON parse error";
    451 : +      err_pos= size;
    452 : +      err= ".size member must be a floating-point value";
    453 : +      goto error;
    472 : +      err_pos= ndv;
    473 : +      err= ".ndv member must be an integer value";
    474 : +      goto error;
    481 : +      err_pos= ndv;
    482 : +      err= "Out of memory";
    483 : +      goto error;
    500 : +      err_pos= bucket_info;
    501 : +      err= ".end member must be a scalar";
    502 : +      goto error;
    508 : +        err_pos= bucket_info;
    509 : +        err= "Out of memory";
    510 : +        goto error;
    532 : +    err= ".end member is allowed only in last bucket";
    533 : +    err_pos= hist_data;
    534 : +    goto error;
    580 : +    store_key_image_to_rec_no_null(field, left.data(), field->key_length());
    581 : +    double min_val_real= field->val_real();
    583 : +    store_key_image_to_rec_no_null(field, right.data(), field->key_length());
    584 : +    double max_val_real= field->val_real();
    586 : +    store_key_image_to_rec_no_null(field, (const char*)key, field->key_length());
    587 : +    double midp_val_real= field->val_real();
    589 : +    res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);
    613 : +    sel= avg_sel;
    674 : +        idx < (int)buckets.size()-1)
    681 : +      idx++;
    715 : +      idx--;
    771 : +      *equal= true;
    772 : +      return middle;
    807 : +      *equal= true;
    809 : +      low= high;
 
===File sql/sql_statistics.cc:
   1083 : +            stat_field->set_null();
   1089 : +            stat_field->set_null();
   1234 : +        return NULL;
   1257 : +    return true;
   1657 : +  default:
   1658 : +    DBUG_ASSERT(0);
 
===File sql/sql_statistics.h:
    235 : +    default:
    236 : +      DBUG_ASSERT(0);
    249 : +    default:
    250 : +      DBUG_ASSERT(0);
    263 : +    default:
    264 : +      DBUG_ASSERT(0);
    330 : +    default:
    331 : +      DBUG_ASSERT(0);
    332 : +      return;
    345 : +    default:
    346 : +      DBUG_ASSERT(0);
    347 : +      return;

Some of them are apparently extreme cases which are either impossible or very difficult to trigger via MTR. However, some fragments are worth taking care of, unless they are bogus – aren't really a part of the patch or non-coverage is wrong (which happens).


1

    580 : +    store_key_image_to_rec_no_null(field, left.data(), field->key_length());
    581 : +    double min_val_real= field->val_real();
    583 : +    store_key_image_to_rec_no_null(field, right.data(), field->key_length());
    584 : +    double max_val_real= field->val_real();
    586 : +    store_key_image_to_rec_no_null(field, (const char*)key, field->key_length());
    587 : +    double midp_val_real= field->val_real();
    589 : +    res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);

It is the "else" below. A range query for a numeric column or something of the sort, maybe?

sql/opt_histogram_json.cc

555:double position_in_interval(Field *field, const  uchar *key, uint key_len,
556:                            const std::string& left, const std::string& right)
557:{
558:  double res;
559:  if (field->pos_through_val_str())
...
578:  else
579:  {
580:    store_key_image_to_rec_no_null(field, left.data(), field->key_length());
581:    double min_val_real= field->val_real();
582:
583:    store_key_image_to_rec_no_null(field, right.data(), field->key_length());
584:    double max_val_real= field->val_real();
585:
586:    store_key_image_to_rec_no_null(field, (const char*)key, field->key_length());
587:    double midp_val_real= field->val_real();
588:
589:    res= pos_in_interval_for_double(midp_val_real, min_val_real, max_val_real);
590:  }
591:  return res;
592:}


2

613 : +    sel= avg_sel;

sql/opt_histogram_json.cc

609:  if (buckets[idx].ndv == 1 && !equal)
610:  {
611:    // The bucket has a single value and it doesn't match! Use the global
612:    // average.
613:    sel= avg_sel;
614:  }


3

    674 : +        idx < (int)buckets.size()-1)
    681 : +      idx++;

sql/opt_histogram_json.cc

669:    // Find the leftmost bucket that contains the lookup value.
670:    // (If the lookup value is to the left of all buckets, find bucket #0)
671:    bool equal;
672:    int idx= find_bucket(field, min_key, &equal);
673:    if (equal && exclusive_endp && buckets[idx].ndv==1 &&
674:        idx < (int)buckets.size()-1)
675:    {
676:      /*
677:        The range is "col > $CONST" and we've found a bucket that contains
678:        only the value $CONST. Move to the next bucket.
679:        TODO: what if the last value in the histogram is a popular one?
680:      */
681:      idx++;
682:    }


4

   715 : +      idx--;

sql/opt_histogram_json.cc

708:    if (equal && !inclusive_endp && idx > 0)
709:    {
710:      /*
711:        The range is "col < $CONST" and we've found a bucket starting with
712:        $CONST. Move to the previous bucket.
713:        TODO: what if the first value is the popular one?
714:      */
715:      idx--;
716:    }


5

    771 : +      *equal= true;
    772 : +      return middle;

sql/opt_histogram_json.cc

768:    res= field->key_cmp((uchar*)buckets[middle].start_value.data(), lookup_val);
769:    if (!res)
770:    {
771:      *equal= true;
772:      return middle;
773:    }


6

    807 : +      *equal= true;
    809 : +      low= high;

sql/opt_histogram_json.cc

803:  else if (high == (int)buckets.size() - 1)
804:  {
805:    res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
806:    if (!res)
807:      *equal= true;
808:    if (res >= 0)
809:      low= high;
810:  }


7

   1083 : +            stat_field->set_null();
   1089 : +            stat_field->set_null();

sql/sql_statistics.cc

1079:        case COLUMN_STAT_HIST_TYPE:
1080:          if (stats->histogram)
1081:            stat_field->store(stats->histogram->get_type() + 1);
1082:          else
1083:            stat_field->set_null();
1084:          break;
1085:        case COLUMN_STAT_HISTOGRAM:
1086:          if (stats->histogram)
1087:            stats->histogram->serialize(stat_field);
1088:          else
1089:            stat_field->set_null();
1090:          break;


Also, while it doesn't seem particularly critical, there are tests for some parsing errors, but not all of them, so maybe it makes sense to cover the rest as well, for consistency:

    410 : +      err_pos= hist_array;
    411 : +      err= "JSON parse error";

    451 : +      err_pos= size;
    452 : +      err= ".size member must be a floating-point value";
    453 : +      goto error;

    472 : +      err_pos= ndv;
    473 : +      err= ".ndv member must be an integer value";
    474 : +      goto error;

    500 : +      err_pos= bucket_info;
    501 : +      err= ".end member must be a scalar";
    502 : +      goto error;

    532 : +    err= ".end member is allowed only in last bucket";
    533 : +    err_pos= hist_data;
    534 : +    goto error;



 Comments   
Comment by Sergei Petrunia [ 2021-12-03 ]

Fixed last meaningful coverage gaps in

commit 5fb922bea0c7f70b6c912946464c43b8c6a007d5 (HEAD -> preview-10.8-MDEV-26519-json-histograms, origin/preview-10.8-MDEV-26519-json-histograms)
Author: Sergei Petrunia <psergey@askmonty.org>
Date:   Fri Dec 3 19:03:42 2021 +0300
 
    More test coverage

There are still uncovered lines but these are out-of-memory errors and other very rare conditions.

Generated at Thu Feb 08 09:47:40 UTC 2024 using Jira 8.20.16#820016-sha1:9d11dbea5f4be3d4cc21f03a88dd11d8c8687422.