Details
-
Bug
-
Status: Closed (View Workflow)
-
Critical
-
Resolution: Won't Fix
-
N/A
-
None
Description
The attachment is a MyISAM table with 10,000 rows. The blobs contain random valid 300-dimensional vectors whose components are decimal values in the range [-1,1], rounded to 3 digits.
The table does NOT contain a vector key yet, to keep the attachment within the allowed size limits.
The key is added in the test case.
When the results were originally observed, the table was created with the key right away and the data was inserted afterwards, so the order of these two steps does not matter.
The test case assumes that the attached .tar.gz has been saved as /tmp/t1.tar.gz.
A non-debug build is recommended for speed, although the results were observed on both debug and non-debug (optimized) builds.
--let $datadir= `select @@datadir`
|
set @mhnsw_cache_size.save= @@mhnsw_cache_size; |
set global mhnsw_cache_size = 1024*1024*1024*16; |
|
show variables like 'mhnsw%'; |
|
--exec cd $datadir/test && tar zxf /tmp/t1.tar.gz
|
|
flush tables;
|
show create table t1; |
select v into @x from t1 where a = 3315; |
|
alter table t1 add vector(v) distance_function=euclidean; |
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 5; |
select a, vec_distance_euclidean(@x,v) d from t1 ignore index(v) order by d limit 5; |
|
alter table t1 drop index v; |
|
alter table t1 add vector(v) distance_function=cosine; |
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 5; |
select a, vec_distance_cosine(@x,v) d from t1 ignore index(v) order by d limit 5; |
|
# Cleanup
|
drop table t1; |
set global mhnsw_cache_size = @mhnsw_cache_size.save; |
The variables and the results with ignore index are, as expected, always the same, so I list them only once:
bb-11.6-MDEV-32887-vector 77be73c489fb7c21ca58e78cef10e0c166f293d8 |
show variables like 'mhnsw%'; |
Variable_name Value
|
mhnsw_cache_size 17179869184
|
mhnsw_distance_function euclidean
|
mhnsw_max_edges_per_node 6
|
mhnsw_min_limit 20
|
select a, vec_distance_euclidean(@x,v) d from t1 ignore index(v) order by d limit 5; |
a d
|
3315 0
|
7635 12.50324915924236
|
9334 12.563088310469816
|
6648 12.608954182338042
|
9172 12.707197837452801
|
...
|
select a, vec_distance_cosine(@x,v) d from t1 ignore index(v) order by d limit 5; |
a d
|
3315 0
|
7635 0.7771883486270881
|
5520 0.8046445031934746
|
9334 0.8086114040573656
|
9172 0.8102427237123788
|
Results with the key:
Run 1 (not even close) |
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 5; |
a d
|
6648 12.608954182338042
|
3111 12.786672011857574
|
6820 12.823950330913943
|
7551 12.928194851581154
|
5809 12.98145983473916
|
...
|
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 5; |
a d
|
8409 0.8386867487818171
|
3306 0.8412524384535305
|
1828 0.8414313336328335
|
4703 0.8435710622067698
|
7909 0.8519108809341281
|
Run 2 (cosine is a bit better, euclidean is not) |
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 5; |
a d
|
1177 12.93642435869211
|
6235 13.026700988526791
|
1324 13.079036454820425
|
2448 13.095841745779792
|
656 13.122851969401845
|
...
|
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 5; |
a d
|
3315 0
|
5520 0.8046445031934746
|
9172 0.8102427237123788
|
7394 0.8127154394866568
|
5485 0.8231259039959056
|
Run 3 |
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 5; |
a d
|
7635 12.50324915924236
|
6648 12.608954182338042
|
1404 12.723599357424018
|
3111 12.786672011857574
|
1391 12.93990258318726
|
...
|
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 5; |
a d
|
3315 0
|
9172 0.8102427237123788
|
7866 0.837638193766091
|
1389 0.8410038139530068
|
4987 0.8442649503706942
|
Run 4 |
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 5; |
a d
|
4254 12.767494074180155
|
1177 12.93642435869211
|
1391 12.93990258318726
|
3507 12.966269694714853
|
255 12.970943053477262
|
...
|
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 5; |
a d
|
5520 0.8046445031934746
|
4350 0.8190976187144863
|
391 0.8232104976427741
|
7551 0.8287919274151629
|
8468 0.8384453379517783
|
The exact same test, but with limit 20 (to check that the problem is not caused by the limit being less than mhnsw_min_limit):
select a, vec_distance_euclidean(@x,v) d from t1 order by d limit 20; |
a d
|
7635 12.50324915924236
|
6648 12.608954182338042
|
1404 12.723599357424018
|
3111 12.786672011857574
|
3306 12.823136865380373
|
6820 12.823950330913943
|
295 13.007126973370685
|
306 13.057057241979072
|
1324 13.079036454820425
|
2870 13.13617213513499
|
2897 13.14262895892705
|
3937 13.148377252067812
|
3448 13.160321764636343
|
3035 13.160573340948721
|
1360 13.186843854252368
|
5015 13.194576904696737
|
284 13.218408697029098
|
8390 13.221548034320039
|
2866 13.243591886142184
|
221 13.26426526394774
|
select a, vec_distance_euclidean(@x,v) d from t1 ignore index(v) order by d limit 20; |
a d
|
3315 0
|
7635 12.50324915924236
|
9334 12.563088310469816
|
6648 12.608954182338042
|
9172 12.707197837452801
|
1404 12.723599357424018
|
4254 12.767494074180155
|
3111 12.786672011857574
|
3306 12.823136865380373
|
6820 12.823950330913943
|
1672 12.832487804471619
|
7756 12.849441666575796
|
4350 12.849856891070015
|
6910 12.884293858360516
|
7394 12.89493025099165
|
2027 12.910620369385416
|
9266 12.921002581438557
|
3437 12.92536904913946
|
7551 12.928194851581154
|
1177 12.93642435869211
|
select a, vec_distance_cosine(@x,v) d from t1 order by d limit 20; |
a d
|
1404 0.8244472141765844
|
7551 0.8287919274151629
|
3418 0.8414019840534113
|
3013 0.8420972403078036
|
6862 0.8611517082215528
|
6548 0.8619836870922029
|
9666 0.8654217200832848
|
2032 0.8704524872350232
|
656 0.8797460565666559
|
6235 0.8799792113360422
|
841 0.8853988672739864
|
3597 0.8861685294170588
|
2729 0.8898933361683874
|
5541 0.8940726801394091
|
1406 0.9004069304945477
|
5550 0.9020265731424626
|
8672 0.9088167751619433
|
6644 0.9101736435569907
|
8487 0.9129824876523147
|
3959 0.9160429870969378
|
select a, vec_distance_cosine(@x,v) d from t1 ignore index(v) order by d limit 20; |
a d
|
3315 0
|
7635 0.7771883486270881
|
5520 0.8046445031934746
|
9334 0.8086114040573656
|
9172 0.8102427237123788
|
7756 0.8125144540813232
|
7394 0.8127154394866568
|
4350 0.8190976187144863
|
3437 0.8203684064084903
|
5485 0.8231259039959056
|
391 0.8232104976427741
|
1404 0.8244472141765844
|
9266 0.8249084943882273
|
4254 0.8275833756230361
|
1672 0.8279814979458477
|
2870 0.8287906907312421
|
7551 0.8287919274151629
|
1208 0.8300718978436028
|
953 0.83157330363886
|
8211 0.8322335194569056
|
Increasing the number of edges per node (mhnsw_max_edges_per_node) also doesn't seem to help much (tried 12 and 50 with limit 5).
Attachments
Issue Links
- is caused by
-
MDEV-34939 vector search in 11.7
- Closed