From f29c605c6b45a8093f605e3d25a7d08bbb4fea15 Mon Sep 17 00:00:00 2001 From: zichenzhao Date: Wed, 3 Jun 2026 11:18:48 +0800 Subject: [PATCH] mhnsw: invalidate only modified nodes on commit, not every visited one Add a dirty bit to FVectorNode, set it in FVectorNode::save() (which covers the inserted target node and every neighbor rewritten by update_second_degree_neighbors) and in mhnsw_invalidate() alongside the deleted flag. The partial-invalidation loop in do_commit now skips non-dirty entries. The pre-existing TODO ("consider flushing only changed nodes (a flag in the node)") is removed. --- sql/vector_mhnsw.cc | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/vector_mhnsw.cc b/sql/vector_mhnsw.cc index 14942735e88..ab0b213a462 100644 --- a/sql/vector_mhnsw.cc +++ b/sql/vector_mhnsw.cc @@ -373,7 +373,7 @@ class FVectorNode const FVector *vec= nullptr; Neighborhood *neighbors= nullptr; uint8_t max_layer; - bool stored:1, deleted:1; + bool stored:1, deleted:1, dirty:1; FVectorNode(MHNSW_Share *ctx_, const void *gref_); FVectorNode(MHNSW_Share *ctx_, const void *tref_, uint8_t layer, @@ -698,10 +698,13 @@ int MHNSW_Trx::do_commit(THD *thd, bool all) { // consider copying nodes from trx to shared cache when it makes // sense. for ann_benchmarks it does not. - // also, consider flushing only changed nodes (a flag in the node) for (FVectorNode &from : trx->get_cache()) + { + if (!from.dirty) + continue; if (FVectorNode *node= ctx->find_node(from.gref())) node->vec= nullptr; + } ctx->start= nullptr; } ctx->release(true, share); @@ -808,14 +811,14 @@ const FVector *FVectorNode::make_vec(const void *v) } FVectorNode::FVectorNode(MHNSW_Share *ctx_, const void *gref_) - : ctx(ctx_), stored(true), deleted(false) + : ctx(ctx_), stored(true), deleted(false), dirty(false) { memcpy(gref(), gref_, gref_len()); } FVectorNode::FVectorNode(MHNSW_Share *ctx_, const void *tref_, uint8_t layer, const void *vec_) - : ctx(ctx_), stored(false), deleted(false) + : ctx(ctx_), stored(false), deleted(false), dirty(false) { DBUG_ASSERT(tref_); memset(gref(), 0xff, gref_len()); // important: larger than any real gref @@ -1045,6 +1048,14 @@ int FVectorNode::save(TABLE *graph) DBUG_ASSERT(vec); DBUG_ASSERT(neighbors); + // Mark as dirty BEFORE touching disk: any path that calls save() + // is mutating this node. do_commit's partial invalidation + // uses this flag to skip read-only trx-cache entries and only + // invalidate the share-cache copies that are now actually stale. + // Set even on save() error paths so a partially-written node still + // forces share to reload. + dirty= true; + restore_record(graph, s->default_values); graph->field[FIELD_LAYER]->store(max_layer, false); if (deleted) @@ -1533,6 +1544,7 @@ int mhnsw_invalidate(TABLE *table, const uchar *rec, KEY *keyinfo) graph->file->position(graph->record[0]); FVectorNode *node= ctx->get_node(graph->file->ref); node->deleted= true; + node->dirty= true; // forces share-cache invalidation on commit return 0; } -- 2.43.7