Skip to content

Commit 9200b58

Browse files
committed
draft: simplify filter impl
1 parent 4f0c16c commit 9200b58

6 files changed

Lines changed: 19 additions & 46 deletions

File tree

faiss_vector_index.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,7 @@ type faissIndex interface {
4646
// performs a search on the index using the provided query vector and parameters, with an optional
4747
// selector (an exclude "blocklist" or an include "allowlist" of indexed vectors) to restrict the search.
4848
// It returns the distances and corresponding vector IDs of the top k results.
49-
searchWithoutIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error)
50-
// performs a search on the index using the provided query vector and parameters, with a required
51-
// include selector to indicate an "allowlist" of indexed vectors to consider during search.
52-
// It returns the distances and corresponding vector IDs of the top k results.
53-
searchWithIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error)
49+
searchWithSelector(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error)
5450
// serializes the index into a byte slice,
5551
// which can be stored or transmitted.
5652
serialize() ([]byte, error)

faiss_vector_index_bivf.go

Lines changed: 9 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -74,46 +74,24 @@ func (b *faissBinaryIndex) reconstructBatch(vecIDs []int64, prealloc []float32)
7474
return nil, errNotSupported
7575
}
7676

77-
func (b *faissBinaryIndex) searchWithoutIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
77+
func (b *faissBinaryIndex) searchWithSelector(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
7878
// search the binary index with oversampling and then do a re-ranking on the
7979
// FAISS index to get the top K results
8080
// first binarize the query vector if not already done
8181
qVector.binarize()
82-
_, binIDs, err := b.binary.SearchWithoutIDs(qVector.binaryData, binaryOversampleValue*k,
83-
selector, params)
84-
if err != nil {
85-
return nil, nil, err
86-
}
87-
var scores []float32
88-
var labels []int64
89-
// if we have a backing index for re-ranking, compute the distances/scores for the
90-
// retrieved binary IDs and then get the top K results based on those distances/scores.
91-
if b.backing != nil {
92-
distances, err := b.backing.DistCompute(qVector.floatData, binIDs)
93-
if err != nil {
94-
return nil, nil, err
95-
}
96-
// quick select algorithm for inplace partial sorting to get top K results
97-
// based on distances/scores
98-
scores, labels = topNIDsByDistance(distances, binIDs, int(k))
82+
83+
var binIDs []int64
84+
var err error
85+
if selector == nil {
86+
_, binIDs, err = b.binary.Search(qVector.binaryData, binaryOversampleValue*k)
9987
} else {
100-
// if we don't have a backing index for re-ranking, we return error since we cannot return meaningful
101-
// scores without a backing index to compute distances/scores for the retrieved binary IDs.
102-
return nil, nil, errNotSupported
88+
_, binIDs, err = b.binary.SearchWithSelector(qVector.binaryData, binaryOversampleValue*k,
89+
selector, params)
10390
}
104-
return scores, labels, nil
105-
}
106-
107-
func (b *faissBinaryIndex) searchWithIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
108-
// search the binary index with oversampling and then do a re-ranking on the
109-
// FAISS index to get the top K results
110-
// first binarize the query vector if not already done
111-
qVector.binarize()
112-
_, binIDs, err := b.binary.SearchWithIDs(qVector.binaryData, binaryOversampleValue*k,
113-
selector, params)
11491
if err != nil {
11592
return nil, nil, err
11693
}
94+
11795
var scores []float32
11896
var labels []int64
11997
// if we have a backing index for re-ranking, compute the distances/scores for the

faiss_vector_index_float32.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,11 @@ func (f *faissFloat32Index) reconstructBatch(vecIDs []int64, prealloc []float32)
6363
return f.idx.ReconstructBatch(vecIDs, prealloc)
6464
}
6565

66-
func (f *faissFloat32Index) searchWithoutIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
67-
return f.idx.SearchWithoutIDs(qVector.floatData, k, selector, params)
68-
}
69-
70-
func (f *faissFloat32Index) searchWithIDs(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
71-
return f.idx.SearchWithIDs(qVector.floatData, k, selector, params)
66+
func (f *faissFloat32Index) searchWithSelector(qVector *vectorSet, k int64, selector faiss.Selector, params json.RawMessage) ([]float32, []int64, error) {
67+
if selector == nil {
68+
return f.idx.Search(qVector.floatData, k)
69+
}
70+
return f.idx.SearchWithSelector(qVector.floatData, k, selector, params)
7271
}
7372

7473
func (f *faissFloat32Index) serialize() ([]byte, error) {

faiss_vector_wrapper.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ func (v *vectorIndexWrapper) searchWithoutIDs(qVector *vectorSet, k int64,
406406
if sel != nil {
407407
defer sel.Delete()
408408
}
409-
return v.index.searchWithoutIDs(qVector, k, sel, params)
409+
return v.index.searchWithSelector(qVector, k, sel, params)
410410
},
411411
func(numIter int, labels []int64) bool {
412412
// if this is the first loop iteration and we have < k unique docIDs,
@@ -457,7 +457,7 @@ func (v *vectorIndexWrapper) searchWithIDs(vecSet *vectorSet, k int64, include *
457457
if sel != nil {
458458
defer sel.Delete()
459459
}
460-
return v.index.searchWithIDs(vecSet, k, sel, params)
460+
return v.index.searchWithSelector(vecSet, k, sel, params)
461461
},
462462
func(numIter int, labels []int64) bool {
463463
// if this is the first loop iteration and we have < k unique docIDs,

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@ require (
2020
github.com/spf13/pflag v1.0.9 // indirect
2121
golang.org/x/sys v0.40.0 // indirect
2222
)
23+
24+
replace github.com/blevesearch/go-faiss => /Users/thejas.orkombu/fts/blevesearch/go-faiss

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ github.com/bits-and-blooms/bitset v1.24.2 h1:M7/NzVbsytmtfHbumG+K2bremQPMJuqv1JD
44
github.com/bits-and-blooms/bitset v1.24.2/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
55
github.com/blevesearch/bleve_index_api v1.3.9 h1:TLoiBaqcfWGfI1Il0+zzky452uYCPoSMosDSltkCfKs=
66
github.com/blevesearch/bleve_index_api v1.3.9/go.mod h1:xvd48t5XMeeioWQ5/jZvgLrV98flT2rdvEJ3l/ki4Ko=
7-
github.com/blevesearch/go-faiss v1.0.30 h1:pWX3/Si4Z7GlwsD2eRXoF3SfVaDkg8plBlPdUKuhGts=
8-
github.com/blevesearch/go-faiss v1.0.30/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
97
github.com/blevesearch/mmap-go v1.2.0 h1:l33nNKPFcBjJUMwem6sAYJPUzhUCABoK9FxZDGiFNBI=
108
github.com/blevesearch/mmap-go v1.2.0/go.mod h1:Vd6+20GBhEdwJnU1Xohgt88XCD/CTWcqbCNxkZpyBo0=
119
github.com/blevesearch/scorch_segment_api/v2 v2.4.5 h1:Q7Bzpyk86xS22TgTd4VQfSvzZAybDEJ90hNOGqyNqlI=

0 commit comments

Comments
 (0)