Skip to content

Commit af58188

Browse files
committed
Merge remote-tracking branch 'origin/develop'
2 parents c994d08 + e3f0a80 commit af58188

17 files changed

+598
-106
lines changed

python_vect_bindings/nmslib_vector.cc

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -260,15 +260,18 @@ class IndexWrapper {
260260
}
261261

262262
PyObject* KnnQuery(int k, const Object* query) {
263+
IntVector ids;
264+
Py_BEGIN_ALLOW_THREADS
265+
KNNQueue<T>* res;
263266
KNNQuery<T> knn(*space_, query, k);
264267
index_->Search(&knn, -1);
265-
KNNQueue<T>* res = knn.Result()->Clone();
266-
IntVector ids;
268+
res = knn.Result()->Clone();
267269
while (!res->Empty()) {
268270
ids.insert(ids.begin(), res->TopObject()->id());
269271
res->Pop();
270272
}
271273
delete res;
274+
Py_END_ALLOW_THREADS
272275
PyObject* z = PyList_New(ids.size());
273276
if (!z) {
274277
return NULL;
@@ -386,7 +389,9 @@ template <typename T>
386389
void _createIndex(PyObject* ptr, const AnyParams& index_params) {
387390
IndexWrapper<T>* index = reinterpret_cast<IndexWrapper<T>*>(
388391
PyLong_AsVoidPtr(ptr));
392+
Py_BEGIN_ALLOW_THREADS
389393
index->CreateIndex(index_params);
394+
Py_END_ALLOW_THREADS
390395
}
391396

392397
PyObject* createIndex(PyObject* self, PyObject* args) {

similarity_search/include/distcomp.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,30 @@ template <class T> T AngularDistance(const T *p1, const T *p2, size_t qty);
6464
template <class T> T CosineSimilarity(const T *p1, const T *p2, size_t qty);
6565
// Scalar product divided by vector Euclidean norms
6666
template <class T> T NormScalarProduct(const T *p1, const T *p2, size_t qty);
67+
// Scalar product divided by query vector Euclidean norm
68+
// Query is the second argument (by convention we use only left queries, where a data point is the left argument)
69+
// We don't have a SIMD version for QueryNormScalarProduct function yet.
70+
template <class T> T QueryNormScalarProduct(const T *p1, const T *p2, size_t qty);
6771
template <class T> T NormScalarProductSIMD(const T *p1, const T *p2, size_t qty);
6872

6973
// Scalar product that is not normalized
7074
template <class T> T ScalarProduct(const T *p1, const T *p2, size_t qty);
7175
template <class T> T ScalarProductSIMD(const T *p1, const T *p2, size_t qty);
7276

73-
// Fast scalar product between sparse vectors (using SIMD)
74-
float ScalarProductFast(const char* pData1, size_t len1, const char* pData2, size_t len2);
77+
// Fast normalized-scalar product between sparse vectors (using SIMD)
78+
float NormSparseScalarProductFast(const char* pData1, size_t len1, const char* pData2, size_t len2);
79+
/*
80+
* Fast query-side normalized-scalar product between sparse vectors (using SIMD).
81+
* By our standard convention of using left queries, the query is the right argument.
82+
*/
83+
float QueryNormSparseScalarProductFast(const char* pData, size_t lenData, const char* pQuery, size_t lenQuery);
84+
// Fast scalar product between sparse vectors without normalization (using SIMD)
85+
float SparseScalarProductFast(const char* pData1, size_t len1, const char* pData2, size_t len2);
86+
87+
/*
88+
* Sometimes due to rounding errors, we get values > 1 or < -1.
89+
* This throws off other functions that use scalar product, e.g., acos
90+
*/
7591

7692
/*
7793
* Itakura-Saito distance

similarity_search/include/factory/init_spaces.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,13 @@ inline void initSpaces() {
113113
REGISTER_SPACE_CREATOR(double, SPACE_SPARSE_COSINE_SIMILARITY, CreateSparseCosineSimilarity)
114114
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_ANGULAR_DISTANCE, CreateSparseAngularDistance)
115115
REGISTER_SPACE_CREATOR(double, SPACE_SPARSE_ANGULAR_DISTANCE, CreateSparseAngularDistance)
116+
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_NEGATIVE_SCALAR, CreateSparseNegativeScalarProduct)
117+
REGISTER_SPACE_CREATOR(double, SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR, CreateSparseQueryNormNegativeScalarProduct)
116118

117119
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_COSINE_SIMILARITY_FAST, CreateSparseCosineSimilarityFast)
118120
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_ANGULAR_DISTANCE_FAST, CreateSparseAngularDistanceFast)
119-
121+
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_NEGATIVE_SCALAR_FAST, CreateSparseNegativeScalarProductFast)
122+
REGISTER_SPACE_CREATOR(float, SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR_FAST, CreateSparseQueryNormNegativeScalarProductFast)
120123

121124
REGISTER_SPACE_CREATOR(float, "savch", CreateSavch)
122125

similarity_search/include/factory/space/space_sparse_scalar.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ namespace similarity {
2626
* Creating functions.
2727
*/
2828

29+
/* Regular sparse spaces */
30+
2931
template <typename dist_t>
3032
Space<dist_t>* CreateSparseCosineSimilarity(const AnyParams& /* ignoring params */) {
3133
// Cosine Similarity
@@ -39,12 +41,36 @@ Space<dist_t>* CreateSparseAngularDistance(const AnyParams& /* ignoring params *
3941
return new SpaceSparseAngularDistance<dist_t>();
4042
}
4143

44+
template <typename dist_t>
45+
Space<dist_t>* CreateSparseNegativeScalarProduct(const AnyParams& /* ignoring params */) {
46+
// Negative scalar product
47+
return new SpaceSparseNegativeScalarProduct<dist_t>();
48+
}
49+
50+
template <typename dist_t>
51+
Space<dist_t>* CreateSparseQueryNormNegativeScalarProduct(const AnyParams& /* ignoring params */) {
52+
// Query-normalized negative scalar product
53+
return new SpaceSparseQueryNormNegativeScalarProduct<dist_t>();
54+
}
55+
56+
/* Fast sparse spaces */
57+
4258
Space<float>* CreateSparseCosineSimilarityFast(const AnyParams& /* ignoring params */) {
4359
// Cosine Similarity
4460
return new SpaceSparseCosineSimilarityFast();
4561
}
4662

4763

64+
Space<float>* CreateSparseNegativeScalarProductFast(const AnyParams& /* ignoring params */) {
65+
// Negative scalar product (fast)
66+
return new SpaceSparseNegativeScalarProductFast();
67+
}
68+
69+
Space<float>* CreateSparseQueryNormNegativeScalarProductFast(const AnyParams& /* ignoring params */) {
70+
// Query-normalized negative scalar product (fast)
71+
return new SpaceSparseQueryNormNegativeScalarProductFast();
72+
}
73+
4874
Space<float>* CreateSparseAngularDistanceFast(const AnyParams& /* ignoring params */) {
4975
// Angular distance (fast)
5076
return new SpaceSparseAngularDistanceFast();

similarity_search/include/gold_standard.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ class GoldStandardManager {
234234
// if maxKeepEntryCoeff is non-zero, it defines how many GS entries (relative to the result set size) we memorize
235235
void Compute(size_t threadQty, float maxKeepEntryCoeff) {
236236
threadQty = std::max(size_t(1), threadQty);
237-
LOG(LIB_INFO) << "Computing gold standard data using " << threadQty << " threads, keeping " << maxKeepEntryCoeff<< " times result set size entries";;
237+
LOG(LIB_INFO) << "Computing gold standard data using " << threadQty << " threads, keeping " << maxKeepEntryCoeff<< "x entries compared to the result set size";;
238238
for (size_t i = 0; i < config_.GetRange().size(); ++i) {
239239
vvGoldStandardRange_[i].clear();
240240
const dist_t radius = config_.GetRange()[i];

similarity_search/include/logging.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ class RuntimeErrorWrapper {
6969
#define LOG(severity) \
7070
Logger(severity, __FILE__, __LINE__, __FUNCTION__).stream()
7171

72-
7372
#define CHECK(condition) \
7473
if (!(condition)) {\
7574
LOG(LIB_ERROR) << "Check failed: " << #condition; \

similarity_search/include/simdutils.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@
5050
#define MM_EXTRACT_DOUBLE(v,i) _mm_cvtsd_f64(_mm_shuffle_pd(v, v, _MM_SHUFFLE2(0, i)))
5151
#define MM_EXTRACT_FLOAT(v,i) _mm_cvtss_f32(_mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 0, 0, i)))
5252

53+
/*
54+
* However, if we need to extract many numbers to sum them up, it is more efficient
55+
* not to use the above MM_EXTRACT_FLOAT (https://github.com/searchivarius/BlogCode/tree/master/2016/bench_sums)
56+
*
57+
*/
58+
inline float mm128_sum(__m128 reg) {
59+
float PORTABLE_ALIGN16 TmpRes[4];
60+
_mm_store_ps(TmpRes, reg);
61+
return TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
62+
};
63+
5364
#endif
5465

5566
#ifdef _MSC_VER

similarity_search/include/space/space_sparse_scalar.h

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@
3030
#include "space_sparse_vector.h"
3131
#include "distcomp.h"
3232

33-
#define SPACE_SPARSE_COSINE_SIMILARITY "cosinesimil_sparse"
34-
#define SPACE_SPARSE_ANGULAR_DISTANCE "angulardist_sparse"
33+
#define SPACE_SPARSE_COSINE_SIMILARITY "cosinesimil_sparse"
34+
#define SPACE_SPARSE_ANGULAR_DISTANCE "angulardist_sparse"
35+
#define SPACE_SPARSE_NEGATIVE_SCALAR "negdotprod_sparse"
36+
#define SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR "querynorm_negdotprod_sparse"
3537

3638
namespace similarity {
3739

@@ -54,7 +56,7 @@ class SpaceSparseAngularDistance : public SpaceSparseVectorSimpleStorage<dist_t>
5456
struct SpaceAngularDist {
5557
dist_t operator()(const dist_t* x, const dist_t* y, size_t length) const {
5658
dist_t val = AngularDistance(x, y, length);
57-
// TODO: @leo shouldn't happen any more, but let's keep this check here for a while
59+
// This shouldn't normally happen, but let's keep this check
5860
if (std::isnan(val)) throw runtime_error("SpaceAngularDist Bug: NAN dist!!!!");
5961
return val;
6062
}
@@ -82,7 +84,7 @@ class SpaceSparseCosineSimilarity : public SpaceSparseVectorSimpleStorage<dist_t
8284
struct SpaceCosineSimilarityDist {
8385
dist_t operator()(const dist_t* x, const dist_t* y, size_t length) const {
8486
dist_t val = CosineSimilarity(x, y, length);
85-
// TODO: @leo shouldn't happen any more, but let's keep this check here for a while
87+
// This shouldn't normally happen, but let's keep this check
8688
if (std::isnan(val)) throw runtime_error("SpaceCosineSimilarityDist Bug: NAN dist!!!!");
8789
return val;
8890
}
@@ -91,6 +93,59 @@ class SpaceSparseCosineSimilarity : public SpaceSparseVectorSimpleStorage<dist_t
9193
DISABLE_COPY_AND_ASSIGN(SpaceSparseCosineSimilarity);
9294
};
9395

96+
template <typename dist_t>
97+
class SpaceSparseNegativeScalarProduct : public SpaceSparseVectorSimpleStorage<dist_t> {
98+
public:
99+
explicit SpaceSparseNegativeScalarProduct() {}
100+
virtual ~SpaceSparseNegativeScalarProduct() {}
101+
102+
virtual std::string StrDesc() const {
103+
return "NegativeScalarProduct";
104+
}
105+
106+
protected:
107+
virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const {
108+
return SpaceSparseVectorSimpleStorage<dist_t>::
109+
ComputeDistanceHelper(obj1, obj2, distObjNegativeScalarProduct_);
110+
}
111+
private:
112+
struct SpaceNegativeScalarDist {
113+
dist_t operator()(const dist_t* x, const dist_t* y, size_t length) const {
114+
return -ScalarProductSIMD(x, y, length);
115+
}
116+
};
117+
SpaceNegativeScalarDist distObjNegativeScalarProduct_;
118+
DISABLE_COPY_AND_ASSIGN(SpaceSparseNegativeScalarProduct);
119+
};
120+
121+
template <typename dist_t>
122+
class SpaceSparseQueryNormNegativeScalarProduct : public SpaceSparseVectorSimpleStorage<dist_t> {
123+
public:
124+
explicit SpaceSparseQueryNormNegativeScalarProduct() {}
125+
virtual ~SpaceSparseQueryNormNegativeScalarProduct() {}
126+
127+
virtual std::string StrDesc() const {
128+
return "QueryNormNegativeScalarProduct";
129+
}
130+
131+
protected:
132+
virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const {
133+
return SpaceSparseVectorSimpleStorage<dist_t>::
134+
ComputeDistanceHelper(obj1, obj2, distObjQueryNormNegativeScalarProduct_);
135+
}
136+
private:
137+
struct SpaceNegativeQueryNormScalarDist {
138+
dist_t operator()(const dist_t* x, const dist_t* y, size_t length) const {
139+
// This shouldn't normally happen, but let's keep this check
140+
dist_t val = QueryNormScalarProduct(x, y, length);
141+
if (std::isnan(val)) throw runtime_error("SpaceNegativeQueryNormScalarDist Bug: NAN dist!!!!");
142+
return -val;
143+
}
144+
};
145+
SpaceNegativeQueryNormScalarDist distObjQueryNormNegativeScalarProduct_;
146+
DISABLE_COPY_AND_ASSIGN(SpaceSparseQueryNormNegativeScalarProduct);
147+
};
148+
94149

95150
} // namespace similarity
96151

similarity_search/include/space/space_sparse_scalar_fast.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@
3434
#define SPACE_SPARSE_COSINE_SIMILARITY_FAST "cosinesimil_sparse_fast"
3535
#define SPACE_SPARSE_ANGULAR_DISTANCE_FAST "angulardist_sparse_fast"
3636

37+
#define SPACE_SPARSE_NEGATIVE_SCALAR_FAST "negdotprod_sparse_fast"
38+
#define SPACE_SPARSE_QUERY_NORM_NEGATIVE_SCALAR_FAST "querynorm_negdotprod_sparse_fast"
39+
40+
3741
namespace similarity {
3842

3943
class SpaceSparseCosineSimilarityFast : public SpaceSparseVectorInter<float> {
@@ -58,6 +62,28 @@ class SpaceSparseAngularDistanceFast : public SpaceSparseVectorInter<float> {
5862
DISABLE_COPY_AND_ASSIGN(SpaceSparseAngularDistanceFast);
5963
};
6064

65+
class SpaceSparseNegativeScalarProductFast : public SpaceSparseVectorInter<float> {
66+
public:
67+
explicit SpaceSparseNegativeScalarProductFast(){}
68+
virtual std::string StrDesc() const {
69+
return "NegativeScalarProduct (fast)";
70+
}
71+
protected:
72+
virtual float HiddenDistance(const Object* obj1, const Object* obj2) const;
73+
DISABLE_COPY_AND_ASSIGN(SpaceSparseNegativeScalarProductFast);
74+
};
75+
76+
class SpaceSparseQueryNormNegativeScalarProductFast : public SpaceSparseVectorInter<float> {
77+
public:
78+
explicit SpaceSparseQueryNormNegativeScalarProductFast(){}
79+
virtual std::string StrDesc() const {
80+
return "QueryNormNegativeScalarProduct (fast)";
81+
}
82+
protected:
83+
virtual float HiddenDistance(const Object* obj1, const Object* obj2) const;
84+
DISABLE_COPY_AND_ASSIGN(SpaceSparseQueryNormNegativeScalarProductFast);
85+
};
86+
6187

6288
} // namespace similarity
6389

similarity_search/include/space/space_sparse_vector.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,6 @@ class SpaceSparseVector : public Space<dist_t> {
107107
// Sparse vectors have no fixed dimensionality
108108
virtual size_t GetElemQty(const Object* object) const {return 0;}
109109

110-
virtual dist_t ScalarProduct(const Object* obj1, const Object* obj2) const = 0;
111110
protected:
112111
void ReadSparseVec(std::string line, size_t line_num, IdType& label, vector<ElemType>& v) const;
113112
virtual void CreateVectFromObj(const Object* obj, vector<ElemType>& v) const = 0;
@@ -135,15 +134,10 @@ class SpaceSparseVectorSimpleStorage : public SpaceSparseVector<dist_t> {
135134
}
136135
}
137136

138-
Object* CreateObjFromVect(IdType id, LabelType label, const vector<ElemType>& InpVect) const {
137+
virtual Object* CreateObjFromVect(IdType id, LabelType label, const vector<ElemType>& InpVect) const override {
139138
return new Object(id, label, InpVect.size() * sizeof(ElemType), &InpVect[0]);
140139
};
141140

142-
virtual dist_t ScalarProduct(const Object* obj1, const Object* obj2) const {
143-
SpaceNormScalarProduct distObjNormSP;
144-
return SpaceSparseVectorSimpleStorage<dist_t>::
145-
ComputeDistanceHelper(obj1, obj2, distObjNormSP);
146-
}
147141
protected:
148142
DISABLE_COPY_AND_ASSIGN(SpaceSparseVectorSimpleStorage);
149143

@@ -156,13 +150,6 @@ class SpaceSparseVectorSimpleStorage : public SpaceSparseVector<dist_t> {
156150
for (size_t i = 0; i < qty; ++i) v[i] = beg[i];
157151
}
158152

159-
struct SpaceNormScalarProduct {
160-
dist_t operator()(const dist_t* x, const dist_t* y, size_t length) const {
161-
return NormScalarProduct(x, y, length);
162-
}
163-
};
164-
165-
166153
virtual dist_t HiddenDistance(const Object* obj1, const Object* obj2) const = 0;
167154

168155
/*

0 commit comments

Comments
 (0)