Skip to content
Closed
Prev Previous commit
Next Next commit
implement feedback from review comments
  • Loading branch information
Kirbstomper committed Sep 22, 2023
commit 229864a1dc2f8720f32ddb35b55a8687d11cc9ec
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
*/
public class MongoDBVectorStore implements VectorStore {

MongoTemplate mongoTemplate;
private MongoTemplate mongoTemplate;

EmbeddingClient embeddingClient;
private EmbeddingClient embeddingClient;

private final String VECTOR_COLLECTION_NAME = "vector_store";
private static final String VECTOR_COLLECTION_NAME = "vector_store";

public MongoDBVectorStore(MongoTemplate mongoTemplate, EmbeddingClient embeddingClient) {
this.mongoTemplate = mongoTemplate;
Expand All @@ -54,7 +54,7 @@ public MongoDBVectorStore(MongoTemplate mongoTemplate, EmbeddingClient embedding
* @param basicDBObject
* @return
*/
public Document mapBasicDbObject(BasicDBObject basicDBObject) {
private Document mapBasicDbObject(BasicDBObject basicDBObject) {
String id = basicDBObject.getString("_id");
String content = basicDBObject.getString("text");
Map<String, Object> metadata = (Map<String, Object>) basicDBObject.get("metadata");
Expand All @@ -72,15 +72,15 @@ public void add(List<Document> documents) {
List<Double> embedding = this.embeddingClient.embed(document);
document.setEmbedding(embedding);

mongoTemplate.save(document, VECTOR_COLLECTION_NAME);
this.mongoTemplate.save(document, VECTOR_COLLECTION_NAME);
}
}

@Override
public Optional<Boolean> delete(List<String> idList) {
Query query = new Query(where("_id").in(idList));

var deleteRes = mongoTemplate.remove(query, VECTOR_COLLECTION_NAME);
var deleteRes = this.mongoTemplate.remove(query, VECTOR_COLLECTION_NAME);
long deleteCount = deleteRes.getDeletedCount();

return Optional.of(deleteCount == idList.size());
Expand All @@ -103,7 +103,7 @@ public List<Document> similaritySearch(String query, int k) {
InMemoryVectorStore.EmbeddingMath.cosineSimilarity(queryEmbedding, entry.getEmbedding())))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused. I had assumed, perhaps wrongly, that MongoDB provides built-in Vector Search support?

If it doesn't provide a native vector search capability, then I'm not convinced this MongoDB store would perform or scale any better than the in-memory one.
@markpollack what do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for taking the time to review @tzolov!

The managed version of MongoDB (Atlas) allows configuration to provide semantic search capability by automatically attaching embeddings to documents using fields configured for each collection.
https://www.mongodb.com/products/platform/atlas-vector-search

Given that, I more or less went for making a PersistentVectorStore backed by MongoDB, partially out of "could I do it" and to provide another persistent (though not optimal) option out of the box.

.sorted(Comparator.<InMemoryVectorStore.Similarity>comparingDouble(s -> s.getSimilarity()).reversed())
.limit(k)
.map(s -> mongoTemplate.findById(s.getKey(), BasicDBObject.class, VECTOR_COLLECTION_NAME))
.map(s -> this.mongoTemplate.findById(s.getKey(), BasicDBObject.class, VECTOR_COLLECTION_NAME))
.map(this::mapBasicDbObject)
.toList();
}
Expand All @@ -120,7 +120,7 @@ public List<Document> similaritySearch(String query, int k, double threshold) {
.filter(s -> s.getSimilarity() >= threshold)
.sorted(Comparator.<InMemoryVectorStore.Similarity>comparingDouble(s -> s.getSimilarity()).reversed())
.limit(k)
.map(s -> mongoTemplate.findById(s.getKey(), BasicDBObject.class, VECTOR_COLLECTION_NAME))
.map(s -> this.mongoTemplate.findById(s.getKey(), BasicDBObject.class, VECTOR_COLLECTION_NAME))
.map(this::mapBasicDbObject)
.toList();
}
Expand Down