datastax
diff --git a/‎.github/workflows/ci-unit-tests.yml‎
Lines changed: 5 additions & 4 deletions b/‎.github/workflows/ci-unit-tests.yml‎
Lines changed: 5 additions & 4 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 10 additions & 1 deletion b/‎pyproject.toml‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎ragstack/colbert/__init__.py‎
Lines changed: 3 additions & 3 deletions b/‎ragstack/colbert/__init__.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎ragstack/colbert/cassandra_db.py‎
Lines changed: 0 additions & 179 deletions b/‎ragstack/colbert/cassandra_db.py‎
Lines changed: 0 additions & 179 deletions
diff --git a/‎ragstack/colbert/cassandra_retriever.py‎
Lines changed: 29 additions & 27 deletions b/‎ragstack/colbert/cassandra_retriever.py‎
Lines changed: 29 additions & 27 deletions
@@ -46,9 +46,10 @@ jobs:
  poetry install --no-root -E colbert
  poetry build
 
+ - name: "Lint"
+ run: |
+ tox -e lint
+
  - name: Run ragstack-ai unit and integration tests
- env:
- COLBERT_ASTRA_TOKEN: ${{ secrets.COLBERT_ASTRA_TOKEN }}
- COLBERT_ASTRA_SCB: ${{ secrets.COLBERT_ASTRA_SCB }}
  run: |
- tox
+ tox -e tests
@@ -31,14 +31,23 @@ torch = { version = "2.2.1", optional = true }
 [tool.poetry.extras]
 langchain-google = ["langchain-google-genai", "langchain-google-vertexai"]
 langchain-nvidia = ["langchain-nvidia-ai-endpoints"]
-colbert = ["colbert-ai", "pyarrow", "torch", "cassio"]
+colbert = ["colbert-ai", "pyarrow", "torch"]
 
 [tool.poetry.group.test.dependencies]
 pytest = "*"
+black = "*"
+ruff = "*"
 nbmake = "*"
 testcontainers = "^3.7.1"
 tox = "^4"
 
+[tool.pytest.ini_options]
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
@@ -1,13 +1,13 @@
-from .colbert_embedding import ColbertTokenEmbeddings, calculate_query_maxlen
-from .cassandra_db import CassandraDB
+from .colbert_embedding import ColbertTokenEmbeddings
+from .cassandra_store import CassandraColBERTVectorStore
 from .cassandra_retriever import ColbertCassandraRetriever, max_similarity_torch
 from .token_embedding import PerTokenEmbeddings, PassageEmbeddings, TokenEmbeddings
 from .vector_store import ColBERTVectorStore
 from .constant import DEFAULT_COLBERT_MODEL, DEFAULT_COLBERT_DIM
 
 __all__ = (
  ColbertTokenEmbeddings,
- CassandraDB,
+ CassandraColBERTVectorStore,
  ColbertCassandraRetriever,
  max_similarity_torch,
  PerTokenEmbeddings,
 
@@ -1,11 +1,15 @@
+from typing import List
+
 from .colbert_embedding import ColbertTokenEmbeddings
 
-from .cassandra_db import CassandraDB
+from .cassandra_store import CassandraColBERTVectorStore
 import logging
 from torch import tensor
 import torch
 import math
 
+from .vector_store import ColBERTVectorStoreRetriever, Document
+
 # max similarity between a query vector and a list of embeddings
 # The function returns the highest similarity score (i.e., the maximum dot product value)
 # between the query vector and any of the embedding vectors in the list.
@@ -69,31 +73,34 @@ def max_similarity_torch(query_vector, embedding_list, is_cuda: bool = False):
  return max_sim
 
 
-class ColbertCassandraRetriever:
- db: CassandraDB
- colbertEmbeddings: ColbertTokenEmbeddings
+class ColbertCassandraRetriever(ColBERTVectorStoreRetriever):
+ vector_store: CassandraColBERTVectorStore
+ colbert_embeddings: ColbertTokenEmbeddings
  is_cuda: bool = False
 
  class Config:
  arbitrary_types_allowed = True
 
  def __init__(
  self,
- db: CassandraDB,
- colbertEmbeddings: ColbertTokenEmbeddings,
- **kwargs,
+ vector_store: CassandraColBERTVectorStore,
+ colbert_embeddings: ColbertTokenEmbeddings,
  ):
- # initialize pydantic base model
- self.db = db
- self.colbertEmbeddings = colbertEmbeddings
+ self.vector_store = vector_store
+ self.colbert_embeddings = colbert_embeddings
  self.is_cuda = torch.cuda.is_available()
 
- def retrieve(self, query: str, k: int = 10, query_maxlen: int = 64, **kwargs):
+ def close(self):
+ pass
+
+ def retrieve(
+ self, query: str, k: int = 10, query_maxlen: int = 64, **kwargs
+ ) -> List[Document]:
  #
- # if the query has fewer than a predefined number of of tokens Nq,
- # colbertEmbeddings will pad it with BERT special [mast] token up to length Nq.
+ # if the query has fewer than a predefined number of tokens Nq,
+ # colbert_embeddings will pad it with the BERT special [MASK] token up to length Nq.
  #
- query_encodings = self.colbertEmbeddings.encode_query(
+ query_encodings = self.colbert_embeddings.encode_query(
  query, query_maxlen=query_maxlen
  )
 
@@ -106,8 +113,8 @@ def retrieve(self, query: str, k: int = 10, query_maxlen: int = 64, **kwargs):
  doc_futures = []
  for qv in query_encodings:
  # per token based retrieval
- doc_future = self.db.session.execute_async(
- self.db.query_colbert_ann_stmt, [list(qv), top_k]
+ doc_future = self.vector_store.session.execute_async(
+ self.vector_store.query_colbert_ann_stmt, [list(qv), top_k]
  )
  doc_futures.append(doc_future)
 
@@ -119,8 +126,8 @@ def retrieve(self, query: str, k: int = 10, query_maxlen: int = 64, **kwargs):
  scores = {}
  futures = []
  for title, part in docparts:
- future = self.db.session.execute_async(
- self.db.query_colbert_parts_stmt, [title, part]
+ future = self.vector_store.session.execute_async(
+ self.vector_store.query_colbert_parts_stmt, [title, part]
  )
  futures.append((future, title, part))
 
@@ -141,23 +148,18 @@ def retrieve(self, query: str, k: int = 10, query_maxlen: int = 64, **kwargs):
  # query the doc body
  doc_futures = {}
  for title, part in docs_by_score:
- future = self.db.session.execute_async(
- self.db.query_part_by_pk_stmt, [title, part]
+ future = self.vector_store.session.execute_async(
+ self.vector_store.query_part_by_pk_stmt, [title, part]
  )
  doc_futures[(title, part)] = future
 
- answers = []
+ answers: List[Document] = []
  rank = 1
  for title, part in docs_by_score:
  rs = doc_futures[(title, part)].result()
  score = scores[(title, part)]
  answers.append(
- {
- "title": title,
- "score": score.item(),
- "rank": rank,
- "body": rs.one().body,
- }
+ Document(title=title, score=score.item(), rank=rank, body=rs.one().body)
  )
  rank = rank + 1
  # clean up on tensor memory on GPU