Skip to content

Commit 396282f

Browse files
authored
fix: Fix hybrid search bugs (#257)
This PR fixes the second major issue in #234, related to configuration and querying. Issue breakdown: 1. **Inconsistent `k` parameter:** For hybrid searches, the dense search `LIMIT` was tied to the final `k` parameter, while the sparse search used its own `secondary_top_k`. 2. **Late initialization of `HybridSearchConfig`:** The `HybridSearchConfig` was initialized after the `k` parameter was calculated, which could lead to incorrect behavior if the config was passed dynamically via `kwargs`. Overview of the changes: 1. The `__query_collection` method now uses a separate `dense_limit` for the dense search query. If a `HybridSearchConfig` is active, `dense_limit` is set to `primary_top_k`. For dense-only searches, `dense_limit` defaults to the final `k` value, preserving the existing behavior. 2. The `hybrid_search_config` is now initialized at the beginning of `__query_collection`, ensuring that any `kwargs` overrides are handled before `k` or `dense_limit` are calculated.
1 parent f8de351 commit 396282f

File tree

1 file changed

+13
-16
lines changed

1 file changed

+13
-16
lines changed

langchain_postgres/v2/async_vectorstore.py

Lines changed: 13 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -580,16 +580,16 @@ async def __query_collection(
580580
For best hybrid search performance, consider creating a TSV column
581581
and adding GIN index.
582582
"""
583-
if not k:
584-
k = (
585-
max(
586-
self.k,
587-
self.hybrid_search_config.primary_top_k,
588-
self.hybrid_search_config.secondary_top_k,
589-
)
590-
if self.hybrid_search_config
591-
else self.k
592-
)
583+
hybrid_search_config = kwargs.get(
584+
"hybrid_search_config", self.hybrid_search_config
585+
)
586+
587+
final_k = k if k is not None else self.k
588+
589+
dense_limit = final_k
590+
if hybrid_search_config:
591+
dense_limit = hybrid_search_config.primary_top_k
592+
593593
operator = self.distance_strategy.operator
594594
search_function = self.distance_strategy.search_function
595595

@@ -617,9 +617,9 @@ async def __query_collection(
617617
embedding_data_string = ":query_embedding"
618618
where_filters = f"WHERE {safe_filter}" if safe_filter else ""
619619
dense_query_stmt = f"""SELECT {column_names}, {search_function}("{self.embedding_column}", {embedding_data_string}) as distance
620-
FROM "{self.schema_name}"."{self.table_name}" {where_filters} ORDER BY "{self.embedding_column}" {operator} {embedding_data_string} LIMIT :k;
620+
FROM "{self.schema_name}"."{self.table_name}" {where_filters} ORDER BY "{self.embedding_column}" {operator} {embedding_data_string} LIMIT :dense_limit;
621621
"""
622-
param_dict = {"query_embedding": query_embedding, "k": k}
622+
param_dict = {"query_embedding": query_embedding, "dense_limit": dense_limit}
623623
if filter_dict:
624624
param_dict.update(filter_dict)
625625
if self.index_query_options:
@@ -637,16 +637,13 @@ async def __query_collection(
637637
result_map = result.mappings()
638638
dense_results = result_map.fetchall()
639639

640-
hybrid_search_config = kwargs.get(
641-
"hybrid_search_config", self.hybrid_search_config
642-
)
643640
fts_query = (
644641
hybrid_search_config.fts_query
645642
if hybrid_search_config and hybrid_search_config.fts_query
646643
else kwargs.get("fts_query", "")
647644
)
648645
if hybrid_search_config and fts_query:
649-
hybrid_search_config.fusion_function_parameters["fetch_top_k"] = k
646+
hybrid_search_config.fusion_function_parameters["fetch_top_k"] = final_k
650647
# do the sparse query
651648
lang = (
652649
f"'{hybrid_search_config.tsv_lang}',"

0 commit comments

Comments
 (0)