Files
LegacyHUB/tests/test_hybrid_search.py
Vadim Malanov 7f72171572 chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:41:50 +03:00

53 lines
1.8 KiB
Python

from __future__ import annotations
from app.api.schemas import SearchMode
from app.indexing.hybrid_search import _Candidate, _merge
def _make(chunk_id: str, *, bm25_rank: int | None = None, dense_rank: int | None = None) -> _Candidate:
    """Build a ``_Candidate`` fixture with fixed placeholder document fields.

    Only ``chunk_id`` and the two ranks vary between fixtures. Each score is
    derived from its rank (``1 / rank`` for BM25, ``1 - 0.1 * rank`` for
    dense); a rank left as ``None`` leaves the matching score ``None`` too.
    """
    lexical_score = 1.0 / bm25_rank if bm25_rank is not None else None
    semantic_score = 1.0 - 0.1 * dense_rank if dense_rank is not None else None
    return _Candidate(
        chunk_id=chunk_id,
        document_id="00000000-0000-0000-0000-000000000000",
        page_number=1,
        block_type="paragraph",
        block_id=None,
        text=f"text-{chunk_id}",
        source_path="/tmp/doc.pdf",
        original_file_name="doc.pdf",
        quality_flags={},
        metadata={},
        bm25_score=lexical_score,
        bm25_rank=bm25_rank,
        dense_score=semantic_score,
        dense_rank=dense_rank,
    )
def test_merge_lexical_passes_through():
    """Check that ``lexical`` mode returns the BM25 candidates in the order given."""
    lexical_hits = [_make("a", bm25_rank=1), _make("b", bm25_rank=2)]
    no_semantic_hits = []
    merged = _merge(lexical_hits, no_semantic_hits, "lexical")
    merged_ids = [hit.chunk_id for hit in merged]
    assert merged_ids == ["a", "b"]
def test_merge_hybrid_uses_rrf_to_rank_intersected_results_higher():
    """Check that a chunk present in both input lists leads the hybrid ranking."""
    lexical_hits = [_make("a", bm25_rank=2), _make("b", bm25_rank=1)]
    semantic_hits = [_make("a", dense_rank=1), _make("c", dense_rank=2)]
    ranked_ids = [hit.chunk_id for hit in _merge(lexical_hits, semantic_hits, "hybrid")]
    # ``a`` is the only chunk in both lists, so after RRF it should outrank
    # ``b`` and ``c`` even though ``b`` has the better BM25 rank.
    assert ranked_ids[0] == "a"
    # Every input chunk must survive the merge.
    assert set(ranked_ids) == {"a", "b", "c"}
def test_merge_hybrid_handles_disjoint_sets():
    """Check that hybrid merging keeps both chunks when the lists share none."""
    only_lexical = _make("x", bm25_rank=1)
    only_semantic = _make("y", dense_rank=1)
    merged = _merge([only_lexical], [only_semantic], "hybrid")
    merged_ids = {hit.chunk_id for hit in merged}
    assert merged_ids == {"x", "y"}
def test_search_mode_typed():
    """Smoke check that a ``list[SearchMode]`` can hold the three valid mode values."""
    # The annotation is aimed at a static type checker; at runtime the test
    # only confirms the list compares equal to the expected literals.
    expected = ["lexical", "semantic", "hybrid"]
    valid: list[SearchMode] = ["lexical", "semantic", "hybrid"]
    assert valid == expected