test: add Alembic migration smoke and /search contract tests

tests/test_alembic.py points Alembic at an in-process SQLite database
in --sql mode so the migration files are validated end to end without
needing the real Postgres compose service. Asserts the documents,
chunks, and processing_events tables plus the unique constraints
appear in the generated DDL, and that the revision graph stays
linear at 0001_initial.

tests/test_routes_search.py monkeypatches
app.indexing.hybrid_search.run_search so the FastAPI route can be
exercised with the real SearchRequest/SearchResponse schemas. It covers
the happy path (rank, citation, reranked flag) and verifies that empty
queries are rejected at schema validation before the backend is called.

pytest tests/test_alembic.py tests/test_routes_search.py -q: 4 passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:54:15 +03:00
parent d3c96161b0
commit 785d3be970
2 changed files with 205 additions and 0 deletions

130
tests/test_routes_search.py Normal file
View File

@@ -0,0 +1,130 @@
"""Contract test for POST /search.
The hybrid search backend depends on live OpenSearch + Qdrant + embedder; we
patch :func:`app.indexing.hybrid_search.run_search` so the route can be
exercised with the real request/response schemas without bringing infra up.
"""
from __future__ import annotations
import uuid
import pytest
from fastapi.testclient import TestClient
from app.api.schemas import (
Citation,
SearchHit,
SearchResponse,
)
from app.config import settings
from app.main import app
@pytest.fixture
def client() -> TestClient:
    """Provide a FastAPI ``TestClient`` wrapping the application under test."""
    test_client = TestClient(app)
    return test_client
def _stub_response(query: str) -> SearchResponse:
    """Build a canned single-hit ``SearchResponse`` that echoes *query*.

    The response reports hybrid mode, 42 total candidates and ``reranked=True``;
    its only hit is a rank-1 paragraph on page 12 of ``GOST_21.501-93.pdf``
    with freshly generated document and chunk UUIDs.
    """
    # Generate the document id first, then the chunk id.
    document_uuid = uuid.uuid4()
    chunk_uuid = uuid.uuid4()

    pdf_name = "GOST_21.501-93.pdf"
    page = 12

    citation = Citation(
        pdf=pdf_name,
        page=page,
        block_id="b-12-0",
        table_id=None,
        figure_id=None,
    )
    only_hit = SearchHit(
        rank=1,
        score=0.91,
        document_id=document_uuid,
        chunk_id=chunk_uuid,
        original_file_name=pdf_name,
        source_path="/data/input/standards/GOST_21.501-93.pdf",
        page_number=page,
        block_type="paragraph",
        text=f"Highlighted text for {query}.",
        citation=citation,
        quality_flags={"low_ocr_confidence": False, "needs_manual_review": False},
        metadata={"section_heading": "Глава 2"},
    )
    return SearchResponse(
        query=query,
        mode="hybrid",
        total_candidates=42,
        reranked=True,
        results=[only_hit],
    )
def test_search_returns_hit_with_citation(client: TestClient, monkeypatch):
    """Happy path: POST /search returns the stubbed hit, rank and citation intact."""
    from app.api import routes_search as routes
    from app.indexing import hybrid_search

    def stub_backend(req):
        # The route must hand over a request with a non-empty query.
        assert req.query
        return _stub_response(req.query)

    monkeypatch.setattr(hybrid_search, "run_search", stub_backend)
    # The route imports run_search lazily; also replace any binding that lives
    # on the routes module itself (raising=False tolerates its absence).
    monkeypatch.setattr(routes, "run_search", stub_backend, raising=False)

    query_text = "ГОСТ 21.501-93"
    # Every filter is sent explicitly as null.
    no_filters = dict.fromkeys(
        ("document_id", "source_path", "block_type", "min_ocr_confidence")
    )
    payload = {
        "query": query_text,
        "limit": 10,
        "filters": no_filters,
        "search_mode": "hybrid",
    }

    response = client.post(f"{settings.app_api_prefix}/search", json=payload)
    assert response.status_code == 200, response.text

    body = response.json()
    assert body["query"] == query_text
    assert body["mode"] == "hybrid"
    assert body["reranked"] is True
    assert body["total_candidates"] == 42

    hits = body["results"]
    assert len(hits) == 1
    hit = hits[0]
    assert hit["rank"] == 1
    assert hit["page_number"] == 12
    assert hit["block_type"] == "paragraph"

    citation = hit["citation"]
    assert citation["pdf"] == "GOST_21.501-93.pdf"
    assert citation["page"] == 12
    assert citation["block_id"] == "b-12-0"
def test_search_rejects_empty_query(client: TestClient, monkeypatch):
    """Schema validation should reject empty query without hitting the backend.

    The search backend is replaced with a function that raises, so any call
    that slips past request validation fails the test instead of reaching
    real infrastructure. The request is expected to be answered with HTTP 422.
    """
    from app.api import routes_search as routes
    from app.indexing import hybrid_search

    def must_not_run(_req):  # noqa: ARG001
        raise AssertionError("backend should not be called for invalid input")

    monkeypatch.setattr(hybrid_search, "run_search", must_not_run)
    # If the routes module holds its own ``run_search`` binding, patching only
    # hybrid_search would leave the guard uninstalled; patch that binding too.
    # raising=False keeps this a no-op when no such attribute exists.
    monkeypatch.setattr(routes, "run_search", must_not_run, raising=False)

    res = client.post(
        f"{settings.app_api_prefix}/search",
        json={
            "query": "",
            "limit": 10,
            "filters": {
                "document_id": None,
                "source_path": None,
                "block_type": None,
                "min_ocr_confidence": None,
            },
            "search_mode": "hybrid",
        },
    )
    # Include the response body in the failure message to ease debugging.
    assert res.status_code == 422, res.text