From 785d3be970fa4de07e8092a0f69ee4200351f00e Mon Sep 17 00:00:00 2001
From: Vadim Malanov
Date: Wed, 13 May 2026 16:54:15 +0300
Subject: [PATCH] test: add Alembic migration smoke and /search contract tests

tests/test_alembic.py points Alembic at an in-process SQLite database in
--sql mode so the migration files are validated end to end without
needing the real Postgres compose service. Asserts the documents,
chunks, and processing_events tables plus the unique constraints appear
in the generated DDL, and that the revision graph stays linear at
0001_initial.

tests/test_routes_search.py monkeypatches
app.indexing.hybrid_search.run_search so the FastAPI route can be
exercised with the real SearchRequest/SearchResponse schemas. Covers the
happy path (rank, citation, reranked flag) and that empty queries are
rejected at schema validation before the backend is called.

pytest tests/test_alembic.py tests/test_routes_search.py -q: 4 passed.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Note: the blob ids on the "index" lines below predate the review edits to
 the file bodies; regenerate the patch with git format-patch to refresh them.

 tests/test_alembic.py       |  90 +++++++++++++++++++++++
 tests/test_routes_search.py | 138 ++++++++++++++++++++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 tests/test_alembic.py
 create mode 100644 tests/test_routes_search.py

diff --git a/tests/test_alembic.py b/tests/test_alembic.py
new file mode 100644
index 0000000..1036bff
--- /dev/null
+++ b/tests/test_alembic.py
@@ -0,0 +1,90 @@
+"""Alembic migration smoke test.
+
+We do not boot a real Postgres in CI; instead we point Alembic at an in-process
+SQLite database and verify:
+
+- ``alembic upgrade head`` succeeds offline (SQL generation) using the real
+  migration files, exercising every column type and constraint declaration;
+- the revision graph has exactly one head and no branch or merge points.
+
+Downgrades are not exercised here.
+
+This catches typos and broken migration ordering early without requiring the
+full backing-service compose stack to be online.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import sys
+from pathlib import Path
+
+import pytest
+
+ROOT = Path(__file__).resolve().parents[1]
+sys.path.insert(0, str(ROOT))
+
+
+@pytest.fixture
+def alembic_cfg(tmp_path, monkeypatch):
+    """Return an Alembic ``Config`` pointed at a throwaway SQLite URL.
+
+    The config is loaded from the repository's ``alembic.ini``; its script
+    location and ``sqlalchemy.url`` are then overridden, with the SQLite file
+    placed under ``tmp_path``.
+    """
+    # NOTE(review): presumably the migration env derives its database URL from
+    # these variables; pin them to local values. Confirm against env.py.
+    monkeypatch.setenv("POSTGRES_HOST", "127.0.0.1")
+    monkeypatch.setenv("POSTGRES_PORT", "5432")
+    from alembic.config import Config
+
+    db_file = tmp_path / "legacyhub.db"
+    cfg = Config(str(ROOT / "alembic.ini"))
+    cfg.set_main_option("script_location", str(ROOT / "app" / "db" / "migrations"))
+    cfg.set_main_option("sqlalchemy.url", f"sqlite:///{db_file}")
+    return cfg
+
+
+def test_migration_offline_emits_sql(alembic_cfg, tmp_path):
+    """Render ``upgrade head`` as offline SQL and check the expected DDL.
+
+    With ``sql=True`` Alembic prints the migration SQL instead of executing
+    it. The output is captured to a file and checked for the three expected
+    tables and the two named unique constraints.
+    """
+    from alembic import command
+
+    out_file = tmp_path / "upgrade.sql"
+    # The generated SQL goes to stdout; redirect it only for this call.
+    with out_file.open("w", encoding="utf-8") as sink:
+        with contextlib.redirect_stdout(sink):
+            command.upgrade(alembic_cfg, "head", sql=True)
+
+    sql = out_file.read_text(encoding="utf-8")
+    assert "CREATE TABLE documents" in sql
+    assert "CREATE TABLE chunks" in sql
+    assert "CREATE TABLE processing_events" in sql
+    # Constraint sanity
+    assert "uq_chunks_doc_idx" in sql
+    assert "uq_pages_doc_page" in sql
+
+
+def test_revision_history_is_linear(alembic_cfg):
+    """The revision graph has one head, ``0001_initial``, and never forks."""
+    from alembic.script import ScriptDirectory
+
+    script = ScriptDirectory.from_config(alembic_cfg)
+    heads = script.get_heads()
+    assert len(heads) == 1, f"expected one head, got: {heads}"
+    # ``walk_revisions`` yields newest first, so the revision pinned here is
+    # the head; compare it directly instead of walking the graph for it.
+    assert heads[0] == "0001_initial"
+
+    forks = [
+        rev.revision
+        for rev in script.walk_revisions()
+        if rev.is_branch_point or rev.is_merge_point
+    ]
+    assert not forks, f"expected a linear history, got forks at: {forks}"
diff --git a/tests/test_routes_search.py b/tests/test_routes_search.py
new file mode 100644
index 0000000..7cfa45b
--- /dev/null
+++ b/tests/test_routes_search.py
@@ -0,0 +1,138 @@
+"""Contract test for POST /search.
+
+The hybrid search backend depends on live OpenSearch + Qdrant + embedder; we
+patch :func:`app.indexing.hybrid_search.run_search` so the route can be
+exercised with the real request/response schemas without bringing infra up.
+"""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+
+from fastapi.testclient import TestClient
+
+from app.api.schemas import (
+    Citation,
+    SearchHit,
+    SearchResponse,
+)
+from app.config import settings
+from app.main import app
+
+
+@pytest.fixture
+def client() -> TestClient:
+    """Return a ``TestClient`` bound to the application under test."""
+    return TestClient(app)
+
+
+def _stub_response(query: str) -> SearchResponse:
+    """Build a canned one-hit ``SearchResponse`` that echoes ``query``."""
+    doc_id = uuid.uuid4()
+    chunk_id = uuid.uuid4()
+    return SearchResponse(
+        query=query,
+        mode="hybrid",
+        total_candidates=42,
+        reranked=True,
+        results=[
+            SearchHit(
+                rank=1,
+                score=0.91,
+                document_id=doc_id,
+                chunk_id=chunk_id,
+                original_file_name="GOST_21.501-93.pdf",
+                source_path="/data/input/standards/GOST_21.501-93.pdf",
+                page_number=12,
+                block_type="paragraph",
+                text=f"Highlighted text for {query}.",
+                citation=Citation(
+                    pdf="GOST_21.501-93.pdf",
+                    page=12,
+                    block_id="b-12-0",
+                    table_id=None,
+                    figure_id=None,
+                ),
+                quality_flags={"low_ocr_confidence": False, "needs_manual_review": False},
+                metadata={"section_heading": "Глава 2"},
+            )
+        ],
+    )
+
+
+def test_search_returns_hit_with_citation(client: TestClient, monkeypatch):
+    """Happy path: the stubbed hit is serialized with its rank and citation."""
+    from app.api import routes_search as routes
+    from app.indexing import hybrid_search
+
+    def fake_run(req):
+        assert req.query
+        return _stub_response(req.query)
+
+    monkeypatch.setattr(hybrid_search, "run_search", fake_run)
+    # Also patch the route module's own binding in case it holds a direct
+    # reference to run_search; ``raising=False`` tolerates its absence.
+    monkeypatch.setattr(routes, "run_search", fake_run, raising=False)
+
+    res = client.post(
+        f"{settings.app_api_prefix}/search",
+        json={
+            "query": "ГОСТ 21.501-93",
+            "limit": 10,
+            "filters": {
+                "document_id": None,
+                "source_path": None,
+                "block_type": None,
+                "min_ocr_confidence": None,
+            },
+            "search_mode": "hybrid",
+        },
+    )
+
+    assert res.status_code == 200, res.text
+    body = res.json()
+    assert body["query"] == "ГОСТ 21.501-93"
+    assert body["mode"] == "hybrid"
+    assert body["reranked"] is True
+    assert body["total_candidates"] == 42
+    assert len(body["results"]) == 1
+
+    hit = body["results"][0]
+    assert hit["rank"] == 1
+    assert hit["page_number"] == 12
+    assert hit["block_type"] == "paragraph"
+    assert hit["citation"]["pdf"] == "GOST_21.501-93.pdf"
+    assert hit["citation"]["page"] == 12
+    assert hit["citation"]["block_id"] == "b-12-0"
+
+
+def test_search_rejects_empty_query(client: TestClient, monkeypatch):
+    """Schema validation should reject empty query without hitting the backend."""
+    from app.api import routes_search as routes
+    from app.indexing import hybrid_search
+
+    def must_not_run(_req):  # noqa: ARG001
+        raise AssertionError("backend should not be called for invalid input")
+
+    monkeypatch.setattr(hybrid_search, "run_search", must_not_run)
+    # Also guard the route module's own binding, as the happy-path test does;
+    # a direct reference there would otherwise bypass the guard.
+    monkeypatch.setattr(routes, "run_search", must_not_run, raising=False)
+
+    res = client.post(
+        f"{settings.app_api_prefix}/search",
+        json={
+            "query": "",
+            "limit": 10,
+            "filters": {
+                "document_id": None,
+                "source_path": None,
+                "block_type": None,
+                "min_ocr_confidence": None,
+            },
+            "search_mode": "hybrid",
+        },
+    )
+    assert res.status_code == 422