test: add Alembic migration smoke and /search contract tests
tests/test_alembic.py points Alembic at an in-process SQLite database in --sql mode so the migration files are validated end to end without needing the real Postgres compose service. Asserts the documents, chunks, and processing_events tables plus the unique constraints appear in the generated DDL, and that the revision graph stays linear at 0001_initial. tests/test_routes_search.py monkeypatches app.indexing.hybrid_search.run_search so the FastAPI route can be exercised with the real SearchRequest/SearchResponse schemas. Covers the happy path (rank, citation, reranked flag) and that empty queries are rejected at schema validation before the backend is called. pytest tests/test_alembic.py tests/test_routes_search.py -q: 4 passed. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
75
tests/test_alembic.py
Normal file
75
tests/test_alembic.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
"""Alembic migration smoke test.
|
||||||
|
|
||||||
|
We do not boot a real Postgres in CI; instead we point Alembic at an in-process
|
||||||
|
SQLite database and verify:
|
||||||
|
|
||||||
|
- ``alembic upgrade head`` succeeds offline (SQL generation) using the real
|
||||||
|
migration files, exercising every column type and constraint declaration;
|
||||||
|
- the revision history is linear (a single head) and anchored at ``0001_initial``.
|
||||||
|
|
||||||
|
This catches typos and broken migration ordering early without requiring the
|
||||||
|
full backing-service compose stack to be online.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Repository root is the parent of the directory holding this test module; it is
# placed first on ``sys.path`` so project imports resolve from the checkout.
_THIS_FILE = Path(__file__).resolve()
ROOT = _THIS_FILE.parents[1]
sys.path.insert(0, str(ROOT))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def alembic_cfg(tmp_path, monkeypatch):
    """Return an Alembic ``Config`` bound to a throwaway SQLite file.

    The database path lives under pytest's ``tmp_path`` so every test gets its
    own file. The Postgres host/port environment variables are pinned through
    ``monkeypatch`` and therefore restored when the test finishes.
    """
    for env_name, env_value in (
        ("POSTGRES_HOST", "127.0.0.1"),
        ("POSTGRES_PORT", "5432"),
    ):
        monkeypatch.setenv(env_name, env_value)

    # Imported only after the environment is pinned, so the Alembic config is
    # built fresh and does not depend on whatever Postgres is configured.
    from alembic.config import Config

    sqlite_path = tmp_path / "legacyhub.db"
    overrides = {
        "script_location": str(ROOT / "app" / "db" / "migrations"),
        "sqlalchemy.url": f"sqlite:///{sqlite_path}",
    }

    config = Config(str(ROOT / "alembic.ini"))
    for option, value in overrides.items():
        config.set_main_option(option, value)
    return config
|
||||||
|
|
||||||
|
|
||||||
|
def test_migration_offline_emits_sql(alembic_cfg, tmp_path):
    """Render ``upgrade head`` in offline (``--sql``) mode and inspect the DDL.

    Offline mode prints the generated SQL instead of executing it, so the
    migration files are exercised without a live database. The captured
    script must create the ``documents``, ``chunks`` and ``processing_events``
    tables and mention the ``uq_chunks_doc_idx`` and ``uq_pages_doc_page``
    unique constraints.
    """
    from contextlib import redirect_stdout

    from alembic import command

    out_file = tmp_path / "upgrade.sql"
    # The SQL is written to stdout here; ``redirect_stdout`` captures it into
    # the file and restores the previous stream even if the upgrade raises.
    with out_file.open("w", encoding="utf-8") as sink, redirect_stdout(sink):
        command.upgrade(alembic_cfg, "head", sql=True)

    sql = out_file.read_text(encoding="utf-8")

    for table in ("documents", "chunks", "processing_events"):
        assert f"CREATE TABLE {table}" in sql, f"missing CREATE TABLE {table}"

    # Constraint sanity
    for constraint in ("uq_chunks_doc_idx", "uq_pages_doc_page"):
        assert constraint in sql, f"missing constraint {constraint}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_revision_history_is_linear(alembic_cfg):
    """The migration graph is one unbranched chain rooted at ``0001_initial``.

    Checks three things: there is exactly one head, the only base revision is
    ``0001_initial``, and no revision is a branch point or a merge point. The
    base is read with ``get_bases()`` rather than taking the first item of
    ``walk_revisions()``, because that iterator starts at the head; the two
    only coincide while the project has a single migration.
    """
    from alembic.script import ScriptDirectory

    script = ScriptDirectory.from_config(alembic_cfg)

    heads = script.get_heads()
    assert len(heads) == 1, f"expected one head, got: {heads}"

    bases = script.get_bases()
    assert bases == ["0001_initial"], f"expected base 0001_initial, got: {bases}"

    for rev in script.walk_revisions():
        assert not rev.is_branch_point, f"{rev.revision} is a branch point"
        assert not rev.is_merge_point, f"{rev.revision} is a merge point"
|
||||||
130
tests/test_routes_search.py
Normal file
130
tests/test_routes_search.py
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
"""Contract test for POST /search.
|
||||||
|
|
||||||
|
The hybrid search backend depends on live OpenSearch + Qdrant + embedder; we
|
||||||
|
patch :func:`app.indexing.hybrid_search.run_search` so the route can be
|
||||||
|
exercised with the real request/response schemas without bringing infra up.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from app.api.schemas import (
|
||||||
|
Citation,
|
||||||
|
SearchHit,
|
||||||
|
SearchResponse,
|
||||||
|
)
|
||||||
|
from app.config import settings
|
||||||
|
from app.main import app
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def client() -> TestClient:
    """Provide a ``TestClient`` wrapping the FastAPI ``app`` for a single test."""
    test_client = TestClient(app)
    return test_client
|
||||||
|
|
||||||
|
|
||||||
|
def _stub_response(query: str) -> SearchResponse:
    """Build a canned one-hit ``SearchResponse`` that echoes ``query``.

    The document and chunk ids are fresh random UUIDs on every call; all other
    fields are fixed so tests can assert on them.
    """
    doc_id = uuid.uuid4()
    chunk_id = uuid.uuid4()

    pdf_name = "GOST_21.501-93.pdf"
    page = 12

    citation = Citation(
        pdf=pdf_name,
        page=page,
        block_id="b-12-0",
        table_id=None,
        figure_id=None,
    )
    only_hit = SearchHit(
        rank=1,
        score=0.91,
        document_id=doc_id,
        chunk_id=chunk_id,
        original_file_name=pdf_name,
        source_path="/data/input/standards/GOST_21.501-93.pdf",
        page_number=page,
        block_type="paragraph",
        text=f"Highlighted text for {query}.",
        citation=citation,
        quality_flags={"low_ocr_confidence": False, "needs_manual_review": False},
        metadata={"section_heading": "Глава 2"},
    )
    return SearchResponse(
        query=query,
        mode="hybrid",
        total_candidates=42,
        reranked=True,
        results=[only_hit],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_returns_hit_with_citation(client: TestClient, monkeypatch):
    """Happy path: a stubbed backend result round-trips through ``POST /search``.

    The backend is replaced by a fake that returns ``_stub_response``; the test
    then checks the top-level response fields and the single hit's rank, page,
    block type and citation.
    """
    from app.api import routes_search as routes
    from app.indexing import hybrid_search

    def fake_run(req):
        assert req.query
        return _stub_response(req.query)

    # Swap the backend on the module that defines it ...
    monkeypatch.setattr(hybrid_search, "run_search", fake_run)
    # ... and on the routes module too; ``raising=False`` tolerates the name
    # not existing there.
    monkeypatch.setattr(routes, "run_search", fake_run, raising=False)

    query_text = "ГОСТ 21.501-93"
    empty_filters = dict.fromkeys(
        ("document_id", "source_path", "block_type", "min_ocr_confidence")
    )
    payload = {
        "query": query_text,
        "limit": 10,
        "filters": empty_filters,
        "search_mode": "hybrid",
    }

    res = client.post(f"{settings.app_api_prefix}/search", json=payload)

    assert res.status_code == 200, res.text
    body = res.json()
    assert body["query"] == query_text
    assert body["mode"] == "hybrid"
    assert body["reranked"] is True
    assert body["total_candidates"] == 42
    results = body["results"]
    assert len(results) == 1

    hit = results[0]
    assert hit["rank"] == 1
    assert hit["page_number"] == 12
    assert hit["block_type"] == "paragraph"
    citation = hit["citation"]
    assert citation["pdf"] == "GOST_21.501-93.pdf"
    assert citation["page"] == 12
    assert citation["block_id"] == "b-12-0"
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_rejects_empty_query(client: TestClient, monkeypatch):
    """Schema validation should reject empty query without hitting the backend.

    The backend is replaced by a function that fails the test if it is ever
    invoked. It is patched both where it is defined and on the routes module,
    so the guard is effective whichever binding the route actually calls.
    """
    from app.api import routes_search as routes
    from app.indexing import hybrid_search

    def must_not_run(_req):  # noqa: ARG001
        raise AssertionError("backend should not be called for invalid input")

    monkeypatch.setattr(hybrid_search, "run_search", must_not_run)
    # Mirror the happy-path test: also cover a reference held by the routes
    # module; ``raising=False`` tolerates the name not existing there.
    monkeypatch.setattr(routes, "run_search", must_not_run, raising=False)

    res = client.post(
        f"{settings.app_api_prefix}/search",
        json={
            "query": "",
            "limit": 10,
            "filters": {
                "document_id": None,
                "source_path": None,
                "block_type": None,
                "min_ocr_confidence": None,
            },
            "search_mode": "hybrid",
        },
    )
    assert res.status_code == 422
|
||||||
Reference in New Issue
Block a user