Files
LegacyHUB/app/api/schemas.py
Vadim Malanov 7f72171572 chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:41:50 +03:00

100 lines
2.2 KiB
Python

"""Pydantic request/response schemas for the LegacyHUB API."""
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any, Literal
from pydantic import BaseModel, Field
# ---------------- Health ----------------
class ComponentHealth(BaseModel):
    # Health report for a single named component.
    name: str
    # Restricted to exactly these three status strings.
    status: Literal["ok", "error", "degraded"]
    # Free-form extra information. default_factory gives every instance its
    # own empty dict when the field is omitted.
    detail: dict[str, Any] = Field(default_factory=dict)
class HealthResponse(BaseModel):
    # Aggregate health payload: one overall status plus per-component entries.
    # All three fields are required (none declares a default).

    # Same three allowed values as ComponentHealth.status. How the overall
    # value is derived from the components is not defined in this module.
    status: Literal["ok", "error", "degraded"]
    # NOTE(review): presumably the running service's version - confirm
    # against the code that builds this response.
    version: str
    components: list[ComponentHealth]
# ---------------- Ingestion ----------------
class IngestFolderRequest(BaseModel):
    # Request body for ingesting a folder.

    # Required. Described to API clients as an absolute path inside the API
    # container; nothing declared here validates that it actually is absolute.
    path: str = Field(
        default=...,
        description="Absolute path inside the API container",
    )
    # NOTE(review): presumably toggles descending into subfolders - the
    # handling lives outside this module, confirm there. Defaults to True.
    recursive: bool = True
    # NOTE(review): presumably re-processes files that would otherwise be
    # skipped - confirm against the ingestion code. Defaults to False.
    force: bool = False
class IngestFolderResponse(BaseModel):
    # Result of a folder-ingestion request: a run identifier plus four integer
    # counters. Every field is required.

    # UUID identifying the ingestion run.
    run_id: uuid.UUID
    # NOTE(review): the counter semantics below are inferred from the field
    # names only (files found / enqueued / skipped as duplicates / rejected
    # as invalid) - confirm against the code that fills them in.
    discovered: int
    queued: int
    skipped_duplicates: int
    invalid_files: int
class DocumentSummary(BaseModel):
    # Summary record for one document. Every field is required.
    id: uuid.UUID
    original_file_name: str
    source_path: str
    # NOTE(review): presumably a hex-encoded SHA-256 digest of the file -
    # no length or charset constraint is declared here.
    sha256: str
    # Plain string: the set of valid status values is not constrained by
    # this schema.
    status: str
    file_size_bytes: int
    # NOTE(review): timezone awareness is not enforced by this annotation.
    created_at: datetime
# ---------------- Search ----------------
# The three accepted search-mode strings. Used as the annotation for
# SearchRequest.search_mode and SearchResponse.mode.
SearchMode = Literal["lexical", "semantic", "hybrid"]
class SearchFilters(BaseModel):
    # Optional narrowing criteria for a search. Every field defaults to None;
    # how a None or set value is applied is decided by the search code, not
    # by this schema.
    document_id: uuid.UUID | None = None
    source_path: str | None = None
    block_type: str | None = None
    # When supplied, the value must lie in the closed interval [0.0, 1.0].
    min_ocr_confidence: float | None = Field(default=None, ge=0.0, le=1.0)
class SearchRequest(BaseModel):
    # Request body for a search call.

    # Required; must contain at least one character.
    query: str = Field(default=..., min_length=1)
    # Number of results requested: 10 when omitted, allowed range 1..100.
    limit: int = Field(default=10, ge=1, le=100)
    # A fresh SearchFilters instance (all fields None) is built per request
    # when the client sends no filters.
    filters: SearchFilters = Field(default_factory=SearchFilters)
    # Defaults to "hybrid" when the client does not choose a mode.
    search_mode: SearchMode = "hybrid"
class Citation(BaseModel):
    # Pointer to where a piece of text comes from: a PDF and a page, plus
    # optional block / table / figure identifiers.

    # NOTE(review): whether this holds a file name, a path or a URL is not
    # established by this schema - confirm against the producer.
    pdf: str
    # NOTE(review): 0- vs 1-based numbering is not declared here.
    page: int
    # Each identifier below is optional and defaults to None.
    block_id: str | None = None
    table_id: str | None = None
    figure_id: str | None = None
class SearchHit(BaseModel):
    # One entry of a search result list. All fields are required except the
    # two trailing dicts.

    # NOTE(review): ordering direction and scale of rank/score are not
    # declared by this schema - confirm against the ranking code.
    rank: int
    score: float
    document_id: uuid.UUID
    chunk_id: uuid.UUID
    original_file_name: str
    source_path: str
    page_number: int
    # Plain string: the set of block types is not constrained here.
    block_type: str
    text: str
    # Structured source pointer (see the Citation model).
    citation: Citation
    # Free-form mappings. Each instance gets its own empty dict when the
    # field is omitted.
    quality_flags: dict[str, Any] = Field(default_factory=dict)
    metadata: dict[str, Any] = Field(default_factory=dict)
class SearchResponse(BaseModel):
    # Response body for a search call. Every field is required.
    query: str
    # One of the SearchMode literals ("lexical", "semantic", "hybrid").
    mode: SearchMode
    # NOTE(review): presumably the size of the candidate pool before the
    # result list was cut down - confirm against the search code.
    total_candidates: int
    # NOTE(review): presumably True when a re-ranking step was applied to
    # the results - confirm against the search code.
    reranked: bool
    results: list[SearchHit]