chore: bootstrap repository with governance docs

Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadim Malanov
2026-05-13 16:41:50 +03:00
commit 7f72171572
157 changed files with 11298 additions and 0 deletions

99
app/api/schemas.py Normal file
View File

@@ -0,0 +1,99 @@
"""Pydantic request/response schemas for the LegacyHUB API."""
from __future__ import annotations
import uuid
from datetime import datetime
from typing import Any, Literal
from pydantic import BaseModel, Field
# ---------------- Health ----------------
class ComponentHealth(BaseModel):
    """Health report for one named component."""

    # Component name as a plain string.
    name: str
    # Restricted to exactly these three strings; anything else fails validation.
    status: Literal["ok", "error", "degraded"]
    # Free-form extra information. default_factory gives every instance its
    # own empty dict instead of sharing one mutable default.
    detail: dict[str, Any] = Field(default_factory=dict)
class HealthResponse(BaseModel):
    """Overall health payload: a top-level status, a version string, and
    one ``ComponentHealth`` entry per reported component.
    """

    # Restricted to exactly these three strings; anything else fails validation.
    status: Literal["ok", "error", "degraded"]
    # Version string (required, no default).
    version: str
    # Per-component reports (required; may be an empty list).
    components: list[ComponentHealth]
# ---------------- Ingestion ----------------
class IngestFolderRequest(BaseModel):
    """Request body for ingesting a folder.

    Only ``path`` is required; ``recursive`` defaults to ``True`` and
    ``force`` defaults to ``False``.
    """

    # Required string. Only the type is validated here; the "absolute path"
    # wording is documentation, not an enforced constraint.
    path: str = Field(
        default=...,
        description="Absolute path inside the API container",
    )
    # NOTE(review): presumably controls descending into sub-folders - confirm
    # against the ingestion handler.
    recursive: bool = True
    # NOTE(review): presumably forces re-processing of already-seen files -
    # confirm against the ingestion handler.
    force: bool = False
class IngestFolderResponse(BaseModel):
    """Result of a folder-ingestion request: a run identifier plus four
    integer counters. All fields are required.
    """

    # UUID identifying the ingestion run.
    run_id: uuid.UUID
    # Integer counters; they are populated by the caller and this schema does
    # not enforce any relationship between them.
    discovered: int
    queued: int
    skipped_duplicates: int
    invalid_files: int
class DocumentSummary(BaseModel):
    """Compact description of one document. All fields are required."""

    # Document UUID.
    id: uuid.UUID
    # File name and source path, both plain strings.
    original_file_name: str
    source_path: str
    # Digest carried as a string; length and hex format are not validated here.
    sha256: str
    # Unconstrained string - the set of allowed statuses is not fixed by this schema.
    status: str
    # File size as an integer number of bytes.
    file_size_bytes: int
    # Creation timestamp.
    created_at: datetime
# ---------------- Search ----------------
# Closed set of search-mode names. Used as the type of
# SearchRequest.search_mode and SearchResponse.mode, so any other string
# fails validation on those fields.
SearchMode = Literal["lexical", "semantic", "hybrid"]
class SearchFilters(BaseModel):
    """Optional search filters.

    Every field defaults to ``None``, so ``SearchFilters()`` with no
    arguments is a valid instance.
    """

    document_id: uuid.UUID | None = None
    source_path: str | None = None
    block_type: str | None = None
    # When given, must lie within the closed interval [0.0, 1.0].
    min_ocr_confidence: float | None = Field(default=None, ge=0.0, le=1.0)
class SearchRequest(BaseModel):
    """Search request body.

    ``query`` is the only required field; the remaining fields fall back to
    the defaults declared below.
    """

    # Required, at least one character long.
    query: str = Field(default=..., min_length=1)
    # Between 1 and 100 inclusive; defaults to 10.
    limit: int = Field(default=10, ge=1, le=100)
    # A fresh SearchFilters instance is built for each request that omits it.
    filters: SearchFilters = Field(default_factory=SearchFilters)
    # Defaults to "hybrid" when omitted.
    search_mode: SearchMode = "hybrid"
class Citation(BaseModel):
    """Citation attached to a search hit.

    ``pdf`` and ``page`` are required; the three ``*_id`` fields are
    optional and default to ``None``.
    """

    pdf: str
    page: int
    # Optional identifiers; the schema does not require any of them to be set.
    block_id: str | None = None
    table_id: str | None = None
    figure_id: str | None = None
class SearchHit(BaseModel):
    """A single search result entry.

    Every field is required except ``quality_flags`` and ``metadata``,
    which default to empty dicts.
    """

    # Position and score of this hit; neither is range-checked by the schema.
    rank: int
    score: float
    # Identifiers of the document and chunk this hit refers to.
    document_id: uuid.UUID
    chunk_id: uuid.UUID
    original_file_name: str
    source_path: str
    page_number: int
    block_type: str
    text: str
    # Nested Citation model, validated together with the hit.
    citation: Citation
    # Free-form mappings; default_factory gives each instance its own dict.
    quality_flags: dict[str, Any] = Field(default_factory=dict)
    metadata: dict[str, Any] = Field(default_factory=dict)
class SearchResponse(BaseModel):
    """Search response envelope. All fields are required."""

    # Query string carried in the response.
    query: str
    # One of the SearchMode literals.
    mode: SearchMode
    # Integer candidate count; not tied to len(results) by this schema.
    total_candidates: int
    # Boolean flag supplied by the caller.
    reranked: bool
    # List of hits (may be empty).
    results: list[SearchHit]