"""Quality flag computation for chunks."""

from __future__ import annotations

from typing import Any

from app.utils.text_cleaning import looks_garbled

# OCR confidence strictly below this value sets the "low_ocr_confidence" flag
# (a confidence exactly equal to the threshold is not flagged).
LOW_OCR_CONFIDENCE_THRESHOLD = 0.6
# Non-empty text whose whitespace-stripped length (in characters, via len())
# is strictly below this value sets the "very_short_text" flag.
SHORT_TEXT_THRESHOLD = 24


def compute_quality_flags(
    *,
    text: str,
    block_type: str,
    ocr_confidence: float | None,
    has_handwriting: bool = False,
) -> dict[str, Any]:
    """Derive the quality flags for a single chunk.

    All arguments are keyword-only.

    Args:
        text: Chunk text, used for the short-text and garbled-text checks.
        block_type: Block label; compared against "table",
            "figure_caption", "figure_description" and "handwriting".
        ocr_confidence: OCR confidence score, or None when unavailable.
            None never triggers the low-confidence flag.
        has_handwriting: Caller-supplied hint that the chunk contains
            handwriting, combined with the "handwriting" block type.

    Returns:
        A dict with the keys "low_ocr_confidence", "very_short_text",
        "possible_garbled_text", "table_detected", "figure_detected",
        "handwriting_detected" and "needs_manual_review".
        "needs_manual_review" is set when the chunk has low OCR
        confidence, possibly garbled text, or detected handwriting;
        short text alone does not request a review.
    """
    # Flags derived purely from the block label / caller hint.
    is_table = block_type == "table"
    is_figure = block_type in ("figure_caption", "figure_description")
    handwriting = has_handwriting or block_type == "handwriting"

    # A missing confidence score is treated as "not low".
    low_confidence = bool(
        ocr_confidence is not None
        and ocr_confidence < LOW_OCR_CONFIDENCE_THRESHOLD
    )

    # An empty string is never flagged as short, whereas whitespace-only
    # text is (its stripped length is 0).
    too_short = bool(text) and len(text.strip()) < SHORT_TEXT_THRESHOLD

    garbled = bool(looks_garbled(text))

    return {
        "low_ocr_confidence": low_confidence,
        "very_short_text": too_short,
        "possible_garbled_text": garbled,
        "table_detected": is_table,
        "figure_detected": is_figure,
        "handwriting_detected": handwriting,
        "needs_manual_review": bool(low_confidence or garbled or handwriting),
    }