Files
LegacyHUB/app/utils/language.py
Vadim Malanov 7f72171572 chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line
endings), AGENTS.md (entry points, stack, discovery order, baseline
checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion,
failures, rollback, scaling notes), .env.prod.example with rotated
credential placeholders, and dev-only warnings on .env.example.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 16:41:50 +03:00

25 lines
664 B
Python

"""Language detection helper - tolerant to short / mixed text."""
from __future__ import annotations
from langdetect import DetectorFactory, LangDetectException, detect_langs
# Pin langdetect's random seed at import time. Per the langdetect docs the
# detection algorithm is non-deterministic unless a seed is set, so this makes
# repeated calls on the same text return the same result.
DetectorFactory.seed = 42
def detect_language(text: str, min_chars: int = 40) -> str | None:
    """Guess the dominant language of *text*.

    Args:
        text: The text to classify. Empty / falsy input is treated as
            undetectable.
        min_chars: Minimum length of ``text.strip()`` required before
            detection is attempted; shorter input is rejected up front.

    Returns:
        The ``lang`` attribute of the first candidate returned by
        ``detect_langs`` (langdetect orders candidates by descending
        probability), or ``None`` when the text is too short, detection
        raises ``LangDetectException``, or no candidates are produced.
        The value is whatever tag langdetect reports: usually a two-letter
        ISO 639-1 code, but the library also emits region-qualified tags
        such as ``zh-cn`` / ``zh-tw``.
    """
    # Guard clauses: nothing to analyse, or too little signal once the
    # surrounding whitespace is ignored.
    if not text:
        return None
    if len(text.strip()) < min_chars:
        return None

    try:
        candidates = detect_langs(text)
    except LangDetectException:
        # Raised by langdetect when it cannot classify the input; treat it
        # as "unknown" rather than propagating the error.
        return None

    # An empty candidate list also means "unknown".
    return candidates[0].lang if candidates else None
def has_cyrillic(text: str) -> bool:
    """Return ``True`` if *text* contains at least one Cyrillic character.

    Only the basic Unicode Cyrillic block (U+0400 through U+04FF, inclusive)
    is checked; characters outside that range do not count. An empty string
    yields ``False``.
    """
    block_start, block_end = "Ѐ", "ӿ"  # U+0400 .. U+04FF
    for ch in text:
        if block_start <= ch <= block_end:
            return True
    return False