chore: bootstrap repository with governance docs
Initialize git, add Apache-2.0 LICENSE, .gitattributes (LF line endings), AGENTS.md (entry points, stack, discovery order, baseline checks), RUNBOOK.md (dev boot, prod deploy with overlay, ingestion, failures, rollback, scaling notes), .env.prod.example with rotated credential placeholders, and dev-only warnings on .env.example. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
24
app/utils/language.py
Normal file
24
app/utils/language.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Language detection helper - tolerant to short / mixed text."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from langdetect import DetectorFactory, LangDetectException, detect_langs
|
||||
|
||||
# Fix the detector's seed once at import time.
# NOTE(review): langdetect's docs describe detection as non-deterministic
# unless a seed is set; presumably this is here for reproducible results.
DetectorFactory.seed = 42
|
||||
|
||||
|
||||
def detect_language(text: str, min_chars: int = 40) -> str | None:
    """Guess the dominant language of *text*.

    Args:
        text: The text to classify.
        min_chars: Minimum length of ``text`` after stripping surrounding
            whitespace; anything shorter is treated as undetectable.

    Returns:
        The ``lang`` code of the first candidate returned by
        ``detect_langs``, or ``None`` when the text is empty or too short,
        when ``detect_langs`` raises ``LangDetectException``, or when it
        returns no candidates.
    """
    # Empty input is always undetectable, whatever ``min_chars`` is.
    if not text:
        return None
    # Too little signal once surrounding whitespace is ignored.
    if len(text.strip()) < min_chars:
        return None

    try:
        candidates = detect_langs(text)
    except LangDetectException:
        return None

    # Only the first candidate in the returned ranking is reported.
    return candidates[0].lang if candidates else None
|
||||
|
||||
|
||||
def has_cyrillic(text: str) -> bool:
    """Return ``True`` if any character of *text* lies between "Ѐ" and "ӿ".

    The bounds are inclusive (U+0400 through U+04FF). An empty string
    yields ``False``.
    """
    for char in text:
        # Inclusive range check against the two boundary characters.
        if "Ѐ" <= char <= "ӿ":
            return True
    return False
|
||||
Reference in New Issue
Block a user