fix: Mejorar segmentación de traducciones para textos largos

- Limitar cada segmento a 400 caracteres
- Dividir textos largos por palabras para evitar truncamiento
- Limpiar logs de debug
This commit is contained in:
2026-03-20 04:30:10 -06:00
parent 47d2ba0e9a
commit afc446a9aa
2 changed files with 40 additions and 55 deletions

View File

@@ -49,58 +49,68 @@ REVERSE_MAPPING = {}
# Flag lookup table; it is neither filled nor read in the visible part of the
# file — presumably keyed by language, TODO confirm against its users.
FLAG_MAPPING = {}
# Language name -> language code. translate_text() resolves its target_lang
# argument through this table and falls back to the raw value when there is
# no entry.
NAME_TO_CODE = {}
def _split_into_chunks(text, max_length):
    """Split *text* into ``(lead, chunk)`` pairs of at most *max_length* chars.

    ``lead`` is the separator that must precede ``chunk`` when the pieces are
    glued back together: ``' '`` for a break made at a space, ``''`` for the
    first piece and for the continuation of a hard-split word.  A
    non-positive *max_length* disables splitting.
    """
    if max_length <= 0 or len(text) <= max_length:
        return [('', text)]

    pieces = []
    current = ''
    lead = ''  # separator that goes in front of `current` on reassembly
    for word in text.split(' '):
        if len(word) > max_length:
            # A single token longer than the limit cannot be kept whole:
            # flush what we have and slice the token itself.
            # NOTE(review): payloads are sent with format "html", so a hard
            # cut may land inside a tag or entity — confirm this is acceptable.
            if current:
                pieces.append((lead, current))
                lead = ' '
            while len(word) > max_length:
                pieces.append((lead, word[:max_length]))
                word = word[max_length:]
                lead = ''  # the remaining slices continue the same token
            current = word
            if not word:
                # Token consumed exactly; the next word follows a real space.
                lead = ' '
            continue

        if not current:
            current = word
        elif len(current) + 1 + len(word) <= max_length:
            current = f"{current} {word}"
        else:
            pieces.append((lead, current))
            current = word
            lead = ' '

    if current:
        pieces.append((lead, current))
    return pieces


async def _do_translate_request(session, url, text, target_code, max_length=500):
    """Translate *text*, issuing one request per chunk of at most *max_length* chars.

    Args:
        session: object whose ``post(url, json=..., timeout=...)`` returns an
            async context manager yielding a response with ``status`` and an
            awaitable ``json()``.
        url: endpoint the translation payload is posted to.
        text: segment to translate.
        target_code: language code sent as the payload's ``target``.
        max_length: upper bound, in characters, for each request's text;
            a non-positive value disables splitting.

    Returns:
        The translated text.  Text that is blank or contains no Latin letter
        is returned untouched without any request; a chunk whose request
        fails (non-200 status or exception) is kept in its original form.
    """
    if not text.strip() or not re.search(r'[a-zA-Z\u00C0-\u017F]', text):
        return text

    translated_parts = []
    for lead, chunk in _split_into_chunks(text, max_length):
        payload = {
            "q": chunk,
            "source": "auto",
            "target": target_code,
            "format": "html"
        }
        translated = chunk  # best-effort: fall back to the untranslated chunk
        try:
            async with session.post(url, json=payload, timeout=30) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    translated = data.get("translatedText", chunk)
                else:
                    # Report the failure instead of silently returning the
                    # source text.
                    print(f"[TRANSLATE] Error HTTP {resp.status}")
        except Exception as e:
            print(f"[TRANSLATE] Error en petición: {e}")
        translated_parts.append(lead)
        translated_parts.append(translated)
    return ''.join(translated_parts)
async def translate_text(text: str, target_lang: str) -> str:
    """Translate *text* into *target_lang*, one request batch per sentence segment.

    The text is split on sentence punctuation and newlines (separators are
    kept as their own segments), every segment is handed to
    ``_do_translate_request`` with a 400-character limit, and the results are
    concatenated in their original order.

    Args:
        text: text to translate.
        target_lang: language name or code; names are resolved through
            ``NAME_TO_CODE`` and unknown values are passed on unchanged.

    Returns:
        The translated text, or *text* itself when no translation URL is
        configured or there is nothing to translate.
    """
    url = get_libretranslate_url()
    if not url:
        print("[TRANSLATE] URL no configurada")
        return text
    if not text.strip():
        # Blank input would be returned untouched anyway; skip the session.
        return text

    target_code = NAME_TO_CODE.get(target_lang, target_lang)
    # The capturing group keeps the separators so "".join() restores them.
    segments = [s for s in re.split(r'([.!?]+\s*|\n+)', text) if s]
    async with aiohttp.ClientSession() as session:
        # gather() preserves argument order, so segments stay in sequence.
        translated_segments = await asyncio.gather(*(
            _do_translate_request(session, url, segment, target_code, max_length=400)
            for segment in segments
        ))
    return "".join(translated_segments)
def translate_text_sync(text: str, target_lang: str) -> str:
"""Versión síncrona de translate_text utilizando un hilo separado."""