Fix: Motor de traducción mejorado para mensajes multi-idioma y dinámicos

- Rediseñado translatePartial para manejar mezclas de idiomas (ej: 'Hello a todos')
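- Eliminados hardcodes de idiomas ES/PT: el idioma por defecto se obtiene ahora de supported_languages ('es' queda solo como último recurso si no hay ningún idioma activo)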
- Corregido truncado de texto original en el webhook de Telegram
- Mejorada la deduplicación y preservación de párrafos en traducciones
- Fallback dinámico al primer idioma activo de la base de datos
This commit is contained in:
2026-03-10 18:23:56 -06:00
parent bf960f3fc3
commit 8170931f3d
3 changed files with 266 additions and 37 deletions

View File

@@ -34,14 +34,19 @@ $discord->on(Event::GUILD_MEMBER_ADD, function (Member $member, Discord $discord
try {
$pdo = getDbConnection();
// Obtener idioma por defecto dinámico
$stmtDefault = $pdo->query("SELECT language_code FROM supported_languages WHERE is_active = 1 LIMIT 1");
$defaultLang = $stmtDefault->fetchColumn() ?: 'es';
$stmt = $pdo->prepare("
INSERT INTO recipients (platform_id, name, type, platform, language_code, chat_mode)
VALUES (?, ?, 'user', 'discord', 'es', 'agent')
VALUES (?, ?, 'user', 'discord', ?, 'agent')
ON DUPLICATE KEY UPDATE name = VALUES(name)
");
$stmt->execute([
$member->user->id,
$member->user->username
$member->user->username,
$defaultLang
]);
echo "Usuario registrado en la base de datos" . PHP_EOL;
@@ -338,7 +343,12 @@ function handleAutoTranslationWithButtons(PDO $pdo, Message $message, string $te
// Detectar idioma del mensaje (sin emojis para mejor precisión)
$textForDetection = stripEmojisForDetection($text);
$detectedLang = $translator->detectLanguage($textForDetection) ?? 'es';
// Obtener idioma por defecto dinámico
$stmtDefault = $pdo->query("SELECT language_code FROM supported_languages WHERE is_active = 1 LIMIT 1");
$defaultLang = $stmtDefault->fetchColumn() ?: 'es';
$detectedLang = $translator->detectLanguage($textForDetection) ?? $defaultLang;
// Obtener idiomas activos de la base de datos
$stmt = $pdo->query("SELECT language_code, flag_emoji FROM supported_languages WHERE is_active = 1");
@@ -470,14 +480,18 @@ function sendDiscordWelcomeMessage(PDO $pdo, Member $member, Discord $discord):
function registerDiscordUser(PDO $pdo, $user): void
{
// Obtener idioma por defecto de la base de datos (el primero activo)
$stmtDefault = $pdo->query("SELECT language_code FROM supported_languages WHERE is_active = 1 LIMIT 1");
$defaultLang = $stmtDefault->fetchColumn() ?: 'es';
$stmt = $pdo->prepare("
INSERT INTO recipients (platform_id, name, type, platform, language_code, chat_mode)
VALUES (?, ?, 'user', 'discord', 'es', 'agent')
VALUES (?, ?, 'user', 'discord', ?, 'agent')
ON DUPLICATE KEY UPDATE name = VALUES(name)
");
$name = $user->username ?? 'Usuario';
$stmt->execute([$user->id, $name]);
$stmt->execute([$user->id, $name, $defaultLang]);
}
function sendDiscordWelcomeMessageOnMessage(PDO $pdo, Message $message, string $username): void
@@ -525,9 +539,8 @@ function handleTranslateInteraction($interaction, string $customId): void
error_log("Acknowledge error: " . $e->getMessage());
}
$textForDetection = stripEmojisForDetection($originalText);
$sourceLang = $translator->detectLanguage($textForDetection) ?? 'es';
$translated = $translator->translate($textForDetection, $sourceLang, $targetLang);
$textForTranslation = stripEmojisForDetection($originalText);
$translated = $translator->translatePartial($textForTranslation, $targetLang);
if ($translated) {
$translated = strip_tags($translated);

View File

@@ -38,39 +38,36 @@ class Translate
$cached = $this->getFromCache($cacheKey);
if ($cached !== null) {
error_log("Translation cache hit for: $sourceLang -> $targetLang");
return $cached;
}
$lines = explode("\n", $text);
$translatedLines = [];
error_log("Translate: " . count($lines) . " lines from $sourceLang to $targetLang");
foreach ($lines as $index => $line) {
foreach ($lines as $line) {
$trimmed = trim($line);
if ($trimmed === '') {
$translatedLines[] = '';
error_log("Line $index: empty -> empty");
continue;
}
$response = $this->request('/translate', [
'q' => $trimmed,
'source' => $sourceLang,
'target' => $targetLang,
'format' => 'text'
]);
$translated = $response['translatedText'] ?? $trimmed;
$translatedLines[] = $translated;
error_log("Line $index: '$trimmed' -> '$translated'");
try {
$response = $this->request('/translate', [
'q' => $trimmed,
'source' => $sourceLang,
'target' => $targetLang,
'format' => 'text'
]);
$translated = $response['translatedText'] ?? $trimmed;
$translatedLines[] = $translated;
} catch (\Exception $e) {
error_log("Line translation error: " . $e->getMessage());
$translatedLines[] = $line; // Mantener original en caso de error
}
}
$result = implode("\n", $translatedLines);
error_log("Final result (length: " . strlen($result) . "): " . str_replace("\n", "\\n", substr($result, 0, 100)));
$this->saveToCache($cacheKey, $result);
return $result;
@@ -80,6 +77,225 @@ class Translate
}
}
/**
 * Translates a message that may mix several languages into $targetLang.
 *
 * Strategy, in order:
 *  1. Return the cached result when one exists.
 *  2. If the text carries explicit `[:xx]...[:xx]` markers, translate only the
 *     marked spans via processMarkedText().
 *  3. A single-line message is translated in one request.
 *  4. A multi-line message is translated line by line. Lines containing a
 *     " / " or " | " separator are split so every part is detected and
 *     translated on its own, then re-joined with " / ". A part or line whose
 *     translation normalises to something already emitted is dropped, so a
 *     bilingual message does not repeat itself once translated.
 *  5. If the line-by-line pass changed nothing, one whole-text attempt is made
 *     with source language 'auto'.
 *
 * When the detected language equals the target, 'auto' is sent as the source
 * so the backend still gets a chance to translate embedded foreign words.
 *
 * @param string $text       Message to translate.
 * @param string $targetLang Target language code.
 *
 * @return string|null The translated text, or $text itself when it is blank,
 *                     nothing could be translated, or an exception was thrown.
 *                     The nullable type is kept for interface compatibility;
 *                     this implementation always returns a string.
 */
public function translatePartial(string $text, string $targetLang): ?string
{
    // Compare with '' instead of empty(): empty('0') is true and would
    // misclassify a message consisting of "0" as blank.
    if (trim($text) === '') {
        return $text;
    }

    try {
        // NOTE(review): the '_partial_v8' suffix versions the cache entries; bump it
        // if results cached by earlier logic must be invalidated.
        $cacheKey = $this->generateCacheKey($text . '_partial_v8', 'partial', $targetLang);
        $cached = $this->getFromCache($cacheKey);
        if ($cached !== null) {
            return $cached;
        }

        // 1. Explicit [:xx]...[:xx] markers take precedence over detection.
        $processed = $this->processMarkedText($text, $targetLang);
        if ($processed !== $text) {
            $this->saveToCache($cacheKey, $processed);
            return $processed;
        }

        // 2. Detect the dominant language on an emoji-free copy of the text.
        if (!function_exists('stripEmojisForDetection')) {
            require_once __DIR__ . '/../includes/emoji_helper.php';
        }
        $textForDetection = stripEmojisForDetection($text);
        $mainLang = $this->detectLanguage($textForDetection);

        // Only a hash of the message is logged, never its content.
        error_log("translatePartial: hash=" . md5($text) . " mainLang=" . ($mainLang ?? 'null') . " target=$targetLang");

        $lines = explode("\n", $text);

        // 3. Single line: one direct request for the whole text.
        if (count($lines) <= 1) {
            $result = $this->translate($text, $this->partialSourceLang($mainLang, $targetLang), $targetLang);
            if (is_string($result) && $result !== '') {
                $this->saveToCache($cacheKey, $result);
                return $result;
            }
            return $text;
        }

        // 4. Multi-line: translate every line (and every " / " or " | " part).
        $translatedLines = [];
        $seenNorm = [];
        $translatedAny = false;

        foreach ($lines as $line) {
            $trimmed = trim($line);
            if ($trimmed === '') {
                // Keep blank lines so paragraph breaks survive.
                $translatedLines[] = '';
                continue;
            }

            // A line without separators comes back as a single part, so both
            // cases share the same code path.
            $parts = preg_split('/\s+[\/|]\s+/', $trimmed) ?: [$trimmed];
            $keptParts = [];

            foreach ($parts as $part) {
                $part = trim($part);
                if ($part === '') {
                    continue;
                }

                $partLang = $this->detectLanguage($part);
                $translated = $this->translate($part, $this->partialSourceLang($partLang, $targetLang), $targetLang);

                if (!is_string($translated) || $translated === '') {
                    // A failed translation keeps the original text instead of
                    // silently removing it from the message.
                    $translated = $part;
                } elseif ($translated !== $part) {
                    $translatedAny = true;
                }

                $dedupKey = $this->partialDedupKey($translated);
                if ($dedupKey !== '') {
                    if (isset($seenNorm[$dedupKey])) {
                        continue;
                    }
                    $seenNorm[$dedupKey] = true;
                }
                // An empty key means the segment is punctuation/whitespace only
                // (e.g. a "-----" divider); such segments are never deduplicated.
                $keptParts[] = $translated;
            }

            if ($keptParts !== []) {
                $translatedLines[] = implode(" / ", $keptParts);
            }
        }

        // Rebuild the message and cap consecutive blank lines at one.
        $result = trim(implode("\n", $translatedLines));
        $result = preg_replace('/\n{3,}/', "\n\n", $result) ?? $result;

        // 5. Nothing changed anywhere: last whole-text attempt with 'auto'.
        if ($result === trim($text) && !$translatedAny) {
            $globalResult = $this->translate($text, 'auto', $targetLang);
            if (is_string($globalResult) && $globalResult !== '') {
                $result = $globalResult;
            }
        }

        if ($result !== '') {
            $this->saveToCache($cacheKey, $result);
            return $result;
        }

        return $text;
    } catch (\Exception $e) {
        error_log("Partial translation error: " . $e->getMessage());
        return $text;
    }
}

/**
 * Chooses the source language to send for one segment.
 *
 * @param string|null $detectedLang Language reported by detectLanguage().
 * @param string      $targetLang   Target language code.
 *
 * @return string The detected code, or 'auto' when detection yielded nothing
 *                usable or matched the target language.
 */
private function partialSourceLang($detectedLang, string $targetLang): string
{
    if (!is_string($detectedLang) || $detectedLang === '' || $detectedLang === $targetLang) {
        return 'auto';
    }

    return $detectedLang;
}

/**
 * Builds the key used to recognise repeated segments: lower-cased text with
 * all punctuation and whitespace removed.
 *
 * @param string $text Translated segment.
 *
 * @return string Normalised key; '' when the segment has no other characters.
 */
private function partialDedupKey(string $text): string
{
    // preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 under /u);
    // fall back to the raw text so mb_strtolower() never receives null.
    $stripped = preg_replace('/[[:punct:]\s]+/u', '', $text);

    return mb_strtolower($stripped ?? $text);
}
/**
 * Translates only the spans wrapped in `[:xx]...[:xx]` language markers.
 *
 * Each marked span is trimmed and, unless its marker already equals
 * $targetLang, sent to translate() with the marker as source language. The
 * markers themselves are removed. When at least one marker was resolved, the
 * result is post-processed line by line: lines are trimmed, blank lines are
 * kept, and a line whose normalised form (lower-case, no punctuation or
 * whitespace) is at least 10 characters long and was already seen is dropped.
 *
 * @param string $text       Text that may contain `[:xx]...[:xx]` markers.
 * @param string $targetLang Target language code.
 *
 * @return string The processed text, or $text unchanged when it contains no
 *                markers or the pattern could not be applied. Callers compare
 *                the return value with $text to detect "nothing to do".
 */
private function processMarkedText(string $text, string $targetLang): string
{
    // Log a hash rather than the message body, as translatePartial() does, so
    // user content does not end up in the error log.
    error_log("processMarkedText: hash=" . md5($text) . " target=$targetLang");

    $pattern = '/\[:([a-z]{2})\](.*?)\[:\1\]/su';

    $result = preg_replace_callback($pattern, function ($matches) use ($targetLang) {
        $wordLang = $matches[1];
        $word = trim($matches[2]);

        // Nothing to translate: empty span, or already in the target language.
        if ($word === '' || $wordLang === $targetLang) {
            return $word;
        }

        $translated = $this->translate($word, $wordLang, $targetLang);

        // Keep the original span when the translation is missing or empty.
        return (is_string($translated) && $translated !== '') ? $translated : $word;
    }, $text);

    if ($result === null) {
        // PCRE failure (typically invalid UTF-8 under the /u modifier). Report
        // "unchanged" so the caller falls through to its other strategies.
        error_log("processMarkedText: preg error code " . preg_last_error());
        return $text;
    }

    if ($result === $text) {
        return $text;
    }

    $uniqueLines = [];
    $seen = [];

    foreach (explode("\n", $result) as $line) {
        $trimmed = trim($line);

        // Strict comparison: empty('0') is true and would blank a "0" line.
        if ($trimmed === '') {
            $uniqueLines[] = "";
            continue;
        }

        $norm = mb_strtolower(preg_replace('/[[:punct:]\s]+/u', '', $trimmed) ?? $trimmed);

        // Short lines are always kept; only longer lines are deduplicated.
        if (mb_strlen($norm) < 10 || !isset($seen[$norm])) {
            $uniqueLines[] = $trimmed;
            $seen[$norm] = true;
        }
    }

    return implode("\n", $uniqueLines);
}
/**
 * Tags every whitespace-separated word with the language detected for it.
 *
 * The text is split on whitespace runs (which are preserved verbatim). Words
 * shorter than three characters are copied through untouched; every other
 * word is passed to detectLanguage() and, when a usable code comes back,
 * wrapped as `[:xx]word[:xx]`.
 *
 * @param string $text Text to annotate.
 *
 * @return array A list with exactly one element: ['text' => string] for blank
 *               or unsplittable input, otherwise
 *               ['text' => annotated string, 'processed' => true].
 */
private function splitIntoSegments(string $text): array
{
    if (trim($text) === '') {
        return [['text' => $text]];
    }

    $tokens = preg_split('/(\s+)/u', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($tokens === false) {
        // PCRE failure (e.g. invalid UTF-8 under /u): hand the text back
        // untouched instead of returning an empty string.
        return [['text' => $text]];
    }

    // Detection result per distinct word, so repeated words are detected once.
    $langByWord = [];
    $result = '';

    foreach ($tokens as $token) {
        $word = trim($token);

        // Whitespace delimiters and very short words are copied as-is. The
        // length is counted in characters, not bytes, so accented two-letter
        // words are treated like their unaccented counterparts.
        if ($word === '' || mb_strlen($word) < 3) {
            $result .= $token;
            continue;
        }

        if (!array_key_exists($word, $langByWord)) {
            $langByWord[$word] = $this->detectLanguage($word);
        }
        $wordLang = $langByWord[$word];

        // Only emit markers that processMarkedText() can consume: its pattern
        // accepts exactly two lower-case letters. Any other code would leave
        // literal "[:xx]" text in the output.
        // NOTE(review): if detectLanguage() can return longer codes (e.g. with a
        // region suffix), widen both patterns together.
        if (is_string($wordLang) && preg_match('/^[a-z]{2}$/', $wordLang) === 1) {
            $result .= '[:' . $wordLang . ']' . $word . '[:' . $wordLang . ']';
        } else {
            $result .= $token;
        }
    }

    return [['text' => $result, 'processed' => true]];
}
/**
 * Joins translated segments back into a single string.
 *
 * Segments are concatenated in order. A single space is inserted between two
 * segments only when neither the end of the text built so far nor the start
 * of the next segment is already a space or a newline. Empty segments are
 * skipped, so they never produce leading, trailing or doubled spaces.
 *
 * @param array $segments List of segments; each is read from its 'translated'
 *                        key, falling back to 'text' when that key is absent.
 *
 * @return string The reassembled text ('' for an empty list).
 */
private function reconstructText(array $segments): string
{
    $result = '';

    foreach ($segments as $segment) {
        // NOTE(review): the 'text' fallback assumes untranslated segments carry
        // their content under that key, as splitIntoSegments() produces.
        $text = (string) ($segment['translated'] ?? $segment['text'] ?? '');
        if ($text === '') {
            continue;
        }

        if ($result !== '') {
            // Byte-wise access is safe here: only ASCII space/newline are tested.
            $lastChar = substr($result, -1);
            $firstChar = $text[0];
            if ($lastChar !== ' ' && $lastChar !== "\n" && $firstChar !== ' ' && $firstChar !== "\n") {
                $result .= ' ';
            }
        }

        $result .= $text;
    }

    return $result;
}
public function translateToMultiple(string $text, string $sourceLang, array $targetLangs): array
{
$results = [];

View File

@@ -96,6 +96,10 @@ try {
function registerTelegramUser(PDO $pdo, array $user): void
{
// Obtener idioma por defecto de la base de datos (el primero activo)
$stmtDefault = $pdo->query("SELECT language_code FROM supported_languages WHERE is_active = 1 LIMIT 1");
$defaultLang = $stmtDefault->fetchColumn() ?: 'es';
$stmt = $pdo->prepare("
INSERT INTO recipients (platform_id, name, type, platform, language_code, chat_mode)
VALUES (?, ?, 'user', 'telegram', ?, 'bot')
@@ -103,16 +107,15 @@ function registerTelegramUser(PDO $pdo, array $user): void
");
$name = trim(($user['first_name'] ?? '') . ' ' . ($user['last_name'] ?? ''));
$languageCode = $user['language_code'] ?? 'es';
$languageCode = $user['language_code'] ?? $defaultLang;
$stmt->execute([$user['id'], $name, $languageCode]);
}
function handleAutoTranslation(PDO $pdo, Telegram\TelegramSender $sender, src\Translate $translator, int $chatId, string $text): void
{
// Usar texto sin emojis para detección de idioma, pero guardar el original para mostrar
$textForDetection = stripEmojisForDetection($text);
$keyboard = getTelegramTranslationButtons($pdo, $textForDetection ?: $text);
// Usar el texto original completo para generar los botones y guardar en caché
$keyboard = getTelegramTranslationButtons($pdo, $text);
if (!empty($keyboard)) {
$message = "🌐 <b>Traducciones disponibles:</b>\nHaz clic en una bandera para ver la traducción";
@@ -388,13 +391,10 @@ function handleTelegramCallback(PDO $pdo, Telegram\TelegramSender $sender, src\T
}
try {
// Obtener el idioma original (usar texto sin emojis para mayor precisión)
$textForDetection = stripEmojisForDetection($originalText);
$sourceLang = $translator->detectLanguage($textForDetection) ?? 'es';
file_put_contents(__DIR__ . '/../../logs/telegram_debug.log', date('Y-m-d H:i:s') . " - sourceLang: $sourceLang, targetLang: $targetLang, originalText: " . substr($originalText, 0, 50) . "\n", FILE_APPEND);
// Traducir (usar texto sin emojis para evitar interferencias)
$translated = $translator->translate($textForDetection ?: $originalText, $sourceLang, $targetLang);
// Traducción parcial - detecta el idioma de cada segmento y traduce solo lo necesario
$textForTranslation = stripEmojisForDetection($originalText);
$translated = $translator->translatePartial($textForTranslation ?: $originalText, $targetLang);
file_put_contents(__DIR__ . '/../../logs/telegram_debug.log', date('Y-m-d H:i:s') . " - targetLang: $targetLang, originalText: " . substr($originalText, 0, 50) . "\n", FILE_APPEND);
file_put_contents(__DIR__ . '/../../logs/telegram_debug.log', date('Y-m-d H:i:s') . " - translated: $translated\n", FILE_APPEND);
if ($translated) {