Files
lastwar/telegram/converters/HtmlToTelegramHtmlConverter.php

113 lines
4.1 KiB
PHP
Executable File

<?php
namespace Telegram\Converters;
/**
 * Converts generic HTML into the restricted HTML subset kept by this class:
 * <b>, <i>, <u>, <s>, <code>, <pre> and <a href="...">.
 *
 * The conversion is regex based and therefore best-effort: it targets
 * reasonably well-formed markup and does not build a DOM.
 */
class HtmlToTelegramHtmlConverter
{
    /**
     * Runs the full conversion pipeline on an HTML fragment.
     *
     * @param string $html Source HTML.
     *
     * @return string Converted markup with surrounding whitespace trimmed.
     */
    public function convert(string $html): string
    {
        $content = $html;
        $content = $this->convertBold($content);
        $content = $this->convertItalic($content);
        $content = $this->convertUnderline($content);
        $content = $this->convertStrikethrough($content);
        $content = $this->convertCode($content);
        $content = $this->convertLinks($content);
        $content = $this->convertHeaders($content);
        $content = $this->convertLineBreaks($content);
        $content = $this->cleanUp($content);

        return trim($content);
    }

    /**
     * Rewrites <strong> and <b> elements (with any attributes) to a bare <b>.
     */
    private function convertBold(string $content): string
    {
        foreach (['strong', 'b'] as $tag) {
            $content = $this->rewriteElement($content, $tag, '<b>$1</b>');
        }

        return $content;
    }

    /**
     * Rewrites <em> and <i> elements (with any attributes) to a bare <i>.
     */
    private function convertItalic(string $content): string
    {
        foreach (['em', 'i'] as $tag) {
            $content = $this->rewriteElement($content, $tag, '<i>$1</i>');
        }

        return $content;
    }

    /**
     * Strips attributes from <u> elements.
     */
    private function convertUnderline(string $content): string
    {
        return $this->rewriteElement($content, 'u', '<u>$1</u>');
    }

    /**
     * Rewrites <s>, <strike> and <del> elements to a bare <s>.
     */
    private function convertStrikethrough(string $content): string
    {
        foreach (['s', 'strike', 'del'] as $tag) {
            $content = $this->rewriteElement($content, $tag, '<s>$1</s>');
        }

        return $content;
    }

    /**
     * Strips attributes from <code> elements and puts the content of <pre>
     * elements on its own lines.
     */
    private function convertCode(string $content): string
    {
        $content = $this->rewriteElement($content, 'code', '<code>$1</code>');

        return $this->rewriteElement($content, 'pre', "<pre>\n$1\n</pre>");
    }

    /**
     * Reduces <a> elements to <a href="..."> and drops every other attribute.
     *
     * The closing quote must be the same character as the opening one, so a
     * double-quoted URL may contain an apostrophe (and vice versa). The href
     * attribute has to be preceded by whitespace, which keeps attributes such
     * as data-href from being mistaken for it.
     */
    private function convertLinks(string $content): string
    {
        $pattern = '/<a(?=[\s\/>])[^>]*?\shref\s*=\s*(["\'])(.*?)\1[^>]*>(.*?)<\/a\s*>/is';

        $result = preg_replace_callback(
            $pattern,
            static function (array $m): string {
                // The output always uses double quotes, so a literal double
                // quote taken from a single-quoted source URL must be escaped.
                $url = str_replace('"', '&quot;', $m[2]);

                return '<a href="' . $url . '">' . $m[3] . '</a>';
            },
            $content
        );

        // preg_replace_callback() yields null on a PCRE error; keep the input.
        return $result ?? $content;
    }

    /**
     * Rewrites <h1>..<h6> elements to a bold line followed by a newline.
     */
    private function convertHeaders(string $content): string
    {
        for ($level = 1; $level <= 6; $level++) {
            $content = $this->rewriteElement($content, 'h' . $level, "<b>$1</b>\n");
        }

        return $content;
    }

    /**
     * Turns <br> elements (plain, self-closing or carrying attributes) into
     * newline characters.
     */
    private function convertLineBreaks(string $content): string
    {
        return $this->replace('/<br(?=[\s\/>])[^>]*>/i', "\n", $content);
    }

    /**
     * Unwraps block/inline containers, removes every tag outside the kept
     * subset and normalises whitespace.
     *
     * The entities &lt;, &gt;, &amp; and &quot; are intentionally left
     * encoded: decoding them here would inject raw markup characters into the
     * text, and decoding &amp; before the others would double-decode input
     * such as "&amp;lt;". Only &nbsp; is replaced, by a regular space.
     */
    private function cleanUp(string $content): string
    {
        $content = $this->rewriteElement($content, 'p', "$1\n");
        $content = $this->rewriteElement($content, 'div', "$1\n");
        $content = $this->rewriteElement($content, 'span', '$1');
        $content = $this->rewriteElement($content, 'li', "$1\n");

        $content = strip_tags($content, '<b><i><u><s><code><pre><a>');
        $content = str_replace('&nbsp;', ' ', $content);

        // Collapse runs of three or more newlines into a single blank line.
        return $this->replace('/\n{3,}/', "\n\n", $content);
    }

    /**
     * Collects the src attribute of every <img> element, in document order.
     *
     * @param string $html Source HTML.
     *
     * @return array List of src values; empty when no image is found.
     */
    public function extractImages(string $html): array
    {
        // "\ssrc" requires whitespace before the attribute name, so data-src
        // and similar attributes are not picked up; the back-reference makes
        // the closing quote match the opening one.
        $pattern = '/<img(?=[\s\/>])[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1[^>]*>/is';
        preg_match_all($pattern, $html, $matches);

        return $matches[2] ?? [];
    }

    /**
     * Removes every <img> element from the given HTML.
     *
     * @param string $html Source HTML.
     *
     * @return string HTML without <img> elements.
     */
    public function removeImages(string $html): string
    {
        return $this->replace('/<img(?=[\s\/>])[^>]*>/i', '', $html);
    }

    /**
     * Replaces each "<tag ...>content</tag>" element with $replacement, where
     * "$1" stands for the element content.
     *
     * The look-ahead after the tag name demands whitespace, "/" or ">", so
     * that e.g. "b" does not match <br> or <blockquote>, "s" does not match
     * <span>, and "p" does not match <pre>.
     *
     * @param string $content     Markup to transform.
     * @param string $tag         Tag name without angle brackets.
     * @param string $replacement preg_replace() replacement string.
     */
    private function rewriteElement(string $content, string $tag, string $replacement): string
    {
        $name = preg_quote($tag, '/');
        $pattern = '/<' . $name . '(?=[\s\/>])[^>]*>(.*?)<\/' . $name . '\s*>/is';

        return $this->replace($pattern, $replacement, $content);
    }

    /**
     * preg_replace() wrapper that always returns a string.
     *
     * preg_replace() returns null when PCRE fails (for example when the
     * backtrack limit is hit on a very large input). In that case the subject
     * is returned unchanged instead of violating the string return type.
     */
    private function replace(string $pattern, string $replacement, string $subject): string
    {
        $result = preg_replace($pattern, $replacement, $subject);

        return $result ?? $subject;
    }
}