<?php

declare(strict_types=1);

namespace Discord\Converters;

/**
 * Converts an HTML fragment into Discord-flavoured Markdown.
 *
 * The conversion is a fixed sequence of regular-expression passes, not a real
 * HTML parser: nested elements of the same name and malformed markup are
 * handled on a best-effort basis, and markup inside <code>/<pre> is still
 * subject to the inline passes that run before the code pass.
 */
class HtmlToDiscordMarkdownConverter
{
    /**
     * Converts an HTML fragment to Markdown.
     *
     * The passes run in a fixed order; images go first so their tags are gone
     * before the inline-formatting passes, and cleanUp() goes last because it
     * strips every tag that is still present and decodes entities.
     *
     * @param string $html HTML fragment to convert.
     *
     * @return string Markdown text with leading/trailing whitespace trimmed.
     *
     * @throws \RuntimeException If one of the regular-expression passes fails.
     */
    public function convert(string $html): string
    {
        $content = $this->convertImages($html);
        $content = $this->convertBold($content);
        $content = $this->convertItalic($content);
        $content = $this->convertUnderline($content);
        $content = $this->convertStrikethrough($content);
        $content = $this->convertCode($content);
        $content = $this->convertLinks($content);
        $content = $this->convertLists($content);
        $content = $this->convertHeaders($content);
        $content = $this->convertLineBreaks($content);
        $content = $this->cleanUp($content);

        return trim($content);
    }

    /**
     * Replaces every <img> that has a non-empty quoted src with "(src)".
     */
    private function convertImages(string $content): string
    {
        return $this->replace($this->imageTagPattern(), '($1)', $content);
    }

    /**
     * Wraps the contents of <strong> and <b> elements in "**".
     */
    private function convertBold(string $content): string
    {
        return $this->wrapElements($content, ['strong', 'b'], '**$1**');
    }

    /**
     * Wraps the contents of <em> and <i> elements in "*".
     */
    private function convertItalic(string $content): string
    {
        return $this->wrapElements($content, ['em', 'i'], '*$1*');
    }

    /**
     * Wraps the contents of <u> elements in "__".
     */
    private function convertUnderline(string $content): string
    {
        return $this->wrapElements($content, ['u'], '__$1__');
    }

    /**
     * Wraps the contents of <s>, <strike> and <del> elements in "~~".
     */
    private function convertStrikethrough(string $content): string
    {
        return $this->wrapElements($content, ['s', 'strike', 'del'], '~~$1~~');
    }

    /**
     * Converts <pre> blocks to fenced code blocks and <code> to inline code.
     *
     * <pre> is handled first and any <code> wrapper inside it is dropped, so
     * the common <pre><code>…</code></pre> form does not end up with stray
     * backticks inside the fence.
     */
    private function convertCode(string $content): string
    {
        $content = $this->checked(preg_replace_callback(
            '/<pre(?:\s[^>]*)?>(.*?)<\/pre\s*>/is',
            function (array $match): string {
                $inner = $this->replace('/<\/?code(?:\s[^>]*)?>/i', '', $match[1]);

                return "```\n" . $inner . "\n```";
            },
            $content
        ));

        return $this->wrapElements($content, ['code'], '`$1`');
    }

    /**
     * Converts <a href="url">text</a> to "[text](url)".
     *
     * The href attribute must be preceded by whitespace so that look-alike
     * attributes such as data-href are not picked up, and the value is read up
     * to the matching quote character.
     */
    private function convertLinks(string $content): string
    {
        return $this->replace(
            '/<a\b[^>]*?\shref\s*=\s*(?|"([^"]+)"|\'([^\']+)\')[^>]*>(.*?)<\/a\s*>/is',
            '[$2]($1)',
            $content
        );
    }

    /**
     * Turns each <li> into a bullet line and removes <ul> wrappers.
     */
    private function convertLists(string $content): string
    {
        $content = $this->wrapElements($content, ['li'], "\n• $1");

        return $this->replace('/<\/?ul(?:\s[^>]*)?>/i', '', $content);
    }

    /**
     * Converts <h1>…<h6> to Markdown headings.
     *
     * Level n is emitted with n + 1 "#" characters, capped at six, so both
     * <h5> and <h6> produce "######".
     * NOTE(review): Discord appears to render only "#", "##" and "###" as
     * headings — confirm that the deeper levels are intended.
     */
    private function convertHeaders(string $content): string
    {
        for ($level = 1; $level <= 6; $level++) {
            $marker = str_repeat('#', min($level + 1, 6));
            $content = $this->wrapElements(
                $content,
                ['h' . $level],
                "\n\n" . $marker . ' $1' . "\n"
            );
        }

        return $content;
    }

    /**
     * Replaces <br>, <br/> and <br …> tags with a newline.
     */
    private function convertLineBreaks(string $content): string
    {
        return $this->replace('/<br(?:\s[^>]*)?\/?>/i', "\n", $content);
    }

    /**
     * Final pass: unwraps block containers, strips leftover tags, decodes
     * entities and collapses runs of blank lines.
     */
    private function cleanUp(string $content): string
    {
        $content = $this->wrapElements($content, ['p', 'div'], "\n$1\n");
        $content = $this->wrapElements($content, ['span'], '$1');

        $content = strip_tags($content);

        // Decode after strip_tags() so that escaped markup such as "&lt;b&gt;"
        // survives as literal text. A single decoding pass also keeps
        // "&amp;lt;" as "&lt;" instead of decoding it twice.
        $content = html_entity_decode($content, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        // Non-breaking spaces become ordinary spaces.
        $content = str_replace("\u{00A0}", ' ', $content);

        return $this->replace('/\n{3,}/', "\n\n", $content);
    }

    /**
     * Returns the src value of every <img> tag that has a non-empty quoted src.
     *
     * @param string $html HTML fragment to scan.
     *
     * @return string[] Source values in document order; empty if none match.
     */
    public function extractImages(string $html): array
    {
        preg_match_all($this->imageTagPattern(), $html, $matches);

        return $matches[1] ?? [];
    }

    /**
     * Removes every <img> tag from the given HTML.
     *
     * @param string $html HTML fragment to filter.
     *
     * @return string The fragment without <img> tags.
     *
     * @throws \RuntimeException If the regular-expression pass fails.
     */
    public function removeImages(string $html): string
    {
        return $this->replace('/<img(?:\s[^>]*)?\/?>/i', '', $html);
    }

    /**
     * Pattern matching an <img> tag; capture group 1 is the src value.
     *
     * The src attribute must be preceded by whitespace (so data-src is not
     * mistaken for it) and the value runs to the matching quote character.
     * The branch-reset group (?|…) makes both quote styles capture as group 1.
     */
    private function imageTagPattern(): string
    {
        return '/<img\b[^>]*?\ssrc\s*=\s*(?|"([^"]+)"|\'([^\']+)\')[^>]*>/i';
    }

    /**
     * Applies $replacement to every <tag …>inner</tag> element, one tag name
     * at a time in the order given; "$1" in $replacement is the inner content.
     *
     * The tag name must be followed by whitespace or ">" so that a short name
     * does not match a longer one (for example "b" must not match <br>).
     *
     * @param string   $content     Text to transform.
     * @param string[] $tags        Literal tag names.
     * @param string   $replacement preg_replace() replacement string.
     */
    private function wrapElements(string $content, array $tags, string $replacement): string
    {
        foreach ($tags as $tag) {
            $name = preg_quote($tag, '/');
            $pattern = '/<' . $name . '(?:\s[^>]*)?>(.*?)<\/' . $name . '\s*>/is';
            $content = $this->replace($pattern, $replacement, $content);
        }

        return $content;
    }

    /**
     * preg_replace() wrapper that never returns null.
     *
     * @throws \RuntimeException If PCRE reports a failure.
     */
    private function replace(string $pattern, string $replacement, string $subject): string
    {
        return $this->checked(preg_replace($pattern, $replacement, $subject));
    }

    /**
     * Turns a null result from a preg_* replace call into an exception.
     *
     * @param string|null $result Return value of preg_replace()/preg_replace_callback().
     *
     * @throws \RuntimeException If $result is null.
     */
    private function checked($result): string
    {
        if ($result === null) {
            throw new \RuntimeException(
                'Regular expression failure during HTML conversion (PCRE error code ' . preg_last_error() . ').'
            );
        }

        return $result;
    }
}