]*>(.*?)<\/code>/is', '$1', $content);
$content = preg_replace('/]*>(.*?)<\/pre>/is', "\n$1\n
", $content);
return $content;
}
/**
 * Replaces every anchor element that carries a quoted href with its inner content.
 *
 * The href value is captured as group 1 but is not used by the replacement,
 * so only the link text survives.
 *
 * @param string $content HTML fragment to transform.
 * @return string The fragment with each matching anchor reduced to its inner content.
 */
private function convertLinks(string $content): string
{
    // `<a\b` pins the match to the start of an anchor tag and keeps <abbr>, <area>, ...
    // out; a pattern that starts mid-tag never consumes the "<a" itself.
    $pattern = '/<a\b[^>]+href=["\']([^"\']+)["\'][^>]*>(.*?)<\/a>/is';

    // preg_replace() returns null on a PCRE failure (e.g. backtrack limit);
    // fall back to the untouched input so the declared string return type holds.
    return preg_replace($pattern, '$2', $content) ?? $content;
}
/**
 * Replaces each <h1>..<h6> element with its inner content followed by a newline.
 *
 * @param string $content HTML fragment to transform.
 * @return string The fragment with heading elements flattened to "text\n".
 */
private function convertHeaders(string $content): string
{
    // One pattern covers all six levels: group 1 is the level digit and the
    // backreference \1 forces the closing tag to be the same level. Matching
    // the whole opening tag (`<hN ...>`) is what removes it from the output.
    $pattern = '/<h([1-6])\b[^>]*>(.*?)<\/h\1>/is';

    // Null means PCRE failed; return the input unchanged rather than null.
    return preg_replace($pattern, "$2\n", $content) ?? $content;
}
/**
 * Replaces <br> tags (any case, with or without a trailing slash or
 * attributes) with a newline character.
 *
 * @param string $content HTML fragment to transform.
 * @return string The fragment with every <br> variant turned into "\n".
 */
private function convertLineBreaks(string $content): string
{
    // `\b` after "br" stops the pattern from touching other tags that merely
    // start with those letters; `[^>]*` absorbs " /", "/" or attributes.
    // Falls back to the input if PCRE reports an error (null result).
    return preg_replace('/<br\b[^>]*>/i', "\n", $content) ?? $content;
}
/**
 * Flattens the remaining markup to plain text.
 *
 * Steps, in order:
 *  1. <p> and <div> become their content plus a newline, <span> becomes its
 *     content, <li> becomes a "• " bullet line.
 *  2. Every tag that is still present is stripped.
 *  3. The entities &nbsp; &amp; &lt; &gt; &quot; are decoded.
 *  4. Runs of three or more newlines are collapsed to two.
 *
 * @param string $content HTML fragment to clean.
 * @return string Plain-text rendering of the fragment.
 */
private function cleanUp(string $content): string
{
    // Pattern => replacement, applied sequentially in this order by preg_replace().
    // `\b` keeps <p from matching <pre>, and <li from matching <link>.
    $elementRules = [
        '/<p\b[^>]*>(.*?)<\/p>/is' => "$1\n",
        '/<div\b[^>]*>(.*?)<\/div>/is' => "$1\n",
        '/<span\b[^>]*>(.*?)<\/span>/is' => '$1',
        '/<li\b[^>]*>(.*?)<\/li>/is' => "• $1\n",
    ];
    $content = preg_replace(array_keys($elementRules), array_values($elementRules), $content) ?? $content;

    // Strip tags BEFORE decoding entities, otherwise a decoded "&lt;" would
    // look like the start of a tag and be removed together with the text after it.
    $content = strip_tags($content);

    // strtr() with a map replaces in a single pass and never rescans its own
    // output, so "&amp;lt;" decodes to "&lt;" and not all the way to "<".
    $content = strtr($content, [
        '&nbsp;' => ' ',
        '&amp;' => '&',
        '&lt;' => '<',
        '&gt;' => '>',
        '&quot;' => '"',
    ]);

    // Keep at most one blank line between paragraphs.
    return preg_replace('/\n{3,}/', "\n\n", $content) ?? $content;
}
/**
 * Collects the src attribute value of every <img> tag in the given HTML.
 *
 * Only quoted src values (single or double quotes) are recognised.
 *
 * @param string $html HTML to scan.
 * @return list<string> The src values in document order; empty when none match.
 */
public function extractImages(string $html): array
{
    // `<img\b` anchors on the tag itself; the lazy `[^>]*?` followed by `\s`
    // picks the first attribute literally named "src", so a "data-src"
    // attribute (preceded by "-", not whitespace) is not mistaken for it.
    $pattern = '/<img\b[^>]*?\ssrc\s*=\s*["\']([^"\']+)["\'][^>]*>/i';

    preg_match_all($pattern, $html, $matches);

    return $matches[1] ?? [];
}
/**
 * Removes every <img> tag from the given HTML.
 *
 * @param string $html HTML to process.
 * @return string The HTML without <img> tags; all other content is left as is.
 */
public function removeImages(string $html): string
{
    // `\b` restricts the match to the img element; `[^>]*` also covers a bare <img>.
    // preg_replace() returns null on a PCRE error, which would violate the
    // declared string return type, so fall back to the original input.
    return preg_replace('/<img\b[^>]*>/i', '', $html) ?? $html;
}
}