Files
php-func-lib/link-meta.php
2026-02-15 14:57:05 +01:00

188 lines
5.2 KiB
PHP

<?php
declare(strict_types=1);
require_once __DIR__ . '/http-limits.php';
/**
 * Builds the stream context used for outgoing HTTP(S) requests.
 *
 * Redirect limit, user agent and default timeout come from httpLimits()
 * (not defined in this file; presumably provided by http-limits.php).
 *
 * @param int|null $timeout Timeout override in seconds; values below 1 are
 *                          raised to 1. Null selects the configured default.
 * @return resource Stream context as returned by stream_context_create().
 */
function httpContext(?int $timeout = null) {
    $limits = httpLimits();

    $httpOptions = [
        'timeout' => ($timeout !== null) ? max(1, $timeout) : $limits['timeout'],
        'follow_location' => 1,
        'max_redirects' => $limits['max_redirects'],
        'user_agent' => $limits['user_agent'],
        // Return the body even for 4xx/5xx responses instead of failing.
        'ignore_errors' => true
    ];

    // Certificate and host name verification stay enabled for HTTPS.
    $sslOptions = [
        'verify_peer' => true,
        'verify_peer_name' => true
    ];

    return stream_context_create([
        'http' => $httpOptions,
        'ssl' => $sslOptions
    ]);
}
/**
 * Validates a URL and accepts it only when its scheme is http or https.
 *
 * @param string $url Raw URL; surrounding whitespace is ignored.
 * @return string|null The trimmed URL, or null when it is not a valid
 *                     http/https URL.
 */
function normalizeUrl(string $url): ?string {
    $candidate = trim($url);

    if (filter_var($candidate, FILTER_VALIDATE_URL) === false) {
        return null;
    }

    $components = parse_url($candidate);
    if (empty($components) || !isset($components['scheme'])) {
        return null;
    }

    // The scheme comparison is case-insensitive; the URL itself is returned unchanged.
    $isWebScheme = in_array(strtolower($components['scheme']), ['http', 'https'], true);

    return $isWebScheme ? $candidate : null;
}
/**
 * Resolves a URL reference against a base URL.
 *
 * - A reference that already is a valid absolute URL is returned unchanged.
 * - A reference that carries a scheme but is not a valid URL (e.g. "data:",
 *   "javascript:", or a URL containing spaces) yields null instead of being
 *   appended to the base as if it were a relative path.
 * - "//host/path" references inherit the scheme of the base URL.
 * - "/path" references are resolved against the base origin.
 * - Everything else is appended to the directory of the base path.
 *
 * The origin keeps the base URL's userinfo and port, so references found on
 * e.g. "http://localhost:8080/app/" stay on port 8080. Dot segments ("./",
 * "../") are not collapsed.
 *
 * @param string $url     Reference to resolve; surrounding whitespace is ignored.
 * @param string $baseUrl Absolute URL the reference was found on.
 * @return string|null Resolved URL, or null when resolution is not possible.
 */
function resolveUrl(string $url, string $baseUrl): ?string {
    $url = trim($url);
    if (filter_var($url, FILTER_VALIDATE_URL)) {
        return $url;
    }

    // A leading "scheme:" marks an absolute reference; since it failed
    // validation above it must not be treated as a relative path.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url)) {
        return null;
    }

    $baseParts = parse_url($baseUrl);
    if (!$baseParts || !isset($baseParts['scheme']) || !isset($baseParts['host'])) {
        return null;
    }

    // Protocol-relative reference: only the scheme is inherited.
    if (strpos($url, '//') === 0) {
        return $baseParts['scheme'] . ':' . $url;
    }

    // Rebuild the full authority (userinfo, host, port) of the base URL.
    $authority = '';
    if (isset($baseParts['user'])) {
        $authority .= $baseParts['user'];
        if (isset($baseParts['pass'])) {
            $authority .= ':' . $baseParts['pass'];
        }
        $authority .= '@';
    }
    $authority .= $baseParts['host'];
    if (isset($baseParts['port'])) {
        $authority .= ':' . $baseParts['port'];
    }
    $origin = $baseParts['scheme'] . '://' . $authority;

    // Host-absolute reference.
    if ($url !== '' && $url[0] === '/') {
        return $origin . $url;
    }

    // Relative reference: strip the last path segment of the base to get its directory.
    $directory = '/';
    if (!empty($baseParts['path'])) {
        $directory = preg_replace('#/[^/]*$#', '/', $baseParts['path']);
        if ($directory === null || $directory === '') {
            $directory = '/';
        }
    }

    return $origin . $directory . $url;
}
/**
 * Fetches the body of a URL through the shared HTTP stream context.
 *
 * The read is capped at max_bytes + 1 bytes, so an oversized response is
 * detected without buffering all of it in memory first.
 *
 * @param string   $url     URL to fetch.
 * @param int|null $timeout Optional timeout override, forwarded to httpContext().
 * @return string|null Response body, or null when the request failed or the
 *                     body exceeds the 'max_bytes' entry of httpLimits().
 */
function safeFetch(string $url, ?int $timeout = null): ?string {
    $limits = httpLimits();
    $ctx = httpContext($timeout);

    // One byte more than allowed is enough to tell "too large" from "fits".
    $maxBytes = max(0, (int) $limits['max_bytes']);
    $readCap = $maxBytes < PHP_INT_MAX ? $maxBytes + 1 : PHP_INT_MAX;

    // Best-effort fetch: warnings are suppressed on purpose, failure is reported as null.
    $content = @file_get_contents($url, false, $ctx, 0, $readCap);
    if ($content === false || strlen($content) > $limits['max_bytes']) {
        return null;
    }
    return $content;
}
/**
 * Downloads an image referenced by a (possibly relative) URL and stores it
 * under a content-derived file name.
 *
 * Only http/https URLs are fetched: the reference usually comes from remote
 * HTML, and other stream wrappers (file:, php:, ...) must never reach
 * file_get_contents().
 *
 * @param string $url               Image reference as found in the page.
 * @param string $baseUrl           URL of the page, used to resolve relative references.
 * @param string $destinationFolder Target directory; created when missing.
 * @return string|null Path of the stored file ("<folder>/<md5>.<ext>"), or
 *                     null when the URL is rejected, the download fails or
 *                     the file cannot be written.
 */
function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string {
    $resolved = resolveUrl($url, $baseUrl);
    if ($resolved === null) {
        return null;
    }

    // Untrusted input: restrict the fetch to plain web URLs.
    $scheme = parse_url($resolved, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return null;
    }

    // The URL must end in a known image extension (before any query/fragment).
    if (!preg_match('/\.(jpg|jpeg|png|gif|bmp|webp)(?:\?|#|$)/i', $resolved)) {
        return null;
    }

    $imageContent = safeFetch($resolved);
    if ($imageContent === null || $imageContent === '') {
        return null;
    }

    // parse_url() may return null or false; pathinfo() needs a string.
    $path = parse_url($resolved, PHP_URL_PATH);
    if (!is_string($path)) {
        $path = '';
    }
    $ext = strtolower(pathinfo($path, PATHINFO_EXTENSION));
    if ($ext === '' || !preg_match('/^(jpg|jpeg|png|gif|bmp|webp)$/', $ext)) {
        // The extension matched in the query/fragment only; fall back to png.
        $ext = 'png';
    }

    // Re-check is_dir() after mkdir() so a concurrent creation is not treated as failure.
    if (!is_dir($destinationFolder)
        && !@mkdir($destinationFolder, 0775, true)
        && !is_dir($destinationFolder)) {
        return null;
    }

    // Content hash as file name: identical images map to the same file.
    $md5Hash = md5($imageContent);
    $filePath = rtrim($destinationFolder, '/\\') . '/' . $md5Hash . '.' . $ext;
    $written = @file_put_contents($filePath, $imageContent);
    if ($written === false) {
        return null;
    }
    return $filePath;
}
/**
 * Extracts the "content" value of the first <meta> tag whose $attr attribute
 * equals $name (case-insensitive).
 *
 * Each <meta ...> tag is inspected on its own, so the attribute order inside
 * the tag does not matter. Attribute values are matched up to the same quote
 * character that opened them, so content="Don't panic" is returned in full.
 * Tags with an empty content value are skipped.
 *
 * @param string $html HTML document to search.
 * @param string $attr Attribute identifying the tag, e.g. "property" or "name".
 *                     Inserted into the pattern verbatim, as before.
 * @param string $name Expected attribute value, e.g. "og:title"; matched literally.
 * @return string|null Entity-decoded, trimmed content value, or null when no
 *                     matching tag exists.
 */
function parseMetaContent(string $html, string $attr, string $name): ?string {
    if (!preg_match_all('/<meta\b[^>]*>/i', $html, $tags)) {
        return null;
    }

    // The lookbehind keeps "data-name=" or "xml:name=" from matching "name=".
    $attrPattern = '/(?<![\w\-:])(?:' . $attr . ')\s*=\s*(?P<q>["\'])'
        . preg_quote($name, '/') . '(?P=q)/i';
    $contentPattern = '/(?<![\w\-:])content\s*=\s*(?P<q>["\'])(?P<v>.*?)(?P=q)/is';

    foreach ($tags[0] as $tag) {
        if (!preg_match($attrPattern, $tag)) {
            continue;
        }
        if (preg_match($contentPattern, $tag, $matches) && $matches['v'] !== '') {
            return trim(html_entity_decode($matches['v'], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
        }
    }
    return null;
}
/**
 * Fetches a page and extracts its title, description and preview image.
 *
 * Open Graph tags are preferred; plain "title"/"description"/"image" meta
 * tags and finally the <title> element serve as fallbacks. The <title>
 * fallback also accepts attributes on the element (e.g.
 * <title data-react-helmet="true">).
 *
 * @param string $url Page URL; must be a valid http/https URL.
 * @return array{ok: bool, title: string, description: string, logo: ?string, error: ?string}
 *         'error' is 'ungueltige_url' for a rejected URL and
 *         'seite_nicht_erreichbar' when the page could not be fetched;
 *         'logo' is the stored image path prefixed with "/", or null.
 */
function getPageInfo(string $url): array {
    $ret = [
        'ok' => false,
        'title' => '',
        'description' => '',
        'logo' => null,
        'error' => null
    ];

    $normalized = normalizeUrl($url);
    if ($normalized === null) {
        $ret['error'] = 'ungueltige_url';
        return $ret;
    }

    $html = safeFetch($normalized);
    if ($html === null) {
        $ret['error'] = 'seite_nicht_erreichbar';
        return $ret;
    }

    $title = parseMetaContent($html, 'property', 'og:title') ?? parseMetaContent($html, 'name', 'title');
    $description = parseMetaContent($html, 'property', 'og:description') ?? parseMetaContent($html, 'name', 'description');
    $image = parseMetaContent($html, 'property', 'og:image') ?? parseMetaContent($html, 'name', 'image');

    // Last resort for the title: the <title> element, with or without attributes.
    if ($title === null && preg_match('/<title\b[^>]*>\s*(.*?)\s*<\/title>/is', $html, $matchTitle)) {
        $title = trim(html_entity_decode($matchTitle[1], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    }

    $logo = null;
    if ($image !== null && $image !== '') {
        $img = downloadImageFromUrl($image, $normalized);
        if ($img !== null) {
            // Expose the stored file as a root-relative path.
            $logo = '/' . ltrim($img, '/');
        }
    }

    $ret['ok'] = true;
    $ret['title'] = $title ?? '';
    $ret['description'] = $description ?? '';
    $ret['logo'] = $logo;
    return $ret;
}
/**
 * Cleans a list of tag candidates and removes duplicates.
 *
 * Non-string entries are ignored. Each string is trimmed and passed through
 * onlyAlpha() with '_\-' as second argument (helper not defined in this file;
 * presumably it strips characters outside an allowed set - confirm against
 * its definition). Entries that come back empty are dropped; the rest is cut
 * to 35 bytes and gets an upper-cased first character. Collection stops once
 * 20 distinct tags were gathered.
 *
 * @param array $input Raw tag candidates.
 * @return array Distinct cleaned tags in order of first appearance.
 */
function sanitizeTags(array $input): array {
    // Tags are collected as array keys so duplicates collapse automatically.
    $seen = [];

    foreach ($input as $candidate) {
        if (is_string($candidate)) {
            $stripped = onlyAlpha(trim($candidate), '_\-');
            if ($stripped !== '') {
                $label = ucfirst(substr($stripped, 0, 35));
                $seen[$label] = true;
                if (count($seen) >= 20) {
                    break;
                }
            }
        }
    }

    return array_keys($seen);
}