<?php

/**
 * link-meta.php - helpers for building a link preview from a remote page.
 *
 * Fetches a page over HTTP(S), extracts title / description / preview image
 * from its meta tags, stores the preview image locally and sanitizes
 * user-supplied tags.
 *
 * Provenance: added as link-meta.php by the patch "links"
 * (commit 93ae2cdaa9d1f06eec470acb7853557653026514, Troy Grunt, 2026-02-14).
 *
 * NOTE(review): every fetch in this file targets a caller-supplied URL and
 * follows redirects. Nothing here blocks loopback / private / link-local
 * hosts, so if the URLs come from untrusted users this is an SSRF vector.
 * Restricting target hosts needs a policy decision and is not done here.
 */

/**
 * Build the stream context used for all outgoing HTTP(S) requests.
 *
 * NOTE(review): this function's header was lost when the patch text was
 * extracted; the signature is reconstructed from the call in safeFetch().
 * Confirm the default timeout against the original commit.
 *
 * @param int $timeout Read timeout in seconds.
 * @return resource Stream context for file_get_contents().
 */
function httpContext(int $timeout = 8) {
    return stream_context_create([
        'http' => [
            'timeout' => $timeout,
            'follow_location' => 1,
            'max_redirects' => 4,
            'user_agent' => 'star-citizen.de-linkbot/1.0',
            // Non-2xx responses still return their body instead of failing.
            'ignore_errors' => true,
        ],
        'ssl' => [
            'verify_peer' => true,
            'verify_peer_name' => true,
        ],
    ]);
}

/**
 * Validate a URL and accept it only if it is an absolute http/https URL.
 *
 * @param string $url Candidate URL; surrounding whitespace is ignored.
 * @return string|null The trimmed URL, or null when it is not acceptable.
 */
function normalizeUrl(string $url): ?string {
    $url = trim($url);
    if ($url === '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
        return null;
    }

    $parts = parse_url($url);
    if (!is_array($parts) || !isset($parts['scheme']) || empty($parts['host'])) {
        return null;
    }

    if (!in_array(strtolower($parts['scheme']), ['http', 'https'], true)) {
        return null;
    }

    return $url;
}

/**
 * Resolve a possibly relative reference against a base URL.
 *
 * Handles absolute URLs (returned unchanged), protocol-relative references
 * ("//host/path"), root-relative references ("/path") and references relative
 * to the base URL's directory. Dot segments ("../") are not collapsed.
 *
 * @param string $url     Reference found in the page.
 * @param string $baseUrl URL of the page that contained the reference.
 * @return string|null Absolute URL, or null when the reference is empty or the
 *                     base URL has no scheme/host.
 */
function resolveUrl(string $url, string $baseUrl): ?string {
    $url = trim($url);
    if ($url === '') {
        // An empty reference would only resolve to the base directory itself.
        return null;
    }
    if (filter_var($url, FILTER_VALIDATE_URL) !== false) {
        return $url;
    }

    $baseParts = parse_url($baseUrl);
    if (!is_array($baseParts) || !isset($baseParts['scheme'], $baseParts['host'])) {
        return null;
    }

    if (strncmp($url, '//', 2) === 0) {
        return $baseParts['scheme'] . ':' . $url;
    }

    // Keep a non-default port; dropping it would point at a different server.
    $origin = $baseParts['scheme'] . '://' . $baseParts['host'];
    if (isset($baseParts['port'])) {
        $origin .= ':' . $baseParts['port'];
    }

    if ($url[0] === '/') {
        return $origin . $url;
    }

    // Directory of the base document: strip everything after the last slash.
    $directory = '/';
    if (!empty($baseParts['path'])) {
        $directory = preg_replace('#/[^/]*$#', '/', $baseParts['path']);
        if ($directory === null || $directory === '') {
            $directory = '/';
        }
    }

    return $origin . $directory . $url;
}

/**
 * Fetch a URL with the shared HTTP context; failures yield null, not warnings.
 *
 * Because the context sets ignore_errors, the body of a non-2xx response is
 * returned like any other body. The URL scheme is not checked here; callers
 * are expected to pass http/https URLs only.
 *
 * @param string   $url      URL to fetch.
 * @param int      $timeout  Timeout in seconds.
 * @param int|null $maxBytes Read at most this many bytes; null reads everything.
 * @return string|null Response body (possibly truncated to $maxBytes) or null.
 */
function safeFetch(string $url, int $timeout = 8, ?int $maxBytes = null): ?string {
    $ctx = httpContext($timeout);

    // Warnings are suppressed on purpose: an unreachable host is an expected
    // outcome and is reported through the null return value.
    if ($maxBytes === null) {
        $content = @file_get_contents($url, false, $ctx);
    } else {
        $content = @file_get_contents($url, false, $ctx, 0, max(0, $maxBytes));
    }

    return $content === false ? null : $content;
}

/**
 * Download an image referenced by a page and store it under its MD5 hash.
 *
 * The reference must resolve to an http/https URL whose text ends in a known
 * image extension (before an optional query or fragment), the body must be
 * non-empty, at most 5 MiB, and must actually decode as a JPEG, PNG, GIF, BMP
 * or WebP image.
 *
 * @param string $url               Image reference (absolute or relative).
 * @param string $baseUrl           URL of the page the reference came from.
 * @param string $destinationFolder Target directory; created when missing.
 * @return string|null Path of the stored file, or null on any failure.
 */
function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string {
    $maxBytes = 5 * 1024 * 1024;
    $allowedExtensions = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp'];
    $extensionByType = [
        IMAGETYPE_JPEG => 'jpg',
        IMAGETYPE_PNG => 'png',
        IMAGETYPE_GIF => 'gif',
        IMAGETYPE_BMP => 'bmp',
        IMAGETYPE_WEBP => 'webp',
    ];

    $resolved = resolveUrl($url, $baseUrl);
    if ($resolved === null) {
        return null;
    }

    // resolveUrl() passes absolute URLs through untouched, so the page controls
    // the scheme. Without this check file:// and other stream wrappers would be
    // readable through the image reference.
    $scheme = strtolower((string) parse_url($resolved, PHP_URL_SCHEME));
    if ($scheme !== 'http' && $scheme !== 'https') {
        return null;
    }

    if (!preg_match('/\.(jpg|jpeg|png|gif|bmp|webp)(?:\?|#|$)/i', $resolved)) {
        return null;
    }

    // Read one byte more than allowed so oversized bodies can be detected
    // without loading them completely.
    $imageContent = safeFetch($resolved, 10, $maxBytes + 1);
    if ($imageContent === null || $imageContent === '' || strlen($imageContent) > $maxBytes) {
        return null;
    }

    // The URL extension proves nothing: with ignore_errors an HTML error page
    // is returned as the body. Only store data that decodes as an image.
    $imageInfo = @getimagesizefromstring($imageContent);
    if ($imageInfo === false || !isset($extensionByType[$imageInfo[2]])) {
        return null;
    }

    $path = parse_url($resolved, PHP_URL_PATH);
    $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
    if (!in_array($ext, $allowedExtensions, true)) {
        // The extension matched only in the query/fragment; use the real type.
        $ext = $extensionByType[$imageInfo[2]];
    }

    if (!is_dir($destinationFolder)
        && !@mkdir($destinationFolder, 0775, true)
        && !is_dir($destinationFolder)
    ) {
        return null;
    }

    // Content-addressed name: identical images share one file.
    $filePath = rtrim($destinationFolder, '/\\') . '/' . md5($imageContent) . '.' . $ext;
    $written = @file_put_contents($filePath, $imageContent, LOCK_EX);
    if ($written === false || $written !== strlen($imageContent)) {
        return null;
    }

    return $filePath;
}

/**
 * Read the content attribute of the first matching meta tag.
 *
 * A tag matches when its $attr attribute equals $name (case-insensitive) and
 * it carries a non-empty content attribute. Attribute order and quote style do
 * not matter, and a quote character of the other kind inside a value is kept.
 *
 * @param string $html Page markup.
 * @param string $attr Attribute that identifies the tag, e.g. "property".
 * @param string $name Expected value of that attribute, e.g. "og:title".
 * @return string|null Entity-decoded, trimmed content, or null when not found.
 */
function parseMetaContent(string $html, string $attr, string $name): ?string {
    // Whole meta tag; quoted values may contain ">" without ending the tag.
    $tagPattern = '/<meta(?=[\s\/>])((?:[^>"\']++|"[^"]*+"|\'[^\']*+\')*+)>/i';
    if (!preg_match_all($tagPattern, $html, $tags)) {
        return null;
    }

    // name = "value" | 'value' | value  (branch reset keeps the value in group 2)
    $attributePattern = '/([^\s"\'<>\/=]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'=<>`]+))/';
    $wantedAttribute = strtolower($attr);

    foreach ($tags[1] as $rawAttributes) {
        if (!preg_match_all($attributePattern, $rawAttributes, $pairs, PREG_SET_ORDER)) {
            continue;
        }

        $attributes = [];
        foreach ($pairs as $pair) {
            $key = strtolower($pair[1]);
            // On duplicate attributes the first occurrence wins.
            if (!array_key_exists($key, $attributes)) {
                $attributes[$key] = $pair[2] ?? '';
            }
        }

        if (!isset($attributes[$wantedAttribute], $attributes['content'])) {
            continue;
        }
        if (strcasecmp(trim($attributes[$wantedAttribute]), $name) !== 0) {
            continue;
        }

        $value = trim(html_entity_decode($attributes['content'], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
        if ($value !== '') {
            return $value;
        }
    }

    return null;
}

/**
 * Fetch a page and collect the data needed for a link preview.
 *
 * Title and description come from Open Graph tags with a fallback to the
 * plain meta tags; the title additionally falls back to the title element.
 * The preview image, if any, is downloaded and returned as a root-relative
 * path.
 *
 * @param string $url Page URL; must be an absolute http/https URL.
 * @return array{ok: bool, title: string, description: string, logo: ?string, error: ?string}
 *         error is 'ungueltige_url' for a rejected URL and
 *         'seite_nicht_erreichbar' when the page could not be fetched.
 */
function getPageInfo(string $url): array {
    $ret = [
        'ok' => false,
        'title' => '',
        'description' => '',
        'logo' => null,
        'error' => null,
    ];

    $normalized = normalizeUrl($url);
    if ($normalized === null) {
        $ret['error'] = 'ungueltige_url';
        return $ret;
    }

    // Only the first 2 MiB are read so a huge response cannot exhaust memory.
    $html = safeFetch($normalized, 10, 2 * 1024 * 1024);
    if ($html === null) {
        $ret['error'] = 'seite_nicht_erreichbar';
        return $ret;
    }

    $title = parseMetaContent($html, 'property', 'og:title')
        ?? parseMetaContent($html, 'name', 'title');
    $description = parseMetaContent($html, 'property', 'og:description')
        ?? parseMetaContent($html, 'name', 'description');
    $image = parseMetaContent($html, 'property', 'og:image')
        ?? parseMetaContent($html, 'name', 'image');

    if ($title === null && preg_match('/<title\b[^>]*>\s*(.*?)\s*<\/title>/is', $html, $matchTitle)) {
        $title = trim(html_entity_decode($matchTitle[1], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    }

    $logo = null;
    if ($image !== null && $image !== '') {
        $img = downloadImageFromUrl($image, $normalized);
        if ($img !== null) {
            $logo = '/' . ltrim($img, '/');
        }
    }

    $ret['ok'] = true;
    $ret['title'] = $title ?? '';
    $ret['description'] = $description ?? '';
    $ret['logo'] = $logo;
    return $ret;
}

/**
 * Reduce user-supplied tags to a clean, de-duplicated list.
 *
 * Non-array input yields an empty list and non-string entries are skipped.
 * Each tag is filtered through onlyAlpha(), cut to 35 bytes and has its first
 * letter upper-cased; at most 20 distinct tags are returned.
 *
 * NOTE(review): onlyAlpha() is defined outside this file; presumably it strips
 * everything except letters plus the extra characters passed as the second
 * argument - verify. If it lets multibyte letters through, substr() and
 * ucfirst() below operate on bytes and can split a character.
 *
 * @param mixed $input Raw request value.
 * @return string[] Sanitized tags in first-seen order.
 */
function sanitizeTags($input): array {
    if (!is_array($input)) {
        return [];
    }

    $ret = [];
    foreach ($input as $tag) {
        if (!is_string($tag)) {
            continue;
        }
        $clean = onlyAlpha(trim($tag), '_\-');
        if ($clean === '') {
            continue;
        }
        $clean = ucfirst(substr($clean, 0, 35));
        // Used as a key so duplicates collapse without a search.
        $ret[$clean] = true;
        if (count($ret) >= 20) {
            break;
        }
    }

    // PHP turns numeric-looking string keys into ints; hand back strings.
    return array_map('strval', array_keys($ret));
}