188 lines
5.2 KiB
PHP
188 lines
5.2 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
require_once __DIR__ . '/http-limits.php';
|
|
|
|
/**
 * Build the stream context used for outgoing HTTP(S) requests.
 *
 * The context follows redirects (up to the configured maximum), sends the
 * configured user agent, keeps the response body even for HTTP error status
 * codes (`ignore_errors`), and enforces TLS peer and peer-name verification.
 *
 * @param int|null $timeout Timeout in seconds; null uses the 'timeout' entry
 *                          of httpLimits(). Explicit values are raised to at
 *                          least 1.
 *
 * @return resource Stream context as returned by stream_context_create().
 */
function httpContext(?int $timeout = null) {
    $limits = httpLimits();

    if ($timeout === null) {
        $effectiveTimeout = $limits['timeout'];
    } else {
        // Never allow a zero or negative timeout.
        $effectiveTimeout = max(1, $timeout);
    }

    $httpOptions = [
        'timeout' => $effectiveTimeout,
        'follow_location' => 1,
        'max_redirects' => $limits['max_redirects'],
        'user_agent' => $limits['user_agent'],
        'ignore_errors' => true
    ];

    $sslOptions = [
        'verify_peer' => true,
        'verify_peer_name' => true
    ];

    return stream_context_create([
        'http' => $httpOptions,
        'ssl' => $sslOptions
    ]);
}
|
|
|
|
/**
 * Validate a user-supplied URL and accept it only for http/https.
 *
 * The input is trimmed, checked with FILTER_VALIDATE_URL, and its scheme is
 * compared case-insensitively against "http" and "https".
 *
 * @param string $url Raw URL, possibly surrounded by whitespace.
 *
 * @return string|null The trimmed URL when it is acceptable, otherwise null.
 */
function normalizeUrl(string $url): ?string {
    $candidate = trim($url);

    if (filter_var($candidate, FILTER_VALIDATE_URL) === false) {
        return null;
    }

    // parse_url() yields null when the component is absent and false on a
    // malformed URL; either way there is no usable scheme.
    $scheme = parse_url($candidate, PHP_URL_SCHEME);
    if (!is_string($scheme)) {
        return null;
    }

    $allowedSchemes = ['http', 'https'];

    return in_array(strtolower($scheme), $allowedSchemes, true) ? $candidate : null;
}
|
|
|
|
/**
 * Resolve a possibly relative URL against a base URL.
 *
 * Handled forms of $url:
 *  - already absolute (passes FILTER_VALIDATE_URL): returned unchanged,
 *    whatever its scheme is;
 *  - protocol-relative ("//host/path"): prefixed with the base scheme;
 *  - root-relative ("/path"): appended to the base origin;
 *  - anything else: appended to the directory part of the base path.
 *
 * The base origin consists of scheme, host and, when present, the port, so
 * that references found on a page served from a non-default port keep
 * pointing at that port. Dot segments ("../") are not collapsed.
 *
 * @param string $url     URL or reference to resolve.
 * @param string $baseUrl Absolute URL of the document the reference came from.
 *
 * @return string|null The resolved URL, or null when $baseUrl has no scheme
 *                     or host.
 */
function resolveUrl(string $url, string $baseUrl): ?string {
    $url = trim($url);

    // Already an absolute URL: nothing to resolve.
    if (filter_var($url, FILTER_VALIDATE_URL)) {
        return $url;
    }

    $baseParts = parse_url($baseUrl);
    if (!$baseParts || !isset($baseParts['scheme']) || !isset($baseParts['host'])) {
        return null;
    }

    // Protocol-relative reference: only the scheme is inherited.
    if (strpos($url, '//') === 0) {
        return $baseParts['scheme'] . ':' . $url;
    }

    // Keep an explicit port; dropping it would point at a different server.
    $origin = $baseParts['scheme'] . '://' . $baseParts['host'];
    if (isset($baseParts['port'])) {
        $origin .= ':' . $baseParts['port'];
    }

    // Root-relative reference.
    if ($url !== '' && $url[0] === '/') {
        return $origin . $url;
    }

    // Relative reference: strip the last path segment of the base to get its
    // directory, falling back to the root.
    $directory = '/';
    if (!empty($baseParts['path'])) {
        $directory = preg_replace('#/[^/]*$#', '/', $baseParts['path']);
        if ($directory === null || $directory === '') {
            $directory = '/';
        }
    }

    return $origin . $directory . $url;
}
|
|
|
|
/**
 * Fetch a URL with the shared HTTP context and a response size limit.
 *
 * At most `max_bytes + 1` bytes are read from the stream, so an oversized
 * response is detected without first loading the entire body into memory.
 * Failures are suppressed and reported as null.
 *
 * @param string   $url     URL to fetch.
 * @param int|null $timeout Timeout in seconds, forwarded to httpContext().
 *
 * @return string|null The response body, or null when the request failed or
 *                     the body exceeds the 'max_bytes' entry of httpLimits().
 */
function safeFetch(string $url, ?int $timeout = null): ?string {
    $limits = httpLimits();
    $maxBytes = (int) $limits['max_bytes'];

    // Read one byte more than allowed so "too large" stays detectable; the
    // clamping keeps the length non-negative and avoids integer overflow.
    $readLimit = max(0, min($maxBytes, PHP_INT_MAX - 1)) + 1;

    $content = @file_get_contents($url, false, httpContext($timeout), 0, $readLimit);
    if ($content === false || strlen($content) > $maxBytes) {
        return null;
    }

    return $content;
}
|
|
|
|
/**
 * Download an image referenced by a page and store it locally.
 *
 * The reference is resolved against $baseUrl, must use http or https, and
 * must carry one of the accepted image extensions (jpg, jpeg, png, gif, bmp,
 * webp) directly before the end of the URL, a "?" or a "#". The file is
 * stored as "<md5 of content>.<ext>" inside $destinationFolder, which is
 * created when missing.
 *
 * NOTE(review): the downloaded bytes are not verified to be image data; only
 * the URL is inspected.
 *
 * @param string $url               Image URL or reference taken from the page.
 * @param string $baseUrl           URL of the page the reference came from.
 * @param string $destinationFolder Target directory for the stored file.
 *
 * @return string|null Path of the written file, or null on any failure.
 */
function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string {
    $resolved = resolveUrl($url, $baseUrl);
    if ($resolved === null) {
        return null;
    }

    // The reference comes from remote HTML: never hand file://, php:// or
    // other non-web schemes to the fetcher.
    $scheme = parse_url($resolved, PHP_URL_SCHEME);
    if (!is_string($scheme) || !in_array(strtolower($scheme), ['http', 'https'], true)) {
        return null;
    }

    if (!preg_match('/\.(jpg|jpeg|png|gif|bmp|webp)(?:\?|#|$)/i', $resolved)) {
        return null;
    }

    $imageContent = safeFetch($resolved);
    if ($imageContent === null || strlen($imageContent) === 0) {
        return null;
    }

    // parse_url() returns null (no path) or false (malformed URL) besides a
    // string; only a string may be passed on to pathinfo().
    $path = parse_url($resolved, PHP_URL_PATH);
    $ext = is_string($path) ? strtolower(pathinfo($path, PATHINFO_EXTENSION)) : '';
    if (!preg_match('/^(jpg|jpeg|png|gif|bmp|webp)$/', $ext)) {
        // The extension check above may have matched inside the query string.
        $ext = 'png';
    }

    // The second is_dir() covers a concurrent request creating the folder.
    if (!is_dir($destinationFolder)
        && !@mkdir($destinationFolder, 0775, true)
        && !is_dir($destinationFolder)) {
        return null;
    }

    // Content-addressed name: identical images map to the same file.
    $filePath = rtrim($destinationFolder, '/\\') . '/' . md5($imageContent) . '.' . $ext;
    if (@file_put_contents($filePath, $imageContent) === false) {
        return null;
    }

    return $filePath;
}
|
|
|
|
/**
 * Extract the `content` value of the first matching <meta> tag.
 *
 * A tag matches when its attribute $attr has the quoted value $name
 * (case-insensitive) and it carries a non-empty quoted `content` attribute.
 * The two attributes may appear in either order, and the content may contain
 * the quote character that is not used as its delimiter (for example an
 * apostrophe inside a double-quoted value).
 *
 * @param string $html HTML document to search.
 * @param string $attr Attribute name identifying the tag, e.g. "property" or
 *                     "name". It is inserted into the pattern unescaped.
 * @param string $name Expected value of that attribute, e.g. "og:title".
 *
 * @return string|null Entity-decoded, trimmed content, or null when no tag
 *                     matches.
 */
function parseMetaContent(string $html, string $attr, string $name): ?string {
    if (!preg_match_all('/<meta\b[^>]*>/i', $html, $tags)) {
        return null;
    }

    // Named back-references make the closing quote match the opening one.
    $identifierPattern = '/\s(?:' . $attr . ')\s*=\s*(?P<q>["\'])' . preg_quote($name, '/') . '(?P=q)/i';
    $contentPattern = '/\scontent\s*=\s*(?P<q>["\'])(?P<value>.*?)(?P=q)/is';

    foreach ($tags[0] as $tag) {
        if (!preg_match($identifierPattern, $tag)) {
            continue;
        }
        if (!preg_match($contentPattern, $tag, $match) || $match['value'] === '') {
            continue;
        }

        return trim(html_entity_decode($match['value'], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    }

    return null;
}
|
|
|
|
/**
 * Fetch a page and collect its title, description and preview image.
 *
 * Open Graph meta tags are preferred; plain `name="..."` meta tags and, for
 * the title, the <title> element (with or without attributes) serve as
 * fallbacks. A referenced image is downloaded via downloadImageFromUrl().
 *
 * @param string $url Page URL; must be a valid http/https URL.
 *
 * @return array{ok: bool, title: string, description: string, logo: ?string, error: ?string}
 *         On failure `ok` is false and `error` is 'ungueltige_url' (invalid
 *         URL) or 'seite_nicht_erreichbar' (page not reachable). On success
 *         `logo` is the stored image path with a leading slash, or null when
 *         no image could be stored.
 */
function getPageInfo(string $url): array {
    $result = [
        'ok' => false,
        'title' => '',
        'description' => '',
        'logo' => null,
        'error' => null
    ];

    $pageUrl = normalizeUrl($url);
    if ($pageUrl === null) {
        $result['error'] = 'ungueltige_url';
        return $result;
    }

    $html = safeFetch($pageUrl);
    if ($html === null) {
        $result['error'] = 'seite_nicht_erreichbar';
        return $result;
    }

    $title = parseMetaContent($html, 'property', 'og:title')
        ?? parseMetaContent($html, 'name', 'title');
    $description = parseMetaContent($html, 'property', 'og:description')
        ?? parseMetaContent($html, 'name', 'description');
    $image = parseMetaContent($html, 'property', 'og:image')
        ?? parseMetaContent($html, 'name', 'image');

    // Fall back to the <title> element; `[^>]*` tolerates attributes such as
    // <title data-rh="true">, which a bare "<title>" match would miss.
    if ($title === null && preg_match('/<title\b[^>]*>\s*(.*?)\s*<\/title>/is', $html, $titleMatch)) {
        $title = trim(html_entity_decode($titleMatch[1], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
    }

    $logo = null;
    if ($image !== null && $image !== '') {
        $storedPath = downloadImageFromUrl($image, $pageUrl);
        if ($storedPath !== null) {
            // Expose the stored file as a root-relative path.
            $logo = '/' . ltrim($storedPath, '/');
        }
    }

    $result['ok'] = true;
    $result['title'] = $title ?? '';
    $result['description'] = $description ?? '';
    $result['logo'] = $logo;

    return $result;
}
|
|
|
|
/**
 * Clean a list of tag candidates and return the unique results.
 *
 * Non-string entries and entries that are empty after filtering are skipped.
 * Each remaining tag is cut to 35 bytes and its first character upper-cased.
 * Collection stops once 20 distinct tags have been gathered.
 *
 * NOTE(review): onlyAlpha() is defined outside this file; presumably it
 * strips everything except letters and the extra characters "_" and "-" —
 * confirm against its implementation.
 *
 * @param array $input Raw tag candidates of arbitrary type.
 *
 * @return array Distinct cleaned tags in order of first appearance.
 */
function sanitizeTags(array $input): array {
    // Keys act as a set so duplicates collapse automatically.
    $collected = [];

    foreach ($input as $candidate) {
        if (is_string($candidate)) {
            $filtered = onlyAlpha(trim($candidate), '_\-');

            if ($filtered !== '') {
                $collected[ucfirst(substr($filtered, 0, 35))] = true;

                if (count($collected) >= 20) {
                    break;
                }
            }
        }
    }

    return array_keys($collected);
}
|
|
|