diff --git a/NEXT_STEPS.md b/NEXT_STEPS.md index 1640e49..3b884b3 100644 --- a/NEXT_STEPS.md +++ b/NEXT_STEPS.md @@ -43,14 +43,6 @@ - Optionales Host-Allowlist-Feature vorhanden. - Tests fuer geblockte und erlaubte Ziele vorhanden. -- #TODO Centralize HTTP limits (timeout/redirect/size) -- Aufwand: `S` -- Labels: `robustness`, `network` -- Akzeptanzkriterien: -- Eine zentrale Konfiguration fuer HTTP-Limits. -- `og.php` und `link-meta.php` nutzen dieselben Limits. -- Default-Werte sind in README dokumentiert. - - #TODO Improve SQL error handling + logging - Aufwand: `M` - Labels: `sql`, `robustness` diff --git a/README.md b/README.md index fecb1cb..1906a3b 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ echo decade(12345); // "12.345 K" (je nach PHP-Konvertie - `numbers.php`: Zahlen-Helfer (`decade`, `onlyNumeric`) - `sql.php`: Klasse `SQL` fuer Datenbankzugriffe (`get`, `single`, `list`, `keyval`, `set`) - `mail.php`: Mailfunktionen (`send_mail`, `send_html_mail`, `send_php_mail`) +- `http-limits.php`: Zentrale HTTP-Limits (`httpLimits`) - `link-meta.php`: URL-Validierung, Fetching, Meta-Parsing, Bilddownload, Tag-Sanitization - `og.php`: Einfacher OG-Scan (`scanOG`) - `troy-api.php`: API-Helfer fuer Troy/Gitea (`sendToTroy`, `sendToGitea`) @@ -63,6 +64,21 @@ $giteaRepo = 'repo'; $giteaToken = 'token'; ``` +HTTP-Defaults fuer Netzwerkmodule (`link-meta.php`, `og.php`): + +- `LIB_HTTP_TIMEOUT = 8` (Sekunden) +- `LIB_HTTP_MAX_REDIRECTS = 4` +- `LIB_HTTP_MAX_BYTES = 5242880` (5 MiB) + +Optional vor dem Include ueberschreiben: + +```php + max(1, (int) LIB_HTTP_TIMEOUT), + 'max_redirects' => max(0, (int) LIB_HTTP_MAX_REDIRECTS), + 'max_bytes' => max(1, (int) LIB_HTTP_MAX_BYTES), + 'user_agent' => 'star-citizen.de-linkbot/1.0' + ]; +} + diff --git a/link-meta.php b/link-meta.php index 6c1b249..8d93078 100644 --- a/link-meta.php +++ b/link-meta.php @@ -1,13 +1,16 @@ [ - 'timeout' => $timeout, + 'timeout' => $resolvedTimeout, 'follow_location' => 1, - 'max_redirects' => 4, - 'user_agent' => 'star-citizen.de-linkbot/1.0', + 'max_redirects' => $limits['max_redirects'], + 'user_agent' => $limits['user_agent'], 'ignore_errors' => true ], 'ssl' => [ @@ -66,10 +69,14 @@ function resolveUrl(string $url, string $baseUrl): ?string { return $baseParts['scheme'] . '://' . $baseParts['host'] . $path . $url; } -function safeFetch(string $url, int $timeout = 8): ?string { +function safeFetch(string $url, ?int $timeout = null): ?string { + $limits = httpLimits(); $ctx = httpContext($timeout); $content = @file_get_contents($url, false, $ctx); - return $content === false ? null : $content; + if ($content === false || strlen($content) > $limits['max_bytes']) { + return null; + } + return $content; } function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string { @@ -82,8 +89,8 @@ function downloadImageFromUrl(string $url, string $baseUrl, string $destinationF return null; } - $imageContent = safeFetch($resolved, 10); - if ($imageContent === null || strlen($imageContent) === 0 || strlen($imageContent) > (5 * 1024 * 1024)) { + $imageContent = safeFetch($resolved); + if ($imageContent === null || strlen($imageContent) === 0) { return null; } @@ -130,7 +137,7 @@ function getPageInfo(string $url): array { return $ret; } - $html = safeFetch($normalized, 10); + $html = safeFetch($normalized); if ($html === null) { $ret['error'] = 'seite_nicht_erreichbar'; return $ret; diff --git a/og.php b/og.php index 24a2add..42c77c0 100644 --- a/og.php +++ b/og.php @@ -1,9 +1,27 @@ [ + 'timeout' => $limits['timeout'], + 'follow_location' => 1, + 'max_redirects' => $limits['max_redirects'], + 'user_agent' => $limits['user_agent'], + 'ignore_errors' => true + ], + 'ssl' => [ + 'verify_peer' => true, + 'verify_peer_name' => true + ] + ]); + $html = @file_get_contents($url, false, $ctx); + if ($html === false || strlen($html) > $limits['max_bytes']) { + return $og; + } $re = '/