2 Commits

Author SHA1 Message Date
Troy Grunt
644e03da9b Replace fragile HTML allowlist sanitizer 2026-02-15 15:01:50 +01:00
Troy Grunt
cb115bac40 Centralize HTTP limits 2026-02-15 14:57:05 +01:00
6 changed files with 168 additions and 159 deletions

View File

@@ -43,14 +43,6 @@
- Optionales Host-Allowlist-Feature vorhanden. - Optionales Host-Allowlist-Feature vorhanden.
- Tests fuer geblockte und erlaubte Ziele vorhanden. - Tests fuer geblockte und erlaubte Ziele vorhanden.
- #TODO Centralize HTTP limits (timeout/redirect/size)
- Aufwand: `S`
- Labels: `robustness`, `network`
- Akzeptanzkriterien:
- Eine zentrale Konfiguration fuer HTTP-Limits.
- `og.php` und `link-meta.php` nutzen dieselben Limits.
- Default-Werte sind in README dokumentiert.
- #TODO Improve SQL error handling + logging - #TODO Improve SQL error handling + logging
- Aufwand: `M` - Aufwand: `M`
- Labels: `sql`, `robustness` - Labels: `sql`, `robustness`
@@ -59,19 +51,20 @@
- Fehler enthalten Query-Kontext ohne Secrets. - Fehler enthalten Query-Kontext ohne Secrets.
- Verhalten entspricht der definierten Error-Strategie. - Verhalten entspricht der definierten Error-Strategie.
- #TODO Replace fragile HTML allowlist sanitizer - Code-Qualitaet (aufgeteilt in Unter-Issues)
- Aufwand: `M`
- Labels: `security`, `string`
- Akzeptanzkriterien:
- `onlySimpleHTML()` wird durch robusteren Ansatz ersetzt.
- Erlaubte Tags sind konfigurierbar dokumentiert.
- Regression-Tests fuer typische Eingaben vorhanden.
- #TODO Code-Qualitaet
- Sammel-Issue: Naming-Konvention, SQL-Binding-Refactor, Legacy-Markierung, Markdown-Konsolidierung, klare Modulgrenzen.
- Aufwand: `L` - Aufwand: `L`
- Empfehlung: in 3-5 Unter-Issues aufteilen. - Labels: `quality`, `refactor`
- Unter-Issues:
- Define and enforce naming conventions for functions, files and constants.
- Refactor SQL binding helpers to one consistent, typed API surface.
- Mark legacy functions/modules (`@deprecated`) and document replacement path.
- Consolidate Markdown docs (README + API notes) into one canonical structure.
- Clarify module boundaries and ownership (I/O, SQL, parsing, formatting).
- Akzeptanzkriterien:
- Kurzer Styleguide in `README.md` vorhanden und auf bestehende Dateien angewendet.
- Keine neuen Legacy-Einstiege ohne Markierung und Migrationshinweis.
- SQL-Helper nutzen einheitliche Signaturen in geaenderten Modulen.
- Modulgrenzen sind in Doku und Dateistruktur konsistent nachvollziehbar.
- #TODO Tests und Tooling - #TODO Tests und Tooling

View File

@@ -36,6 +36,7 @@ echo decade(12345); // "12.345 K" (je nach PHP-Konvertie
- `numbers.php`: Zahlen-Helfer (`decade`, `onlyNumeric`) - `numbers.php`: Zahlen-Helfer (`decade`, `onlyNumeric`)
- `sql.php`: Klasse `SQL` fuer Datenbankzugriffe (`get`, `single`, `list`, `keyval`, `set`) - `sql.php`: Klasse `SQL` fuer Datenbankzugriffe (`get`, `single`, `list`, `keyval`, `set`)
- `mail.php`: Mailfunktionen (`send_mail`, `send_html_mail`, `send_php_mail`) - `mail.php`: Mailfunktionen (`send_mail`, `send_html_mail`, `send_php_mail`)
- `http-limits.php`: Zentrale HTTP-Limits (`httpLimits`)
- `link-meta.php`: URL-Validierung, Fetching, Meta-Parsing, Bilddownload, Tag-Sanitization - `link-meta.php`: URL-Validierung, Fetching, Meta-Parsing, Bilddownload, Tag-Sanitization
- `og.php`: Einfacher OG-Scan (`scanOG`) - `og.php`: Einfacher OG-Scan (`scanOG`)
- `troy-api.php`: API-Helfer fuer Troy/Gitea (`sendToTroy`, `sendToGitea`) - `troy-api.php`: API-Helfer fuer Troy/Gitea (`sendToTroy`, `sendToGitea`)
@@ -63,6 +64,21 @@ $giteaRepo = 'repo';
$giteaToken = 'token'; $giteaToken = 'token';
``` ```
HTTP-Defaults fuer Netzwerkmodule (`link-meta.php`, `og.php`):
- `LIB_HTTP_TIMEOUT = 8` (Sekunden)
- `LIB_HTTP_MAX_REDIRECTS = 4`
- `LIB_HTTP_MAX_BYTES = 5242880` (5 MiB)
Optional vor dem Include ueberschreiben:
```php
<?php
define('LIB_HTTP_TIMEOUT', 10);
define('LIB_HTTP_MAX_REDIRECTS', 5);
define('LIB_HTTP_MAX_BYTES', 8 * 1024 * 1024);
```
## Runnable Examples ## Runnable Examples
### `string.php` ### `string.php`

24
http-limits.php Normal file
View File

@@ -0,0 +1,24 @@
<?php
declare(strict_types=1);
// Default HTTP limits. Each constant is only defined when the including
// script has not already defined it, so callers can override a value by
// calling define() before this file is loaded.
defined('LIB_HTTP_TIMEOUT') || define('LIB_HTTP_TIMEOUT', 8);
defined('LIB_HTTP_MAX_REDIRECTS') || define('LIB_HTTP_MAX_REDIRECTS', 4);
defined('LIB_HTTP_MAX_BYTES') || define('LIB_HTTP_MAX_BYTES', 5 * 1024 * 1024);

/**
 * Returns the effective HTTP limits as a normalized array.
 *
 * The constant values are cast to int and clamped so that a bad override
 * cannot produce a non-positive timeout/size or a negative redirect count.
 *
 * @return array{timeout:int, max_redirects:int, max_bytes:int, user_agent:string}
 */
function httpLimits(): array {
    $timeoutSeconds = (int) LIB_HTTP_TIMEOUT;
    $redirectCount = (int) LIB_HTTP_MAX_REDIRECTS;
    $byteCap = (int) LIB_HTTP_MAX_BYTES;

    $limits = [];
    $limits['timeout'] = max(1, $timeoutSeconds);       // at least 1
    $limits['max_redirects'] = max(0, $redirectCount);  // 0 is allowed
    $limits['max_bytes'] = max(1, $byteCap);            // at least 1
    $limits['user_agent'] = 'star-citizen.de-linkbot/1.0';

    return $limits;
}

View File

@@ -1,13 +1,16 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
require_once __DIR__ . '/http-limits.php';
function httpContext(int $timeout = 8) { function httpContext(?int $timeout = null) {
$limits = httpLimits();
$resolvedTimeout = $timeout === null ? $limits['timeout'] : max(1, $timeout);
return stream_context_create([ return stream_context_create([
'http' => [ 'http' => [
'timeout' => $timeout, 'timeout' => $resolvedTimeout,
'follow_location' => 1, 'follow_location' => 1,
'max_redirects' => 4, 'max_redirects' => $limits['max_redirects'],
'user_agent' => 'star-citizen.de-linkbot/1.0', 'user_agent' => $limits['user_agent'],
'ignore_errors' => true 'ignore_errors' => true
], ],
'ssl' => [ 'ssl' => [
@@ -66,10 +69,14 @@ function resolveUrl(string $url, string $baseUrl): ?string {
return $baseParts['scheme'] . '://' . $baseParts['host'] . $path . $url; return $baseParts['scheme'] . '://' . $baseParts['host'] . $path . $url;
} }
/**
 * Fetches a URL using the stream context from httpContext() and returns the
 * response body, or null when the request fails or the body exceeds the
 * configured 'max_bytes' limit from httpLimits().
 *
 * @param string   $url     URL to fetch.
 * @param int|null $timeout Timeout passed through to httpContext(); null
 *                          lets httpContext() pick its default.
 *
 * @return string|null Response body, or null on failure / oversize body.
 */
function safeFetch(string $url, ?int $timeout = null): ?string {
    $maxBytes = httpLimits()['max_bytes'];

    // Read at most one byte beyond the limit. That is enough to detect an
    // oversize body without buffering the entire response in memory first.
    $readCap = $maxBytes < PHP_INT_MAX ? $maxBytes + 1 : $maxBytes;

    // Warnings are suppressed on purpose: failure is reported via the null return.
    $content = @file_get_contents($url, false, httpContext($timeout), 0, $readCap);
    if ($content === false || strlen($content) > $maxBytes) {
        return null;
    }

    return $content;
}
function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string { function downloadImageFromUrl(string $url, string $baseUrl, string $destinationFolder = 'upl/'): ?string {
@@ -82,8 +89,8 @@ function downloadImageFromUrl(string $url, string $baseUrl, string $destinationF
return null; return null;
} }
$imageContent = safeFetch($resolved, 10); $imageContent = safeFetch($resolved);
if ($imageContent === null || strlen($imageContent) === 0 || strlen($imageContent) > (5 * 1024 * 1024)) { if ($imageContent === null || strlen($imageContent) === 0) {
return null; return null;
} }
@@ -130,7 +137,7 @@ function getPageInfo(string $url): array {
return $ret; return $ret;
} }
$html = safeFetch($normalized, 10); $html = safeFetch($normalized);
if ($html === null) { if ($html === null) {
$ret['error'] = 'seite_nicht_erreichbar'; $ret['error'] = 'seite_nicht_erreichbar';
return $ret; return $ret;

20
og.php
View File

@@ -1,9 +1,27 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
require_once __DIR__ . '/http-limits.php';
function scanOG(string $url): array { function scanOG(string $url): array {
$og = array(); $og = array();
$html = file_get_contents($url); $limits = httpLimits();
$ctx = stream_context_create([
'http' => [
'timeout' => $limits['timeout'],
'follow_location' => 1,
'max_redirects' => $limits['max_redirects'],
'user_agent' => $limits['user_agent'],
'ignore_errors' => true
],
'ssl' => [
'verify_peer' => true,
'verify_peer_name' => true
]
]);
$html = @file_get_contents($url, false, $ctx);
if ($html === false || strlen($html) > $limits['max_bytes']) {
return $og;
}
$re = '/<meta (name|property)=("|\')(.*?)("|\').*?content=("|\')(.*?)("|\')/m'; $re = '/<meta (name|property)=("|\')(.*?)("|\').*?content=("|\')(.*?)("|\')/m';
preg_match_all($re, $html, $matches, PREG_SET_ORDER, 0); preg_match_all($re, $html, $matches, PREG_SET_ORDER, 0);

View File

@@ -129,137 +129,88 @@ function markUp(string $text): string {
} }
return $r; return $r;
} }
/**
 * Reduces a string to a small allowlist of attribute-free HTML tags.
 *
 * Allowed tags are re-emitted in normalized lowercase form without any
 * attributes. Everything else that looks like a tag is HTML-escaped. Angle
 * brackets in the text between tags are escaped as well, so an unterminated
 * fragment such as "<img src=x onerror=..." cannot become markup once the
 * result is embedded in a page. Other characters in text (including "&") are
 * left untouched so existing entities are not double-encoded.
 *
 * @param string             $s           Input text, possibly containing HTML.
 * @param array<string>|null $allowedTags Tag names to allow (case-insensitive);
 *                                        null selects the built-in default list.
 *
 * @return string Sanitized string.
 */
function onlySimpleHTML(string $s, ?array $allowedTags = null): string {
    if ($s === '') {
        return '';
    }

    if ($allowedTags === null) {
        $allowedTags = [
            'b', 'u', 'i', 'span', 'br',
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            'li', 'ul', 'ol', 'pre',
        ];
    }

    $allow = array_fill_keys(array_map('strtolower', $allowedTags), true);
    // Void elements: never get a closing tag, "<br/>" is normalized to "<br>".
    $selfClosing = ['br' => true];
    $escapeFlags = ENT_QUOTES | ENT_SUBSTITUTE;
    $angleEscapes = ['<' => '&lt;', '>' => '&gt;'];

    // Split into text and "<...>" candidates; the capture group keeps the tags.
    $parts = preg_split('/(<[^>]*>)/', $s, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === false) {
        // Regex engine failure: fall back to escaping the whole input.
        return htmlspecialchars($s, $escapeFlags, 'UTF-8');
    }

    $out = '';
    foreach ($parts as $part) {
        if ($part === '') {
            continue;
        }

        if ($part[0] !== '<') {
            // Plain text may still hold a stray ">" or an unterminated "<...";
            // neutralize both instead of passing them through raw.
            $out .= strtr($part, $angleEscapes);
            continue;
        }

        // Only bare tags (optional slash, name, optional slash) are candidates;
        // anything carrying attributes or other content is escaped.
        if (preg_match('/^<\s*(\/?)\s*([a-z0-9]+)\s*(\/?)\s*>$/i', $part, $m) !== 1) {
            $out .= htmlspecialchars($part, $escapeFlags, 'UTF-8');
            continue;
        }

        $isClose = ($m[1] === '/');
        $tag = strtolower($m[2]);
        $isSelfClose = ($m[3] === '/');
        $isVoid = isset($selfClosing[$tag]);

        if (!isset($allow[$tag])) {
            $out .= htmlspecialchars($part, $escapeFlags, 'UTF-8');
            continue;
        }

        if ($isClose) {
            // A closing tag for a void element is meaningless and is dropped.
            if (!$isVoid) {
                $out .= "</{$tag}>";
            }
            continue;
        }

        if ($isSelfClose && !$isVoid) {
            // "<b/>" style input is treated as a closing tag for that element.
            $out .= "</{$tag}>";
            continue;
        }

        $out .= "<{$tag}>";
    }

    return $out;
}
function linkify(string $input): string { function linkify(string $input): string {
$pattern = '@(http(s)?://[a-zA-Z0-9/\.\#\-\_]*)@'; $pattern = '@(http(s)?://[a-zA-Z0-9/\.\#\-\_]*)@';