Skip to content

Commit ab2e596

Browse files
committed
headers migrados para rotas e padronização de sanitização de urls
1 parent 7013b56 commit ab2e596

File tree

4 files changed

+103
-111
lines changed

4 files changed

+103
-111
lines changed

app/inc/URLAnalyzer.php

Lines changed: 23 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -234,33 +234,27 @@ public function analyze($url)
234234
// Reset das regras ativadas para nova análise
235235
$this->activatedRules = [];
236236

237-
// 1. Clean URL / Limpa a URL
238-
$cleanUrl = $this->cleanUrl($url);
239-
if (!$cleanUrl) {
240-
$this->throwError(self::ERROR_INVALID_URL);
241-
}
242-
243-
// 2. Check cache / Verifica cache
244-
if ($this->cache->exists($cleanUrl)) {
245-
return $this->cache->get($cleanUrl);
237+
// 1. Check cache / Verifica cache
238+
if ($this->cache->exists($url)) {
239+
return $this->cache->get($url);
246240
}
247241

248-
// 3. Check blocked domains / Verifica domínios bloqueados
249-
$host = parse_url($cleanUrl, PHP_URL_HOST);
242+
// 2. Check blocked domains / Verifica domínios bloqueados
243+
$host = parse_url($url, PHP_URL_HOST);
250244
if (!$host) {
251245
$this->throwError(self::ERROR_INVALID_URL);
252246
}
253247
$host = preg_replace('/^www\./', '', $host);
254248

255249
if (in_array($host, BLOCKED_DOMAINS)) {
256-
Logger::getInstance()->logUrl($cleanUrl, 'BLOCKED_DOMAIN');
250+
Logger::getInstance()->logUrl($url, 'BLOCKED_DOMAIN');
257251
$this->throwError(self::ERROR_BLOCKED_DOMAIN);
258252
}
259253

260-
// Check URL status code before proceeding
261-
$redirectInfo = $this->checkStatus($cleanUrl);
254+
// 3. Check URL status code before proceeding
255+
$redirectInfo = $this->checkStatus($url);
262256
if ($redirectInfo['httpCode'] !== 200) {
263-
Logger::getInstance()->logUrl($cleanUrl, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
257+
Logger::getInstance()->logUrl($url, 'INVALID_STATUS_CODE', "HTTP {$redirectInfo['httpCode']}");
264258
if ($redirectInfo['httpCode'] === 404) {
265259
$this->throwError(self::ERROR_NOT_FOUND);
266260
} else {
@@ -279,33 +273,33 @@ public function analyze($url)
279273
$content = null;
280274
switch ($fetchStrategy) {
281275
case 'fetchContent':
282-
$content = $this->fetchContent($cleanUrl);
276+
$content = $this->fetchContent($url);
283277
break;
284278
case 'fetchFromWaybackMachine':
285-
$content = $this->fetchFromWaybackMachine($cleanUrl);
279+
$content = $this->fetchFromWaybackMachine($url);
286280
break;
287281
case 'fetchFromSelenium':
288-
$content = $this->fetchFromSelenium($cleanUrl, isset($domainRules['browser']) ? $domainRules['browser'] : 'firefox');
282+
$content = $this->fetchFromSelenium($url, isset($domainRules['browser']) ? $domainRules['browser'] : 'firefox');
289283
break;
290284
}
291285

292286
if (!empty($content)) {
293287
$this->activatedRules[] = "fetchStrategy: $fetchStrategy";
294-
$processedContent = $this->processContent($content, $host, $cleanUrl);
295-
$this->cache->set($cleanUrl, $processedContent);
288+
$processedContent = $this->processContent($content, $host, $url);
289+
$this->cache->set($url, $processedContent);
296290
return $processedContent;
297291
}
298292
} catch (Exception $e) {
299-
Logger::getInstance()->logUrl($cleanUrl, strtoupper($fetchStrategy) . '_ERROR', $e->getMessage());
293+
Logger::getInstance()->logUrl($url, strtoupper($fetchStrategy) . '_ERROR', $e->getMessage());
300294
throw $e;
301295
}
302296
}
303297

304298
// 5. Try all strategies in sequence
305299
$fetchStrategies = [
306-
['method' => 'fetchContent', 'args' => [$cleanUrl]],
307-
['method' => 'fetchFromWaybackMachine', 'args' => [$cleanUrl]],
308-
['method' => 'fetchFromSelenium', 'args' => [$cleanUrl, 'firefox']]
300+
['method' => 'fetchContent', 'args' => [$url]],
301+
['method' => 'fetchFromWaybackMachine', 'args' => [$url]],
302+
['method' => 'fetchFromSelenium', 'args' => [$url, 'firefox']]
309303
];
310304

311305
$lastError = null;
@@ -314,8 +308,8 @@ public function analyze($url)
314308
$content = call_user_func_array([$this, $strategy['method']], $strategy['args']);
315309
if (!empty($content)) {
316310
$this->activatedRules[] = "fetchStrategy: {$strategy['method']}";
317-
$processedContent = $this->processContent($content, $host, $cleanUrl);
318-
$this->cache->set($cleanUrl, $processedContent);
311+
$processedContent = $this->processContent($content, $host, $url);
312+
$this->cache->set($url, $processedContent);
319313
return $processedContent;
320314
}
321315
} catch (Exception $e) {
@@ -326,7 +320,7 @@ public function analyze($url)
326320
}
327321

328322
// If we get here, all strategies failed
329-
Logger::getInstance()->logUrl($cleanUrl, 'GENERAL_FETCH_ERROR');
323+
Logger::getInstance()->logUrl($url, 'GENERAL_FETCH_ERROR');
330324
if ($lastError) {
331325
$message = $lastError->getMessage();
332326
if (strpos($message, 'DNS') !== false) {
@@ -432,8 +426,8 @@ private function fetchContent($url)
432426
*/
433427
private function fetchFromWaybackMachine($url)
434428
{
435-
$cleanUrl = preg_replace('#^https?://#', '', $url);
436-
$availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($cleanUrl);
429+
$url = preg_replace('#^https?://#', '', $url);
430+
$availabilityUrl = "https://archive.org/wayback/available?url=" . urlencode($url);
437431

438432
$curl = new Curl();
439433
$curl->setOpt(CURLOPT_FOLLOWLOCATION, true);
@@ -552,36 +546,6 @@ private function fetchFromSelenium($url, $browser = 'firefox')
552546
}
553547
}
554548

555-
/**
556-
* Clean and normalize a URL
557-
* Limpa e normaliza uma URL
558-
*/
559-
private function cleanUrl($url)
560-
{
561-
$url = trim($url);
562-
563-
if (!filter_var($url, FILTER_VALIDATE_URL)) {
564-
return false;
565-
}
566-
567-
if (preg_match('#https://([^.]+)\.cdn\.ampproject\.org/v/s/([^/]+)(.*)#', $url, $matches)) {
568-
$url = 'https://' . $matches[2] . $matches[3];
569-
}
570-
571-
$parts = parse_url($url);
572-
if (!isset($parts['scheme']) || !isset($parts['host'])) {
573-
return false;
574-
}
575-
576-
$cleanedUrl = $parts['scheme'] . '://' . $parts['host'];
577-
578-
if (isset($parts['path'])) {
579-
$cleanedUrl .= $parts['path'];
580-
}
581-
582-
return $cleanedUrl;
583-
}
584-
585549
/**
586550
* Get specific rules for a domain
587551
* Obtém regras específicas para um domínio

app/src/Router.php

Lines changed: 77 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,19 @@ public function __construct()
5050
$message_type = '';
5151
$url = '';
5252

53-
// Processa mensagens da query string
54-
// Process query string messages
53+
// Sanitize and process query string messages
5554
if (isset($_GET['message'])) {
56-
$message_key = $_GET['message'];
55+
$message_key = htmlspecialchars(trim($_GET['message']), ENT_QUOTES | ENT_HTML5, 'UTF-8');
5756
$messageData = \Language::getMessage($message_key);
58-
$message = $messageData['message'];
59-
$message_type = $messageData['type'];
57+
$message = htmlspecialchars($messageData['message'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
58+
$message_type = htmlspecialchars($messageData['type'], ENT_QUOTES | ENT_HTML5, 'UTF-8');
6059
}
6160

62-
// Processa submissão do formulário
6361
// Process form submission
6462
if ($_SERVER['REQUEST_METHOD'] === 'POST' && isset($_POST['url'])) {
65-
$url = filter_var($_POST['url'], FILTER_SANITIZE_URL);
63+
$url = $this->sanitizeUrl($_POST['url']);
6664
if (filter_var($url, FILTER_VALIDATE_URL)) {
67-
header('Location: ' . SITE_URL . '/p/' . urlencode($url));
65+
header('Location: ' . SITE_URL . '/p/' . $url);
6866
exit;
6967
} else {
7068
$messageData = \Language::getMessage('INVALID_URL');
@@ -84,7 +82,7 @@ public function __construct()
8482
// Rota da API - usa URLProcessor em modo API
8583
// API route - uses URLProcessor in API mode
8684
$r->addRoute('GET', '/api/{url:.+}', function($vars) {
87-
$processor = new URLProcessor($vars['url'], true);
85+
$processor = new URLProcessor($this->sanitizeUrl($vars['url']), true);
8886
$processor->process();
8987
});
9088

@@ -98,23 +96,23 @@ public function __construct()
9896
// Rota de processamento - usa URLProcessor em modo web
9997
// Processing route - uses URLProcessor in web mode
10098
$r->addRoute('GET', '/p/{url:.+}', function($vars) {
101-
$processor = new URLProcessor($vars['url'], false);
99+
$processor = new URLProcessor($this->sanitizeUrl($vars['url']), false);
102100
$processor->process();
103101
});
104102

105-
// Rota de processamento com query parameter ou sem parâmetros
106103
// Processing route with query parameter or without parameters
107104
$r->addRoute('GET', '/p[/]', function() {
108105
if (isset($_GET['url']) || isset($_GET['text'])) {
109-
$url = isset($_GET['url']) ? $_GET['url'] : '';
110-
$text = isset($_GET['text']) ? $_GET['text'] : '';
106+
// Sanitize input parameters
107+
$url = isset($_GET['url']) ? $this->sanitizeUrl($_GET['url']) : '';
108+
$text = isset($_GET['text']) ? $this->sanitizeUrl($_GET['text']) : '';
111109

112110
// Check which parameter is a valid URL
113111
if (filter_var($url, FILTER_VALIDATE_URL)) {
114-
header('Location: /p/' . urlencode($url));
112+
header('Location: /p/' . $url);
115113
exit;
116114
} elseif (filter_var($text, FILTER_VALIDATE_URL)) {
117-
header('Location: /p/' . urlencode($text));
115+
header('Location: /p/' . $text);
118116
exit;
119117
} else {
120118
header('Location: /?message=INVALID_URL');
@@ -134,11 +132,73 @@ public function __construct()
134132
}
135133

136134
/**
137-
* Despacha a requisição para a rota apropriada
138-
* Dispatches the request to the appropriate route
135+
* Sanitizes URLs to prevent XSS and injection attacks
136+
* Sanitiza URLs para prevenir ataques XSS e injeções
137+
*
138+
* @param string $url The URL to sanitize
139+
* @return string The sanitized URL
139140
*/
141+
/**
 * Sanitizes and normalizes a user-supplied URL.
 *
 * Processing steps:
 *  1. Trim surrounding whitespace and validate with FILTER_VALIDATE_URL.
 *  2. Unwrap Google AMP cache URLs (<sub>.cdn.ampproject.org/v/s/<host><path>)
 *     back to the original https URL.
 *  3. Rebuild the URL from scheme, host and path only; any port, credentials,
 *     query string and fragment are discarded.
 *  4. Strip control characters and anything outside the FILTER_SANITIZE_URL
 *     character set.
 *  5. Percent-encode the HTML-significant characters < > " '.
 *
 * The result is consumed as a URL (redirect target, value handed to the URL
 * processor), so it must stay a valid URL. HTML-entity encoding is therefore
 * NOT applied here: it would turn "&" into "&amp;" and corrupt the address.
 * Escape with htmlspecialchars() at the point where the value is written
 * into HTML instead.
 *
 * @param string $url The raw URL to sanitize and normalize
 * @return string The cleaned URL, or an empty string if the input is invalid
 */
private function sanitizeUrl(string $url): string
{
    $url = trim($url);

    // Basic URL validation
    if (filter_var($url, FILTER_VALIDATE_URL) === false) {
        return '';
    }

    // Handle AMP URLs: keep only the origin host and the remainder of the path
    if (preg_match('#https://([^.]+)\.cdn\.ampproject\.org/v/s/([^/]+)(.*)#', $url, $matches)) {
        $url = 'https://' . $matches[2] . $matches[3];
    }

    // Parse and reconstruct the URL to ensure a proper structure
    $parts = parse_url($url);
    if (!is_array($parts) || !isset($parts['scheme'], $parts['host'])) {
        return '';
    }

    $cleanedUrl = $parts['scheme'] . '://' . $parts['host'] . ($parts['path'] ?? '');

    // Remove control characters, then drop characters that are illegal in URLs
    $cleanedUrl = (string) preg_replace('/[\x00-\x1F\x7F]/', '', $cleanedUrl);
    $cleanedUrl = (string) filter_var($cleanedUrl, FILTER_SANITIZE_URL);

    // URL-context escaping: neutralize characters that could break out of
    // markup while keeping the value a usable URL ("&" is left untouched).
    return strtr($cleanedUrl, [
        '<' => '%3C',
        '>' => '%3E',
        '"' => '%22',
        "'" => '%27',
    ]);
}
181+
182+
/**
 * Sends the security-related HTTP response headers.
 *
 * Each entry below is a complete header line passed to header() in order:
 * Content-Security-Policy, X-Content-Type-Options, X-Frame-Options,
 * X-XSS-Protection, Referrer-Policy, Permissions-Policy and
 * Strict-Transport-Security.
 */
private function setSecurityHeaders()
{
    $securityHeaders = [
        "Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline' https://cdn.tailwindcss.com; style-src 'self' 'unsafe-inline'; img-src 'self' data:;",
        "X-Content-Type-Options: nosniff",
        "X-Frame-Options: DENY",
        "X-XSS-Protection: 1; mode=block",
        "Referrer-Policy: strict-origin-when-cross-origin",
        "Permissions-Policy: geolocation=(), microphone=(), camera=()",
        "Strict-Transport-Security: max-age=31536000; includeSubDomains",
    ];

    foreach ($securityHeaders as $headerLine) {
        header($headerLine);
    }
}
197+
140198
public function dispatch()
141199
{
200+
$this->setSecurityHeaders();
201+
142202
$httpMethod = $_SERVER['REQUEST_METHOD'];
143203
$uri = $_SERVER['REQUEST_URI'];
144204

app/src/URLProcessor.php

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public function __construct(string $url = '', bool $isApi = false)
3131
require_once __DIR__ . '/../inc/URLAnalyzer.php';
3232
require_once __DIR__ . '/../inc/Language.php';
3333

34-
$this->url = urldecode($url);
34+
$this->url = $url;
3535
$this->isApi = $isApi;
3636
$this->analyzer = new \URLAnalyzer();
3737

@@ -82,20 +82,6 @@ private function redirect(string $path, string $message = ''): void
8282
*/
8383
public function process(): void
8484
{
85-
// Validate URL format
86-
if (!filter_var($this->url, FILTER_VALIDATE_URL)) {
87-
if ($this->isApi) {
88-
$this->sendApiResponse([
89-
'error' => [
90-
'type' => \URLAnalyzer::ERROR_INVALID_URL,
91-
'message' => \Language::getMessage('INVALID_URL')['message']
92-
]
93-
], 400);
94-
} else {
95-
$this->redirect(SITE_URL, \URLAnalyzer::ERROR_INVALID_URL);
96-
}
97-
}
98-
9985
try {
10086
// Check for redirects in web mode
10187
if (!$this->isApi) {

default.conf

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,26 +11,8 @@ server {
1111
# Oculta a versão do NGINX para reduzir informações expostas
1212
server_tokens off;
1313

14-
# Security Headers / Cabeçalhos de Segurança
15-
# Enable HSTS (HTTP Strict Transport Security) to force HTTPS connections
16-
# Habilita HSTS (HTTP Strict Transport Security) para forçar conexões HTTPS
17-
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
18-
19-
# Prevent clickjacking attacks by allowing the site to be displayed only in its own domain
20-
# Previne ataques de clickjacking, permitindo que o site seja exibido apenas em seu próprio domínio
21-
add_header X-Frame-Options "SAMEORIGIN" always;
22-
23-
# Enable protection against Cross-Site Scripting (XSS) attacks
24-
# Ativa proteção contra ataques de Cross-Site Scripting (XSS)
25-
add_header X-XSS-Protection "1; mode=block" always;
26-
27-
# Prevent browsers from MIME-type sniffing
28-
# Impede que navegadores tentem adivinhar (sniff) o tipo MIME dos arquivos
29-
add_header X-Content-Type-Options "nosniff" always;
30-
31-
# Control how referrer headers are sent
32-
# Controla como os cabeçalhos de referência são enviados
33-
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
14+
# NGINX-specific security configurations
15+
# Configurações de segurança específicas do NGINX
3416

3517
# Limit upload size to prevent denial of service attacks
3618
# Limita o tamanho de uploads para prevenir ataques de negação de serviço

0 commit comments

Comments
 (0)