Skip to content

Commit c9e59df

Browse files
committed
Removidos logs e liberado Selenium como último fallback
1 parent 775b6bd commit c9e59df

File tree

3 files changed

+17
-3
lines changed

3 files changed

+17
-3
lines changed

TESTED_URLS.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,8 @@ https://oantagonista.com.br/brasil/lewandowski-insiste-na-pec-da-seguranca/
3939
https://jornaldebrasilia.com.br/noticias/politica-e-poder/lula-aguarda-pt-para-troca-em-pastas-chefiadas-por-petistas-em-reforma-ministerial/
4040
https://opopular.com.br/cidades/ex-secretario-de-saude-de-goiania-deixa-hospital-e-volta-para-a-cadeia-1.3207162
4141
https://www.cartacapital.com.br/politica/surpresa-natalina/
42+
https://seucreditodigital.com.br/123milhas-devera-apresentar-plano-de-recuperacao-ainda-este-mes/
43+
https://www.matinaljornalismo.com.br/matinal/reportagem-matinal/vazao-guaiba-porto-alegre/
4244

4345
## Internacional
4446
https://www.nytimes.com/2024/11/20/us/politics/matt-gaetz-venmo-payments-sex.html

app/data/blocked_domains.php

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
'utppublishing.com',
1717
'chronicle.com',
1818
'nexojornal.com',
19+
'nexojornal.com.br',
1920
'lesoir.be',
2021
'weeklytimesnow.com.au',
2122
'barrons.com',

app/inc/URLAnalyzer.php

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,6 @@ public function analyze($url)
123123
$host = preg_replace('/^www\./', '', $host);
124124

125125
if (in_array($host, BLOCKED_DOMAINS)) {
126-
Logger::getInstance()->log($cleanUrl, 'BLOCKED_DOMAIN');
127126
throw new Exception('Este domínio está bloqueado para extração.');
128127
}
129128

@@ -153,7 +152,7 @@ public function analyze($url)
153152
return $processedContent;
154153
}
155154
} catch (Exception $e) {
156-
Logger::getInstance()->log($cleanUrl, 'DIRECT_FETCH_ERROR', $e->getMessage());
155+
error_log("DIRECT_FETCH_ERROR: " . $e->getMessage());
157156
}
158157

159158
// 6. Tenta buscar do Wayback Machine como fallback
@@ -165,7 +164,19 @@ public function analyze($url)
165164
return $processedContent;
166165
}
167166
} catch (Exception $e) {
168-
Logger::getInstance()->log($cleanUrl, 'WAYBACK_FETCH_ERROR', $e->getMessage());
167+
error_log("WAYBACK_FETCH_ERROR: " . $e->getMessage());
168+
}
169+
170+
// 7. Tenta buscar com Selenium como fallback
171+
try {
172+
$content = $this->fetchFromSelenium($cleanUrl, 'firefox');
173+
if (!empty($content)) {
174+
$processedContent = $this->processContent($content, $host, $cleanUrl);
175+
$this->cache->set($cleanUrl, $processedContent);
176+
return $processedContent;
177+
}
178+
} catch (Exception $e) {
179+
error_log("SELENIUM_ERROR: " . $e->getMessage());
169180
}
170181

171182
Logger::getInstance()->log($cleanUrl, 'GENERAL_FETCH_ERROR');

0 commit comments

Comments
 (0)