Skip to content

Commit d1582a3

Browse files
committed
Adicionado o referer nas chamadas do Selenium
1 parent 5fbd397 commit d1582a3

File tree

2 files changed

+64
-20
lines changed

2 files changed

+64
-20
lines changed

app/data/domain_rules.php

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -129,16 +129,16 @@
129129
],
130130
'scriptTagRemove' => ['wrapperMessagingWithoutDetection.js'],
131131
'customCode' => '
132-
var artBodyContainer = document.querySelector("article.article");
133-
var artBody = artBodyContainer.innerHTML;
134-
checkPaywall();
135-
function checkPaywall() {
136-
let paywallBox = document.querySelector(".layout-article-regwall");
137-
if (paywallBox) {
138-
artBodyContainer.innerHTML = artBody;
139-
}
132+
var artBodyContainer = document.querySelector("article.article");
133+
var artBody = artBodyContainer.innerHTML;
134+
checkPaywall();
135+
function checkPaywall() {
136+
let paywallBox = document.querySelector(".layout-article-regwall");
137+
if (paywallBox) {
138+
artBodyContainer.innerHTML = artBody;
140139
}
141-
'
140+
}
141+
'
142142
],
143143
'ft.com' => [
144144
'cookies' => [
@@ -150,9 +150,47 @@ function checkPaywall() {
150150
]
151151
],
152152
'nytimes.com' => [
153-
'cookies' => [
154-
'nyt-gdpr' => '1',
155-
'nyt-purr' => 'cfh'
153+
'useSelenium' => true,
154+
'excludeGlobalRules' => [
155+
'scriptTagRemove' => [
156+
'gtm.js',
157+
'ga.js',
158+
'fbevents.js',
159+
'pixel.js',
160+
'chartbeat',
161+
'analytics.js',
162+
'cmp.js',
163+
'wall.js',
164+
'paywall.js',
165+
'subscriber.js',
166+
'piano.js',
167+
'tiny.js',
168+
'pywll.js',
169+
'content-gate.js',
170+
'signwall.js',
171+
'pw.js',
172+
'pw-',
173+
'piano-',
174+
'tinypass',
175+
'tp.min.js',
176+
'premium.js',
177+
'amp-access-0.1.js',
178+
'zephrBarriersScripts',
179+
'leaky-paywall',
180+
'cookie',
181+
'gdpr',
182+
'lgpd',
183+
'push',
184+
'sw.js',
185+
'stats.js',
186+
'piano.io',
187+
'onesignal.com',
188+
'getsitecontrol.com',
189+
'navdmp.com',
190+
'getblue.io',
191+
'smartocto.com',
192+
'cdn.pn.vg'
193+
]
156194
]
157195
],
158196
'correio24horas.com.br' => [

app/inc/URLAnalyzer.php

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -134,15 +134,17 @@ public function analyze($url)
134134
// 4. Verifica se deve usar Selenium
135135
$domainRules = $this->getDomainRules($host);
136136
if (isset($domainRules['useSelenium']) && $domainRules['useSelenium'] === true) {
137-
$content = $this->fetchFromSelenium($cleanUrl);
138-
if (!empty($content)) {
139-
$processedContent = $this->processContent($content, $host, $cleanUrl);
140-
$this->cache->set($cleanUrl, $processedContent);
141-
return $processedContent;
137+
try {
138+
$content = $this->fetchFromSelenium($cleanUrl);
139+
if (!empty($content)) {
140+
$processedContent = $this->processContent($content, $host, $cleanUrl);
141+
$this->cache->set($cleanUrl, $processedContent);
142+
return $processedContent;
143+
}
144+
} catch (Exception $e) {
145+
$this->logError($cleanUrl, "Selenium fetch error: " . $e->getMessage());
146+
throw new Exception("Não foi possível obter o conteúdo via Selenium");
142147
}
143-
144-
$this->logError($cleanUrl, "Selenium fetch error: " . $e->getMessage());
145-
throw new Exception("Não foi possível obter o conteúdo via Selenium");
146148
}
147149

148150
// 5. Tenta buscar conteúdo diretamente
@@ -186,6 +188,10 @@ private function fetchFromSelenium($url)
186188
$profile = new FirefoxProfile();
187189
$profile->setPreference("permissions.default.image", 2); // Não carrega imagens
188190
$profile->setPreference("javascript.enabled", true); // Mantem habilitado javascripts
191+
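$profile->setPreference("network.http.referer.defaultPolicy", 0); // NOTE(review): in Firefox, 0 = no-referrer (Referer is NOT sent); 3 = no-referrer-when-downgrade — confirm the intended value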
192+
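$profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com.br"); // Intended default referer. NOTE(review): this preference name is not among the documented Firefox referrer prefs — verify it has any effect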
193+
$profile->setPreference("network.http.referer.spoofSource", true); // Permite spoofing do referer
194+
$profile->setPreference("network.http.referer.trimmingPolicy", 0); // Não corta o referer
189195

190196
$options = new FirefoxOptions();
191197
$options->setProfile($profile);

0 commit comments

Comments
 (0)