Skip to content

Commit a822bf5

Browse files
committed
Se o domínio usa Selenium, ignorar outras tentativas
1 parent e4e29af commit a822bf5

File tree

1 file changed

+32
-35
lines changed

1 file changed

+32
-35
lines changed

app/inc/URLAnalyzer.php

Lines changed: 32 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -134,45 +134,42 @@ public function analyze($url)
134134
// 4. Verifica se deve usar Selenium
135135
$domainRules = $this->getDomainRules($host);
136136
if (isset($domainRules['useSelenium']) && $domainRules['useSelenium'] === true) {
137-
try {
138-
$content = $this->fetchFromSelenium($cleanUrl);
139-
if (!empty($content)) {
140-
$processedContent = $this->processContent($content, $host, $cleanUrl);
141-
$this->cache->set($cleanUrl, $processedContent);
142-
return $processedContent;
143-
}
144-
} catch (Exception $e) {
145-
$this->logError($cleanUrl, "Selenium fetch error: " . $e->getMessage());
146-
}
147-
} else {
148-
// 5. Tenta buscar conteúdo diretamente
149-
try {
150-
$content = $this->fetchContent($cleanUrl);
151-
if (!empty($content)) {
152-
$processedContent = $this->processContent($content, $host, $cleanUrl);
153-
$this->cache->set($cleanUrl, $processedContent);
154-
return $processedContent;
155-
}
156-
} catch (Exception $e) {
157-
$this->logError($cleanUrl, "Direct fetch error: " . $e->getMessage());
137+
$content = $this->fetchFromSelenium($cleanUrl);
138+
if (!empty($content)) {
139+
$processedContent = $this->processContent($content, $host, $cleanUrl);
140+
$this->cache->set($cleanUrl, $processedContent);
141+
return $processedContent;
158142
}
159143

160-
// 6. Tenta buscar do Wayback Machine como fallback
161-
try {
162-
$content = $this->fetchFromWaybackMachine($cleanUrl);
163-
if (!empty($content)) {
164-
$processedContent = $this->processContent($content, $host, $cleanUrl);
165-
$this->cache->set($cleanUrl, $processedContent);
166-
return $processedContent;
167-
}
168-
} catch (Exception $e) {
169-
$this->logError($cleanUrl, "Wayback Machine error: " . $e->getMessage());
170-
}
144+
$this->logError($cleanUrl, "Selenium fetch error: " . $e->getMessage());
145+
throw new Exception("Não foi possível obter o conteúdo via Selenium");
146+
}
171147

172-
throw new Exception("Não foi possível obter o conteúdo da URL");
148+
// 5. Tenta buscar conteúdo diretamente
149+
try {
150+
$content = $this->fetchContent($cleanUrl);
151+
if (!empty($content)) {
152+
$processedContent = $this->processContent($content, $host, $cleanUrl);
153+
$this->cache->set($cleanUrl, $processedContent);
154+
return $processedContent;
155+
}
156+
} catch (Exception $e) {
157+
$this->logError($cleanUrl, "Direct fetch error: " . $e->getMessage());
173158
}
174159

160+
// 6. Tenta buscar do Wayback Machine como fallback
161+
try {
162+
$content = $this->fetchFromWaybackMachine($cleanUrl);
163+
if (!empty($content)) {
164+
$processedContent = $this->processContent($content, $host, $cleanUrl);
165+
$this->cache->set($cleanUrl, $processedContent);
166+
return $processedContent;
167+
}
168+
} catch (Exception $e) {
169+
$this->logError($cleanUrl, "Wayback Machine error: " . $e->getMessage());
170+
}
175171

172+
throw new Exception("Não foi possível obter o conteúdo da URL");
176173
}
177174

178175
/**
@@ -187,8 +184,8 @@ private function fetchFromSelenium($url)
187184
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
188185

189186
$profile = new FirefoxProfile();
190-
$profile->setPreference("permissions.default.image", 2);
191-
$profile->setPreference("javascript.enabled", true);
187+
$profile->setPreference("permissions.default.image", 2); // Não carrega imagens
188+
$profile->setPreference("javascript.enabled", true); // Mantém o JavaScript habilitado
192189

193190
$options = new FirefoxOptions();
194191
$options->setProfile($profile);

0 commit comments

Comments
 (0)