Skip to content

Commit 02ec5c8

Browse files
committed
adicionada opção de escolher o browser do selenium nas regras
1 parent 1a132e2 commit 02ec5c8

File tree

4 files changed

+50
-16
lines changed

4 files changed

+50
-16
lines changed

app/data/domain_rules.php

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,8 @@
116116
'classElementRemove' => ['header-top-wrapper'],
117117
],
118118
'estadao.com.br' => [
119-
'useSelenium' => true
119+
'useSelenium' => true,
120+
'browser' => 'chrome'
120121
],
121122
'stcatharinesstandard.ca' => [
122123
'useSelenium' => true

app/inc/Rules.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ class Rules
3636
'customCode',
3737
'excludeGlobalRules',
3838
'customStyle',
39-
'useSelenium'
39+
'useSelenium',
40+
'browser'
4041
];
4142

42-
4343
/**
4444
* Obtém o domínio base removendo o prefixo www
4545
*

app/inc/URLAnalyzer.php

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
use Facebook\WebDriver\Remote\RemoteWebDriver;
2222
use Facebook\WebDriver\Firefox\FirefoxOptions;
2323
use Facebook\WebDriver\Firefox\FirefoxProfile;
24+
use Facebook\WebDriver\Chrome\ChromeOptions;
2425

2526
class URLAnalyzer
2627
{
@@ -134,7 +135,7 @@ public function analyze($url)
134135
$domainRules = $this->getDomainRules($host);
135136
if (isset($domainRules['useSelenium']) && $domainRules['useSelenium'] === true) {
136137
try {
137-
$content = $this->fetchFromSelenium($cleanUrl);
138+
$content = $this->fetchFromSelenium($cleanUrl, isset($domainRules['browser']) ? $domainRules['browser'] : 'firefox');
138139
if (!empty($content)) {
139140
$processedContent = $this->processContent($content, $host, $cleanUrl);
140141
$this->cache->set($cleanUrl, $processedContent);
@@ -177,26 +178,42 @@ public function analyze($url)
177178
* Tenta obter o conteúdo da URL usando Selenium
178179
*
179180
* @param string $url URL para buscar
181+
* @param string $browser Browser to use for the Selenium session ('chrome' selects Chrome; any other value falls back to Firefox)
180182
* @return string|null Conteúdo HTML da página
181183
* @throws Exception Em caso de erro na requisição
182184
*/
183-
private function fetchFromSelenium($url)
185+
private function fetchFromSelenium($url, $browser)
184186
{
185187
$host = 'http://'.SELENIUM_HOST.'/wd/hub';
186188

187-
$profile = new FirefoxProfile();
188-
$profile->setPreference("permissions.default.image", 2); // Não carrega imagens
189-
$profile->setPreference("javascript.enabled", true); // Mantem habilitado javascripts
190-
$profile->setPreference("network.http.referer.defaultPolicy", 0); // Sempre envia referer
191-
$profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com.br"); // Define referer padrão
192-
$profile->setPreference("network.http.referer.spoofSource", true); // Permite spoofing do referer
193-
$profile->setPreference("network.http.referer.trimmingPolicy", 0); // Não corta o referer
189+
if ($browser === 'chrome') {
190+
$options = new ChromeOptions();
191+
$options->addArguments([
192+
'--headless',
193+
'--disable-gpu',
194+
'--no-sandbox',
195+
'--disable-dev-shm-usage',
196+
'--disable-images',
197+
'--blink-settings=imagesEnabled=false'
198+
]);
199+
200+
$capabilities = DesiredCapabilities::chrome();
201+
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);
202+
} else {
203+
$profile = new FirefoxProfile();
204+
$profile->setPreference("permissions.default.image", 2); // Não carrega imagens
205+
$profile->setPreference("javascript.enabled", true); // Mantem habilitado javascripts
206+
$profile->setPreference("network.http.referer.defaultPolicy", 0); // Sempre envia referer
207+
$profile->setPreference("network.http.referer.defaultReferer", "https://www.google.com.br"); // Define referer padrão
208+
$profile->setPreference("network.http.referer.spoofSource", true); // Permite spoofing do referer
209+
$profile->setPreference("network.http.referer.trimmingPolicy", 0); // Não corta o referer
194210

195-
$options = new FirefoxOptions();
196-
$options->setProfile($profile);
211+
$options = new FirefoxOptions();
212+
$options->setProfile($profile);
197213

198-
$capabilities = DesiredCapabilities::firefox();
199-
$capabilities->setCapability(FirefoxOptions::CAPABILITY, $options);
214+
$capabilities = DesiredCapabilities::firefox();
215+
$capabilities->setCapability(FirefoxOptions::CAPABILITY, $options);
216+
}
200217

201218
try {
202219
$driver = RemoteWebDriver::create($host, $capabilities);

docker-compose-selenium.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,20 @@
11
services:
2+
selenium-chromium:
3+
container_name: selenium-chromium
4+
image: selenium/node-chromium:4.27.0-20241204
5+
shm_size: 2gb
6+
environment:
7+
- SE_EVENT_BUS_HOST=selenium-hub
8+
- SE_EVENT_BUS_PUBLISH_PORT=4442
9+
- SE_EVENT_BUS_SUBSCRIBE_PORT=4443
10+
- SE_ENABLE_TRACING=false
11+
- SE_NODE_MAX_SESSIONS=10
12+
- SE_NODE_OVERRIDE_MAX_SESSIONS=true
13+
entrypoint: bash -c 'SE_OPTS="--host $$HOSTNAME" /opt/bin/entry_point.sh'
14+
depends_on:
15+
- selenium-hub
16+
networks:
17+
- selenium
218
selenium-firefox:
319
container_name: selenium-firefox
420
image: selenium/node-firefox:4.27.0-20241204

0 commit comments

Comments
 (0)