@@ -134,45 +134,42 @@ public function analyze($url)
134134 // 4. Verifica se deve usar Selenium
135135 $ domainRules = $ this ->getDomainRules ($ host );
136136 if (isset ($ domainRules ['useSelenium ' ]) && $ domainRules ['useSelenium ' ] === true ) {
137- try {
138- $ content = $ this ->fetchFromSelenium ($ cleanUrl );
139- if (!empty ($ content )) {
140- $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
141- $ this ->cache ->set ($ cleanUrl , $ processedContent );
142- return $ processedContent ;
143- }
144- } catch (Exception $ e ) {
145- $ this ->logError ($ cleanUrl , "Selenium fetch error: " . $ e ->getMessage ());
146- }
147- } else {
148- // 5. Tenta buscar conteúdo diretamente
149- try {
150- $ content = $ this ->fetchContent ($ cleanUrl );
151- if (!empty ($ content )) {
152- $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
153- $ this ->cache ->set ($ cleanUrl , $ processedContent );
154- return $ processedContent ;
155- }
156- } catch (Exception $ e ) {
157- $ this ->logError ($ cleanUrl , "Direct fetch error: " . $ e ->getMessage ());
137+ $ content = $ this ->fetchFromSelenium ($ cleanUrl );
138+ if (!empty ($ content )) {
139+ $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
140+ $ this ->cache ->set ($ cleanUrl , $ processedContent );
141+ return $ processedContent ;
158142 }
159143
160- // 6. Tenta buscar do Wayback Machine como fallback
161- try {
162- $ content = $ this ->fetchFromWaybackMachine ($ cleanUrl );
163- if (!empty ($ content )) {
164- $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
165- $ this ->cache ->set ($ cleanUrl , $ processedContent );
166- return $ processedContent ;
167- }
168- } catch (Exception $ e ) {
169- $ this ->logError ($ cleanUrl , "Wayback Machine error: " . $ e ->getMessage ());
170- }
144+ $ this ->logError ($ cleanUrl , "Selenium fetch error: " . $ e ->getMessage ());
145+ throw new Exception ("Não foi possível obter o conteúdo via Selenium " );
146+ }
171147
172- throw new Exception ("Não foi possível obter o conteúdo da URL " );
148+ // 5. Tenta buscar conteúdo diretamente
149+ try {
150+ $ content = $ this ->fetchContent ($ cleanUrl );
151+ if (!empty ($ content )) {
152+ $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
153+ $ this ->cache ->set ($ cleanUrl , $ processedContent );
154+ return $ processedContent ;
155+ }
156+ } catch (Exception $ e ) {
157+ $ this ->logError ($ cleanUrl , "Direct fetch error: " . $ e ->getMessage ());
173158 }
174159
160+ // 6. Tenta buscar do Wayback Machine como fallback
161+ try {
162+ $ content = $ this ->fetchFromWaybackMachine ($ cleanUrl );
163+ if (!empty ($ content )) {
164+ $ processedContent = $ this ->processContent ($ content , $ host , $ cleanUrl );
165+ $ this ->cache ->set ($ cleanUrl , $ processedContent );
166+ return $ processedContent ;
167+ }
168+ } catch (Exception $ e ) {
169+ $ this ->logError ($ cleanUrl , "Wayback Machine error: " . $ e ->getMessage ());
170+ }
175171
172+ throw new Exception ("Não foi possível obter o conteúdo da URL " );
176173 }
177174
178175 /**
@@ -187,8 +184,8 @@ private function fetchFromSelenium($url)
187184 $ host = 'http:// ' .SELENIUM_HOST .'/wd/hub ' ;
188185
189186 $ profile = new FirefoxProfile ();
190- $ profile ->setPreference ("permissions.default.image " , 2 );
191- $ profile ->setPreference ("javascript.enabled " , true );
187+ $ profile ->setPreference ("permissions.default.image " , 2 ); // Não carrega imagens
188+ $ profile ->setPreference ("javascript.enabled " , true ); // Mantem habilitado javascripts
192189
193190 $ options = new FirefoxOptions ();
194191 $ options ->setProfile ($ profile );
0 commit comments