Skip to content

Commit 008b11f

Browse files
committed
Regra de correção de URLs relativas nas páginas se torna global
1 parent 7293e44 commit 008b11f

File tree

3 files changed

+18
-27
lines changed

3 files changed

+18
-27
lines changed

app/data/domain_rules.php

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
* - scriptTagRemove: Array de scripts que devem ser removidos (partial match)
1515
* - cookies: Array associativo de cookies a serem definidos (null remove o cookie)
1616
* - classAttrRemove: Array de classes a serem removidas de elementos
17-
* - fixRelativeUrls: Boolean para habilitar correção de URLs relativas
1817
* - customCode: String contendo código JavaScript personalizado para execução
1918
* - excludeGlobalRules: Array de regras globais que devem ser ignoradas
2019
*/
@@ -51,24 +50,8 @@
5150
'estadao_paywall' => null
5251
]
5352
],
54-
'exame.com' => [
55-
'fixRelativeUrls' => true,
56-
],
57-
'diarinho.net' => [
58-
'fixRelativeUrls' => true,
59-
],
60-
'em.com.br' => [
61-
'fixRelativeUrls' => true,
62-
],
63-
'businessinsider.com' => [
64-
'fixRelativeUrls' => true,
65-
],
6653
'opovo.com.br' => [
67-
'fixRelativeUrls' => true,
68-
'classElementRemove' => ['screen-loading', 'overlay-advise'],
69-
],
70-
'folhadelondrina.com.br' => [
71-
'fixRelativeUrls' => true,
54+
'classElementRemove' => ['screen-loading', 'overlay-advise']
7255
],
7356
'crusoe.com.br' => [
7457
'cookies' => [
@@ -125,7 +108,7 @@ function checkPaywall() {
125108
],
126109
'foreignpolicy.com' => [
127110
'idElementRemove' => ['paywall_bg'],
128-
'classAttrRemove' => ['overlay-no-scroll', 'overlay-no-scroll'],
111+
'classAttrRemove' => ['overlay-no-scroll', 'overlay-no-scroll']
129112
],
130113
'dgabc.com.br' => [
131114
'customCode' => '
@@ -135,8 +118,7 @@ function checkPaywall() {
135118
$(".linhaSuperBanner").show();
136119
$(".footer").show();
137120
$(".NoticiaExclusivaLogado").show();
138-
',
139-
'fixRelativeUrls' => true,
121+
'
140122
],
141123
'forbes.com' => [
142124
'classElementRemove' => ['zephr-backdrop', 'zephr-generic-modal'],
@@ -149,7 +131,7 @@ function checkPaywall() {
149131
],
150132
],
151133
'seudinheiro.com' => [
152-
'idElementRemove' => ['premium-paywall'],
134+
'idElementRemove' => ['premium-paywall']
153135
],
154136
'technologyreview.com' => [
155137
'cookies' => [

app/data/global_rules.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
],
5555
'scriptTagRemove' => [
5656
'tracking' => [
57+
'gtm.js',
5758
'ga.js',
5859
'fbevents.js',
5960
'pixel.js',

app/inc/URLAnalyzer.php

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,9 @@ private function fixRelativeUrls($dom, $xpath, $baseUrl)
611611
foreach ($elements as $element) {
612612
if ($element instanceof DOMElement) {
613613
$src = $element->getAttribute('src');
614+
if (strpos($src, 'base64') !== false) {
615+
continue;
616+
}
614617
if (strpos($src, 'http') !== 0 && strpos($src, '//') !== 0) {
615618
$src = ltrim($src, '/');
616619
$element->setAttribute('src', $baseHost . '/' . $src);
@@ -624,7 +627,13 @@ private function fixRelativeUrls($dom, $xpath, $baseUrl)
624627
foreach ($elements as $element) {
625628
if ($element instanceof DOMElement) {
626629
$href = $element->getAttribute('href');
627-
if (strpos($href, 'http') !== 0 && strpos($href, '//') !== 0 && strpos($href, '#') !== 0 && strpos($href, 'mailto:') !== 0) {
630+
if (strpos($href, 'mailto:') === 0 ||
631+
strpos($href, 'tel:') === 0 ||
632+
strpos($href, 'javascript:') === 0 ||
633+
strpos($href, '#') === 0) {
634+
continue;
635+
}
636+
if (strpos($href, 'http') !== 0 && strpos($href, '//') !== 0) {
628637
$href = ltrim($href, '/');
629638
$element->setAttribute('href', $baseHost . '/' . $href);
630639
}
@@ -651,12 +660,11 @@ private function processContent($content, $domain, $url)
651660

652661
$xpath = new DOMXPath($dom);
653662

663+
// Sempre aplica a correção de URLs relativas
664+
$this->fixRelativeUrls($dom, $xpath, $url);
665+
654666
$domainRules = $this->getDomainRules($domain);
655667
if ($domainRules !== null) {
656-
if (isset($domainRules['fixRelativeUrls']) && $domainRules['fixRelativeUrls'] === true) {
657-
$this->fixRelativeUrls($dom, $xpath, $url);
658-
}
659-
660668
if (isset($domainRules['customStyle'])) {
661669
$styleElement = $dom->createElement('style');
662670
$styleContent = '';

0 commit comments

Comments
 (0)