Skip to content

Commit 25f2292

Browse files
committed
Fix multiple sites
1 parent 897168f commit 25f2292

File tree

1 file changed

+39
-24
lines changed

1 file changed

+39
-24
lines changed

src/sites.ts

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { consentShadowRoot, getCmpBoxConsent, getConsentCdnSetup, getContentPassConsent } from './test_utils.js'
1+
import { consentShadowRoot, getCmpBoxConsent, getConsentCdnSetup } from './test_utils.js'
22

33
import { PartialSite, Sites } from './types.js'
44

@@ -20,7 +20,7 @@ const createQuery = (text: string, quoted = true, startSlice = START_SLICE, endS
2020
}
2121
return queryParts.join(' ')
2222
}
23-
const makeQueryFunc = (selector: string|string[], quoted = true, startSlice = START_SLICE, endSlice = END_SLICE) => {
23+
const makeQueryFunc = (selector: string | string[], quoted = true, startSlice = START_SLICE, endSlice = END_SLICE) => {
2424
if (!Array.isArray(selector)) {
2525
selector = [selector]
2626
}
@@ -102,7 +102,7 @@ const sites: Sites = {
102102
query: makeQueryFunc(['.leading-tight span:not(:first-child), .leading-none .leading-normal, h2 span:not(:first-child) span:not(:first-child)', '.leading-loose'], false),
103103
date: 'time',
104104
main: 'article section.relative',
105-
paywall: "div[data-component='Paywall'], div[data-target-id='paywall']"
105+
paywall: "div[data-component='Paywall'], div[data-target-id='paywall'], div[data-area='paywall']"
106106
},
107107
mimic: (content) => {
108108
return `
@@ -115,6 +115,7 @@ const sites: Sites = {
115115
},
116116
dateRange: [7, 1], // search from 7 days before to one day after given date
117117
source: 'genios.de',
118+
waitOnLoad: 1500,
118119
sourceParams: {
119120
dbShortcut: 'SPPL,SPII,KULS,SPIE,SSPE,UNIS,LISP,SPBE',
120121
sourceNames: ['SPIEGEL Plus', 'kulturSPIEGEL', 'DER SPIEGEL', 'SPIEGEL special', 'uniSPIEGEL', 'LiteraturSPIEGEL', 'SPIEGEL Bestseller']
@@ -138,12 +139,12 @@ const sites: Sites = {
138139
{
139140
url: 'https://www.tagesspiegel.de/kultur/comics/im-sumpf-der-verschworungsideologien-es-wurde-immer-schwieriger-mit-meinem-vater-ein-gesprach-zu-fuhren-11626376.html',
140141
selectors: {
141-
query: '"kommt diesmal beim Einräumen der Spülmaschine „Wach endlich auf “ ruft der Vater seiner"'
142+
query: 'kommt diesmal beim Einräumen der Spülmaschine „Wach endlich auf “ ruft der Vater seiner'
142143
}
143144
}
144145
],
145146
selectors: {
146-
query: makeQueryFunc('#story-elements p'),
147+
query: makeQueryFunc('#story-elements p', false),
147148
main: '#story-elements',
148149
paywall: '#paywall',
149150
date: 'time'
@@ -155,7 +156,7 @@ const sites: Sites = {
155156
}
156157
},
157158
'www.zeit.de': {
158-
testSetup: getConsentCdnSetup({ }),
159+
testSetup: getConsentCdnSetup({}),
159160
examples: [
160161
{
161162
url: 'https://www.zeit.de/2021/11/soziale-ungleichheit-identitaetspolitik-diskriminierung-armut-bildung',
@@ -449,6 +450,7 @@ const sites: Sites = {
449450
}
450451
},
451452
'www.wiwo.de': {
453+
testSetup: getConsentCdnSetup({ framePart: 'cmp-sp' }),
452454
examples: [
453455
{
454456
url: 'https://www.wiwo.de/my/unternehmen/industrie/mischkonzern-zeppelin-ein-ausschluss-russlands-aus-swift-wuerde-eine-weltwirtschaftskrise-ausloesen/28091946.html',
@@ -458,17 +460,31 @@ const sites: Sites = {
458460
}
459461
],
460462
selectors: {
461-
query: makeQueryFunc('.c-leadtext', false),
462-
main: '.o-article__content',
463-
paywall: '.o-paywall',
464-
date: 'time'
463+
query: makeQueryFunc('app-header-content-lead-text', false),
464+
main: 'app-story-detail-page article app-blind-text',
465+
paywall: 'app-paywall',
466+
date: 'app-story-date'
467+
},
468+
/**
 * Page preparation hook for www.wiwo.de.
 *
 * Removes the `blurry-text` class from the `app-blind-text` element, removes
 * its `app-storyline-elements` descendant if one exists, and sets
 * `display: none` on the `app-paywall` element.
 *
 * Every lookup is null-guarded: a missing element is skipped instead of
 * throwing, so the remaining steps still run.
 *
 * @param root - node the selectors are resolved against
 */
start: (root) => {
  const blindText = root.querySelector('app-blind-text')
  // querySelector returns null when nothing matches; without this guard a
  // missing teaser container would throw before the paywall is hidden.
  if (blindText) {
    blindText.classList.remove('blurry-text')
    blindText.querySelector('app-storyline-elements')?.remove()
  }
  const paywall: HTMLElement = root.querySelector('app-paywall')
  if (paywall) {
    paywall.style.display = 'none'
  }
},
477+
/**
 * Content insertion hook for www.wiwo.de.
 *
 * Calls `siteBot.hideBot()` and then appends `content` as HTML to the end of
 * `main`.
 *
 * @param siteBot - bot instance; only `hideBot()` is used here
 * @param main - element that receives the markup
 * @param content - HTML string to append
 */
insertContent: (siteBot, main, content) => {
  siteBot.hideBot()
  // Append in place rather than `innerHTML += content`, which would serialize
  // and re-parse the existing children and replace every existing node.
  main.insertAdjacentHTML('beforeend', content)
},
466481
dateRange: [8, 1], // search from roughly week before
467482
source: 'genios.de',
468483
sourceParams: {
469484
dbShortcut: 'WWLATE,WWONLATE,WWBW,WWGR',
470485
sourceNames: ['WirtschaftsWoche', 'WirtschaftsWoche online', 'WirtschaftsWoche Green']
471-
}
486+
},
487+
waitOnLoad: 2000
472488
},
473489
'www.heise.de': {
474490
examples: [
@@ -682,9 +698,9 @@ const sites: Sites = {
682698
const main: HTMLElement = root.querySelector('header div[class*="ArticleHeadstyled__ArticleTeaserContainer"]')
683699
main.style.height = 'auto'
684700
main.style.overflow = 'auto'
685-
// const obj = JSON.parse(document.evaluate('//script[@type="application/ld+json" and contains(./text(), "mainEntityOfPage")]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent)
686-
// paywall.textContent = obj.articleBody
687-
// return true
701+
// const obj = JSON.parse(document.evaluate('//script[@type="application/ld+json" and contains(./text(), "mainEntityOfPage")]', document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent)
702+
// paywall.textContent = obj.articleBody
703+
// return true
688704
},
689705
mimic: '*[class*="Textstyled__InlineText"]',
690706
source: 'genios.de',
@@ -886,7 +902,7 @@ const sites: Sites = {
886902
}
887903
},
888904
'www.mopo.de': {
889-
testSetup: consentShadowRoot({ }),
905+
testSetup: consentShadowRoot({}),
890906
examples: [
891907
{
892908
url: 'https://www.mopo.de/hamburg/vor-29-jahren-stillgelegt-das-wird-jetzt-aus-dem-schellfischtunnel/?reduced=true',
@@ -920,21 +936,20 @@ const sites: Sites = {
920936
}
921937
},
922938
'www.saechsische.de': {
923-
testSetup: getContentPassConsent({ }),
939+
testSetup: getConsentCdnSetup({ framePart: 'cmp-sp' }),
924940
examples: [
925941
{
926-
url: 'https://www.saechsische.de/sachsen/die-dresdner-lehrerin-und-ihre-radikale-sekte-5418484-plus.html',
942+
url: 'https://www.saechsische.de/lokales/dresden/ocg-sekte-dresdner-lehrerin-gehoert-zum-fuehrungskreis-W6O37RLLZLV4K3G7WI3DPTAV3E.html',
927943
selectors: {
928-
query: '"Lehrerin und ihre radikale Sekte"'
944+
query: '"in einer Sekte war teils bekannt Jetzt kommt heraus Die Lehrerin aus Dresden"'
929945
}
930946
}
931947
],
932948
selectors: {
933-
query: makeQueryFunc('.article-detail-title h2'),
934-
headline: '.article-detail-title h2',
935-
date: 'time',
936-
paywall: '#piano-inline',
937-
main: '.article-detail-content'
949+
query: makeQueryFunc('.paywalledContent', true),
950+
headline: 'h2',
951+
paywall: 'div[data-testid="piano-container"]',
952+
main: 'div[class*="Articlestyled__ArticleBodyWrapper"]'
938953
},
939954
start: (root) => {
940955
const blur = root.querySelector('.plus-overlay-blur')
@@ -1159,7 +1174,7 @@ const sites: Sites = {
11591174
}
11601175
},
11611176
'www.mittelbayerische.de': {
1162-
testSetup: consentShadowRoot({ }),
1177+
testSetup: consentShadowRoot({}),
11631178
examples: [
11641179
{
11651180
url: 'https://www.mittelbayerische.de/lokales/stadt-regensburg/geister-parkhaus-am-regensburger-tech-campus-die-nutzungsquote-steigt-14904402',

0 commit comments

Comments
 (0)