Skip to content

Commit 5d1557d

Browse files
refactor(backend): remove Zenrows proxying functions
1 parent 7baf7b6 commit 5d1557d

File tree

7 files changed: 57 additions and 85 deletions.

.env.example

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ ACLED_EMAIL=
33
ACLED_KEY=
44
# media data
55
MEDIACLOUD_API_TOKEN=
6-
ZENROWS_API_KEY=
76
# google trends data
87
DATAFORSEO_EMAIL=
98
DATAFORSEO_PASSWORD=

.github/workflows/deploy.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ jobs:
5858
MEDIACLOUD_API_TOKEN: ${{ secrets.MEDIACLOUD_API_TOKEN }}
5959
ACLED_EMAIL: ${{ secrets.ACLED_EMAIL }}
6060
ACLED_KEY: ${{ secrets.ACLED_KEY }}
61-
ZENROWS_API_KEY: ${{ secrets.ZENROWS_API_KEY }}
6261
AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
6362
AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
6463
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}

backend-python/media_impact_monitor/util/cache.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,6 @@
44

55
from joblib import Memory
66
from requests import get as _get, post as _post, Response
7-
from zenrows import ZenRowsClient
8-
9-
from media_impact_monitor.util.env import ZENROWS_API_KEY
107

118
memory = Memory("cache", verbose=0)
129
cache = memory.cache
@@ -52,43 +49,3 @@ def post(url, sleep=None, **kwargs) -> Response | None:
5249
if sleep is not None:
5350
_sleep(sleep)
5451
return response
55-
56-
57-
@cache
58-
def get_proxied(url, **kwargs):
59-
if "timeout" not in kwargs:
60-
kwargs["timeout"] = 10
61-
try:
62-
response = get(url, **kwargs)
63-
return response
64-
except Exception:
65-
pass
66-
client = ZenRowsClient(ZENROWS_API_KEY, retries=2, concurrency=10)
67-
response = client.get(url, **kwargs)
68-
if response.text.startswith('{"code":'):
69-
zenrows_errors = [
70-
"REQS001",
71-
"REQS004",
72-
"REQS006",
73-
"RESP004",
74-
"AUTH001",
75-
"AUTH002",
76-
"AUTH003",
77-
"AUTH004",
78-
"AUTH005",
79-
"AUTH009",
80-
"BLK0001",
81-
"AUTH007",
82-
"AUTH006",
83-
"AUTH008",
84-
"CTX0001",
85-
"ERR0001",
86-
"ERR0000",
87-
"RESP003",
88-
]
89-
if any(error in response.text for error in zenrows_errors):
90-
# problem with zenrows -> inform the developer
91-
raise Exception(response.text)
92-
# otherwise, problem with the site itself -> just don't use this site
93-
return None
94-
return response

backend-python/media_impact_monitor/util/cache_test.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
1-
import pytest
2-
3-
from media_impact_monitor.util.cache import get, get_proxied, post
1+
from media_impact_monitor.util.cache import get, post
42

53
# URLs for the stable testing endpoints (preferably dedicated for testing purposes)
64
GET_URL = "https://httpbin.org/get"
@@ -30,17 +28,3 @@ def test_post_retrieval():
3028
assert (
3129
response.json().get("json") == POST_DATA
3230
), "The response body should contain the JSON data we sent"
33-
34-
35-
@pytest.mark.skip(
36-
reason="Our API key has expired, we will get a new one once we really need it."
37-
)
38-
def test_get_proxied():
39-
"""
40-
Test if the `get_proxied` function can successfully retrieve content.
41-
"""
42-
response = get_proxied(GET_URL)
43-
assert response.status_code == 200
44-
assert (
45-
"args" in response.json()
46-
), "The response should contain 'args' to confirm it's from httpbin.org"

backend-python/media_impact_monitor/util/env.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
ACLED_EMAIL = environ["ACLED_EMAIL"]
1111
ACLED_KEY = environ["ACLED_KEY"]
1212
MEDIACLOUD_API_TOKEN = environ["MEDIACLOUD_API_TOKEN"]
13-
ZENROWS_API_KEY = environ["ZENROWS_API_KEY"]
1413
AZURE_API_BASE = environ["AZURE_API_BASE"]
1514
AZURE_API_VERSION = environ["AZURE_API_VERSION"]
1615
AZURE_API_KEY = environ["AZURE_API_KEY"]
@@ -24,7 +23,6 @@
2423
assert ACLED_EMAIL
2524
assert ACLED_KEY
2625
assert MEDIACLOUD_API_TOKEN
27-
assert ZENROWS_API_KEY
2826
assert AZURE_API_BASE
2927
assert AZURE_API_VERSION
3028
assert AZURE_API_KEY

backend-python/poetry.lock

Lines changed: 56 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

backend-python/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ matplotlib = "^3.8.2"
2020
mediacloud = "^4.1.3"
2121
backoff = "^2.2.1"
2222
openai = "^1.12.0"
23-
zenrows = "^1.3.2"
2423
html2text = "^2020.1.16"
2524
openpyxl = "^3.1.2"
2625
fastapi = "^0.110.0"

0 commit comments

Comments
 (0)