-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbuild_data.py
More file actions
348 lines (306 loc) · 13.2 KB
/
build_data.py
File metadata and controls
348 lines (306 loc) · 13.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
"""
This is a VERY simple script to build the static JSON data files used by the
package support detail page in the PyScript documentation site.
How?
1. It fetches community contributed package status updates from a
Google Sheets document (published as CSV) and uses those to override the
generated data files as needed.
2. It uses requests to fetch and parse the Pyodide package support data JSON
files found here:
https://raw.githubusercontent.com/pyscript/polyscript/refs/heads/main/rollup/pyodide_graph.json
Then iterates over them to generate individual package JSON files used by
the package support detail page. These end up in the /api/packages directory.
If there has not been an update to the supported versions of Pyodide for a
given package, the existing JSON file is preserved to avoid overwriting any
community contributed updates.
3. It also grabs the download stats for PyPI and creates a JSON description
including info about the top 100 packages by download count and whether
they are supported in Pyodide.
4. Finally, it records when the script was last run to avoid overwriting
newer community contributed updates, and generates an all.json file
containing all package data for easy access.
This is a DELIBERATELY simple script without much error handling or
sophistication. It is intended to be run occasionally by hand to refresh
the data files. Since this website is advertised as being "curated" this
manual step is REQUIRED, so that we can review the changes before pushing
them live via a git based PR.
"""
import requests
import json
import datetime
import csv
import os
from io import StringIO
# Maps each PyScript release to the Pyodide version it bundles, in
# release order (oldest first).
PYSCRIPT_PYODIDE_MAP = {
    "2024.10.1": "0.26.2",
    "2024.10.2": "0.26.3",
    "2024.11.1": "0.26.4",
    "2025.2.1": "0.26.4",
    "2025.2.2": "0.27.2",
    "2025.2.3": "0.27.2",
    "2025.2.4": "0.27.2",
    "2025.3.1": "0.27.3",
    "2025.5.1": "0.27.6",
    "2025.7.1": "0.27.7",
    "2025.7.2": "0.27.7",
    "2025.7.3": "0.27.7",
    "2025.8.1": "0.28.1",
    "2025.10.1": "0.29.0",
    "2025.10.2": "0.29.0",
    "2025.10.3": "0.29.0",
    "2025.11.1": "0.29.0",
    "2026.1.1": "0.29.1",
}
# Reverse lookup: Pyodide version -> PyScript release. Several PyScript
# releases can share one Pyodide version; dict insertion order means the
# most recent (last listed) PyScript release wins for each Pyodide version.
PYODIDE_PYSCRIPT_MAP = {
    pyodide_version: pyscript_version
    for pyscript_version, pyodide_version in PYSCRIPT_PYODIDE_MAP.items()
}
#############################################
# Step 1: Process community contributed package status updates.
#############################################
print("Processing community contributed package status updates...")
# Discover when the script was last run to avoid overwriting newer data.
if os.path.exists(os.path.join("api", "last_run.json")):
    with open(os.path.join("api", "last_run.json"), "r") as f:
        last_run_data = json.load(f)
        last_run_time = datetime.datetime.fromisoformat(
            last_run_data.get("last_run")
        )
else:
    # First ever run: treat everything since the start of 2025 as new.
    last_run_time = datetime.datetime(2025, 1, 1, tzinfo=datetime.timezone.utc)
# Community updates are submitted via a Google Form and published from the
# backing Google Sheet as CSV at this URL.
CSV_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQRcJ_Co69zrLdxbOi7b5zlO7fuqooypL5ejpVPe59YC1CPXHWA-MpLhJBpGJ44FkM0ewmwMo7yq27Z/pub?output=csv"
response = requests.get(CSV_URL)
response.raise_for_status()
csv_file = StringIO(response.text)
reader = csv.DictReader(csv_file)
for row in reader:
    # Sheet timestamps are day-first; treated as UTC (assumption inherited
    # from the sheet's locale — TODO confirm).
    timestamp = datetime.datetime.strptime(
        row.get("Timestamp"), "%d/%m/%Y %H:%M:%S"
    ).replace(tzinfo=datetime.timezone.utc)
    if timestamp <= last_run_time:
        # This update is older than the last run of the script, so skip it.
        continue
    # The dict keys below are the literal question texts from the Google
    # Form, which become the CSV column headers.
    package_name = row.get("Package name (e.g. pandas, numba, my-cool-lib)")
    print(f"Processing community update for package: {package_name}")
    # Normalise the free-text suggested status to one of red/green/amber,
    # defaulting to "amber" when neither keyword is present.
    status = row.get("Suggested status").lower()
    if "red" in status:
        status = "red"
    elif "green" in status:
        status = "green"
    else:
        status = "amber"
    notes = row.get("Comments about status (Markdown allowed)")
    filename = os.path.join("api", "package", f"{package_name}.json")
    try:
        with open(filename, "r") as f:
            data = json.load(f)
    except FileNotFoundError:
        # No existing record for this package: start a fresh skeleton.
        data = {"supported_versions": {}, "summary": None}
    if not data["summary"]:
        # Try to fetch the summary from PyPI.
        pypi_url = f"https://pypi.org/pypi/{package_name}/json"
        pypi_response = requests.get(pypi_url)
        if pypi_response.status_code == 200:
            pypi_data = pypi_response.json()
            data["summary"] = pypi_data["info"].get("summary", "")
        else:
            data["summary"] = ""
    data["status"] = status
    if notes:
        # Only overwrite existing notes when the contributor supplied some.
        data["notes"] = notes
    data["updated_by"] = "Community contribution via Google Forms"
    data["updated_at"] = timestamp.isoformat()
    print(
        f"Updating package '{package_name}' with community status '{status}'"
    )
    with open(filename, "w") as f:
        json.dump(data, f, indent=4)
############################################
# Step 2: Generate per-package JSON files from Pyodide data.
############################################
print("Generating per-package JSON files from Pyodide data...")
# Grab the raw JSON data
response = requests.get(
    "https://raw.githubusercontent.com/pyscript/polyscript/refs/heads/main/rollup/pyodide_graph.json"
)
response.raise_for_status()
package_data = response.json()
# To hold the per-package data to later be turned into JSON files.
packages = {}


def _version_key(version):
    """
    Turn a dotted version string (e.g. "0.27.2") into a tuple of ints so
    releases compare numerically. A plain string comparison would rank
    "0.9.0" above "0.10.0".
    """
    return tuple(int(part) for part in version.split("."))


# Get the latest release of Pyodide. The "latest"/"stable" keys are
# aliases, not real version numbers, so exclude them.
latest_release = max(
    (k for k in package_data.keys() if k not in {"latest", "stable"}),
    key=_version_key,
)
print(f"Latest Pyodide release detected: {latest_release}")
# Iterate over the releases of Pyodide
for release, package_list in package_data.items():
    print(f"Processing release: {release}")
    if release in {"latest", "stable"}:
        continue
    for package_name, version in package_list.items():
        print(f"  Processing package: {package_name}")
        if package_name not in packages:
            packages[package_name] = {}
        # Add the supported version of this package for the given release of
        # Pyodide.
        packages[package_name][release] = {
            "package_version": version,
            "pyscript_version": PYODIDE_PYSCRIPT_MAP.get(release, "unknown")
        }
# Defaults used when a package has no pre-existing JSON file. Kept as
# constants and re-applied for every package, so one package's existing
# attribution can never leak into the next package's record.
DEFAULT_UPDATED_BY = "automated script"
DEFAULT_UPDATED_AT = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
# Write out the per-package JSON files
for package_name, data in packages.items():
    # Package names are case insensitive, so we convert to lowercase.
    package_name = package_name.lower()
    print(f"Processing package data for: {package_name}")
    # Check if the package already has a JSON file (possibly updated by
    # community contributions). Read it once and reuse the parsed data both
    # for the change check and to preserve existing attribution.
    filename = os.path.join("api", "package", f"{package_name}.json")
    try:
        with open(filename, "r") as f:
            existing_data = json.load(f)
    except FileNotFoundError:
        existing_data = None
    if existing_data is None:
        notes = ""
        updated_by = DEFAULT_UPDATED_BY
        updated_at = DEFAULT_UPDATED_AT
    else:
        updated_by = existing_data.get("updated_by", DEFAULT_UPDATED_BY)
        updated_at = existing_data.get("updated_at", DEFAULT_UPDATED_AT)
        # If the supported versions of Pyodide have not changed, skip
        # rewriting the file to avoid clobbering community contributions.
        if existing_data.get("pyodide_versions", {}) == data:
            print(
                f"No changes in supported versions for package '{package_name}'. Skipping."
            )
            continue
        print(
            f"Changes detected in supported Pyodide versions for package '{package_name}'. Updating."
        )
        notes = ""  # Reset notes to repopulate with updated info.
    # Check if the latest release of Pyodide supports this package.
    has_latest = latest_release in data
    if not has_latest:
        print(
            f"Warning: Latest Pyodide release '{latest_release}' does not support package '{package_name}'. Warning added to notes."
        )
    # Fetch the package summary from PyPI
    response = requests.get(f"https://pypi.org/pypi/{package_name}/json")
    if response.status_code != 200:
        print(
            f"Warning: Could not fetch PyPI metadata for package '{package_name}'. Skipping."
        )
        continue
    pypi_metadata = response.json()
    summary = pypi_metadata.get("info", {}).get(
        "summary", "No summary available."
    )
    if not summary:
        # Some packages have an empty string or None as summary.
        summary = "No summary available."
    if not notes:
        if has_latest:
            header = f"Great news! The package `{package_name}` is [officially supported](https://pyodide.org/en/stable/usage/packages-in-pyodide.html) in the latest Pyodide release used by PyScript.\n\n"
        else:
            header = f"⚠️ The package `{package_name}` has been supported in previous versions of Pyodide, but is not supported in the latest Pyodide release (used by default in PyScript). Supported versions of Pyodide and PyScript are listed below, and details of how to pin PyScript to use a specific version of Pyodide can be [found here](https://docs.pyscript.net/2025.11.1/user-guide/configuration/#interpreter).\n\n"
        notes = header + f"""To use it in PyScript simply add it to the `packages` section of your TOML configuration like this:
```
packages = ["{package_name}" ]
```
Or if you're using a JSON configuration, like this:
```
{{
packages: ["{package_name}"]
}}
```
Read more about using packages in PyScript [in our documentation](https://docs.pyscript.net/latest/user-guide/configuration/#packages).
Specifically, the following versions of the package are available for the following Pyodide releases:
Pyodide version: package name (version) (PyScript Version)
"""
    # List supported releases newest-first, using numeric ordering so e.g.
    # a hypothetical 0.10.x would not sort below 0.9.x.
    for k in sorted(data.keys(), key=_version_key, reverse=True):
        notes += f"\n* {k}: {package_name} ({data[k]['package_version']})"
        pyscript_version = data[k]["pyscript_version"]
        if pyscript_version != "unknown":
            notes += f" ([PyScript {pyscript_version}](https://pyscript.net/releases/{pyscript_version}/))"
    output = {
        "status": "green",
        "notes": notes,
        "pyodide_versions": data,
        "updated_by": updated_by,
        "updated_at": updated_at,
        "summary": summary,
    }
    print(f"Writing data for package '{package_name}' to '{filename}'")
    with open(filename, "w") as f:
        json.dump(output, f, indent=4)
#############################################
# Step 3: Generate top_100_pypi_packages.json
#############################################
print("Generating top_100_pypi_packages.json...")
# HugoVK generates these stats each month.
url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.json"
response = requests.get(url)
response.raise_for_status()
top_pypi_data = response.json()
last_updated = top_pypi_data.get("last_update", "unknown")
top100 = top_pypi_data.get("rows", [])[:100]
# Build a summary for the top 100 packages. Each package gets the support
# status from its per-package JSON file (written in earlier steps) when one
# exists; otherwise it defaults to "amber" and the description is looked up
# on PyPI directly.
summary = {"last_updated": last_updated, "packages": []}
for entry in top100:
    package_name = entry.get("project")
    print("Processing top package: ", package_name)
    downloads = entry.get("download_count", 0)
    # Load previously generated support data, if any.
    support_path = os.path.join("api", "package", f"{package_name}.json")
    try:
        with open(support_path, "r") as f:
            support_data = json.load(f)
    except FileNotFoundError:
        support_data = None
    if support_data is not None:
        status = support_data.get("status", "amber")
        desc = support_data.get("summary", "No summary available.")
    else:
        status = "amber"
        pypi_response = requests.get(f"https://pypi.org/pypi/{package_name}/json")
        if pypi_response.status_code == 200:
            pypi_info = pypi_response.json().get("info", {})
            desc = pypi_info.get("summary", "No summary available.")
        else:
            desc = "No summary available."
    summary["packages"].append(
        {
            "package_name": package_name,
            "downloads": downloads,
            "status": status,
            "summary": desc,
        }
    )
# Write out the summary JSON file
with open(os.path.join("api", "top_100_pypi_packages.json"), "w") as f:
    json.dump(summary, f, indent=4)
print("Generated top_100_pypi_packages.json")
#############################################
# Step 4: Record last run time and output all.json
#############################################
# Record when the script was last run (used by Step 1 on the next run to
# decide which community updates are new).
now = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
print(f"Recording last run time: {now}")
with open(os.path.join("api", "last_run.json"), "w") as f:
    json.dump({"last_run": now}, f)
# Generate a final all.json file in the API directory containing details of
# all packages for easy access.
package_dir = os.path.join("api", "package")
all_packages = {}
for entry in os.listdir(package_dir):
    if not entry.endswith(".json"):
        continue
    # Strip the ".json" suffix to recover the package name.
    with open(os.path.join(package_dir, entry), "r") as f:
        all_packages[entry[:-5]] = json.load(f)
with open(os.path.join("api", "all.json"), "w") as f:
    json.dump(all_packages, f, indent=4)
print("Generated api/all.json")