Skip to content

Commit c8b225d

Browse files
authored
Merge branch 'main' into optimize-combine-py-15720569534957339199
2 parents 092621f + 71a8cf0 commit c8b225d

File tree

3 files changed

+98
-63
lines changed

3 files changed

+98
-63
lines changed

Cachyos/git-fetch.py

Lines changed: 97 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
import urllib.parse
2121
import urllib.request
2222
import urllib.error
23-
from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED
23+
from concurrent.futures import ThreadPoolExecutor, as_completed
2424
from dataclasses import dataclass
2525
from pathlib import Path
2626
from typing import Literal, Optional
@@ -91,89 +91,126 @@ def http_get(url: str, headers: dict[str, str] | None = None) -> bytes:
9191

9292

9393
def fetch_github(spec: RepoSpec, output: Path, token: Optional[str] = None) -> None:
94-
"""Download from GitHub using Contents API."""
94+
"""Download from GitHub using Tree API (recursive)."""
9595
token = token or os.getenv("GITHUB_TOKEN", "")
9696
headers = {"Accept": "application/vnd.github.v3+json"}
9797
if token:
9898
headers["Authorization"] = f"token {token}"
9999

100-
files_to_download = []
101-
102-
def process_node(current_spec: RepoSpec, current_output: Path):
103-
api_url = f"https://api.github.com/repos/{current_spec.owner}/{current_spec.repo}/contents/{current_spec.path}"
104-
if current_spec.branch != "main":
105-
api_url += f"?ref={current_spec.branch}"
100+
# Fetch the entire tree recursively
101+
api_url = (
102+
f"https://api.github.com/repos/{spec.owner}/{spec.repo}/git/trees/"
103+
f"{urllib.parse.quote(spec.branch, safe='')}?recursive=1"
104+
)
106105

107-
try:
108-
data_bytes = http_get(api_url, headers)
109-
data = json.loads(data_bytes)
110-
except urllib.error.HTTPError as e:
111-
if e.code == 404:
112-
# Fallback to raw file download if API fails (maybe it's a file, not dir)
113-
raw_url = f"https://raw.githubusercontent.com/{current_spec.owner}/{current_spec.repo}/{current_spec.branch}/{current_spec.path}"
114-
content = http_get(raw_url, headers)
115-
current_output.parent.mkdir(parents=True, exist_ok=True)
116-
current_output.write_bytes(content)
117-
print(f"✓ {current_spec.path}")
118-
return [], []
106+
try:
107+
data_bytes = http_get(api_url, headers)
108+
data = json.loads(data_bytes)
109+
except urllib.error.HTTPError as e:
110+
if e.code == 404:
111+
# Fallback: the tree request returned 404; spec.path may be a single file that the raw endpoint can still serve.
112+
# Or the branch doesn't exist.
113+
# We can try raw download if spec.path is set, similar to original fallback.
114+
if spec.path:
115+
raw_url = f"https://raw.githubusercontent.com/{spec.owner}/{spec.repo}/{spec.branch}/{urllib.parse.quote(spec.path)}"
116+
try:
117+
content = http_get(raw_url, headers)
118+
output.parent.mkdir(parents=True, exist_ok=True)
119+
output.write_bytes(content)
120+
print(f"✓ {spec.path}")
121+
return
122+
except urllib.error.HTTPError:
123+
pass # Original 404 was correct
119124
raise
125+
raise
120126

121-
if isinstance(data, dict):
122-
data = [data]
123-
124-
local_files = []
125-
local_dirs = []
126-
127-
for item in data:
128-
item_path = item["path"]
129-
local_path = current_output / Path(item_path).name
130-
131-
if item["type"] == "file":
132-
local_files.append((item["download_url"], local_path, item_path))
133-
elif item["type"] == "dir":
134-
local_path.mkdir(parents=True, exist_ok=True)
135-
local_dirs.append((item_path, local_path))
127+
if data.get("truncated"):
128+
print(
129+
"Error: GitHub Tree API response is truncated; aborting to avoid an incomplete download.",
130+
file=sys.stderr,
131+
)
132+
sys.exit(1)
136133

137-
return local_files, local_dirs
134+
files_to_download = []
138135

139-
max_workers = min(32, (os.cpu_count() or 1) * 4)
136+
# Filter items based on spec.path
137+
target_path = spec.path.strip("/")
140138

141-
# Discovery phase
142-
with ThreadPoolExecutor(max_workers=max_workers) as executor:
143-
futures = {}
139+
found_any = False
144140

145-
def submit_spec(s, o):
146-
f = executor.submit(process_node, s, o)
147-
futures[f] = (s, o)
141+
for item in data.get("tree", []):
142+
item_path = item["path"]
148143

149-
submit_spec(spec, output)
144+
# Check if item matches target_path
145+
if (
146+
target_path
147+
and item_path != target_path
148+
and not item_path.startswith(target_path + "/")
149+
):
150+
continue
151+
152+
found_any = True
153+
154+
# Determine local path
155+
if target_path:
156+
# Relative path from target_path
157+
rel_path = item_path[len(target_path) :].lstrip("/")
158+
# Detect whether the user-supplied path was intended as a directory
159+
requested_is_dir = spec.path.endswith("/")
160+
if not rel_path and item_path == target_path:
161+
# Exact match of the target path
162+
if requested_is_dir and item["type"] != "tree":
163+
raise ValueError(
164+
f"Requested path {spec.path!r} is a directory, but repository "
165+
f"contains a {item['type']} at that path."
166+
)
167+
if not requested_is_dir and item["type"] != "blob":
168+
raise ValueError(
169+
f"Requested path {spec.path!r} is a file, but repository "
170+
f"contains a {item['type']} at that path."
171+
)
172+
# For an exact match with the expected type, use the output path directly.
173+
local_path = output
174+
else:
175+
local_path = output / rel_path
176+
else:
177+
local_path = output / item_path
150178

151-
while futures:
152-
done, _ = wait(futures, return_when=FIRST_COMPLETED)
153-
for future in done:
154-
s, o = futures.pop(future)
155-
try:
156-
f_list, d_list = future.result()
157-
files_to_download.extend(f_list)
179+
if item["type"] == "tree":
180+
local_path.mkdir(parents=True, exist_ok=True)
181+
elif item["type"] == "blob":
182+
encoded_path = "/".join(urllib.parse.quote(p) for p in item_path.split("/"))
183+
raw_url = f"https://raw.githubusercontent.com/{spec.owner}/{spec.repo}/{spec.branch}/{encoded_path}"
184+
files_to_download.append((raw_url, local_path, item_path))
158185

159-
for item_path, local_path in d_list:
160-
sub_spec = RepoSpec(
161-
s.platform, s.owner, s.repo, item_path, s.branch
162-
)
163-
submit_spec(sub_spec, local_path)
186+
if not found_any:
187+
# If the path was not found in the tree, try a raw download as a fallback (a truncated tree has already aborted above)
188+
if target_path:
189+
raw_url = f"https://raw.githubusercontent.com/{spec.owner}/{spec.repo}/{spec.branch}/{urllib.parse.quote(target_path)}"
190+
try:
191+
content = http_get(raw_url, headers)
192+
output.parent.mkdir(parents=True, exist_ok=True)
193+
output.write_bytes(content)
194+
print(f"✓ {target_path}")
195+
return
196+
except urllib.error.HTTPError:
197+
pass
164198

165-
except Exception as e:
166-
print(f"Error processing {s.path}: {e}", file=sys.stderr)
167-
raise
199+
print(f"✗ Path not found: {spec.path}", file=sys.stderr)
200+
# We don't raise here to allow main to exit cleanly?
201+
# But original code raised or returned empty.
202+
# If we return, we print nothing else.
203+
return
168204

169205
if not files_to_download:
170206
return
171207

172208
# Parallel file downloads
209+
max_workers = min(32, (os.cpu_count() or 1) * 4)
210+
173211
def download_file(url, path, item_path):
174212
try:
175213
content = http_get(url, headers)
176-
path.parent.mkdir(parents=True, exist_ok=True)
177214
path.write_bytes(content)
178215
print(f"✓ {item_path}")
179216
except Exception as e:
@@ -191,7 +228,6 @@ def download_file(url, path, item_path):
191228
except Exception:
192229
pass # Already logged
193230

194-
195231
def fetch_gitlab(spec: RepoSpec, output: Path, token: Optional[str] = None) -> None:
196232
"""Download from GitLab using Repository API."""
197233
token = token or os.getenv("GITLAB_TOKEN", "")

RaspberryPi/Scripts/combine.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
chardet = None
99

1010
WORD_PATTERN = re.compile(r"[a-zA-Z0-9]+")
11-
VALID_WORD_PATTERN = re.compile(r"^[a-zA-Z0-9_.,!?@#$%^&*()-=+ ]+$")
1211

1312

1413
def detect_encoding(data: bytes) -> str:

lint-format.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ fi
6565

6666
if has shfmt && ((${#shell_files[@]})); then
6767
if ((check_mode)); then
68-
if ! diff_out=$(shfmt -i 2 -bn -ci -s -ln bash -d "${shell_files[@]}"); then
68+
if ! diff_out=$(shfmt -i 2 -bn -ln bash -d "${shell_files[@]}"); then
6969
status=1
7070
fi
7171
if [[ -n ${diff_out:-} ]]; then

0 commit comments

Comments
 (0)