Skip to content

Commit 26ce430

Browse files
committed
test fixes
1 parent da8c49f commit 26ce430

File tree

10 files changed

+105
-101
lines changed

10 files changed

+105
-101
lines changed

.github/workflows/ci.yml

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -16,28 +16,32 @@ jobs:
1616
strategy:
1717
matrix:
1818
python-version: ['3.11', '3.12']
19-
19+
2020
steps:
2121
- uses: actions/checkout@v4
22-
22+
2323
- name: Set up Python ${{ matrix.python-version }}
2424
uses: actions/setup-python@v5
2525
with:
2626
python-version: ${{ matrix.python-version }}
27-
27+
2828
- name: Install uv
2929
uses: astral-sh/setup-uv@v3
3030
with:
3131
version: "latest"
32-
32+
3333
- name: Install dependencies
3434
run: |
3535
uv pip install --system -e ".[test]"
36-
36+
37+
- name: Install Playwright browsers
38+
run: |
39+
playwright install chromium
40+
3741
- name: Run linting with ruff
3842
run: |
3943
ruff check piedomains/
40-
44+
4145
- name: Run tests
4246
run: |
43-
pytest tests/ -v -m "not ml and not performance" --cov=piedomains --cov-report=term-missing
47+
pytest tests/ -v -m "not ml and not performance" --cov=piedomains --cov-report=term-missing

piedomains/api.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
import os
1212
import re
13-
from datetime import datetime
13+
from datetime import UTC, datetime
1414

1515
# LLM imports happen lazily when needed
1616
from .piedomains_logging import get_logger
@@ -660,7 +660,7 @@ def classify_from_collection(
660660
os.makedirs(os.path.dirname(output_file), exist_ok=True)
661661

662662
output_data = {
663-
"inference_timestamp": datetime.utcnow().isoformat() + "Z",
663+
"inference_timestamp": datetime.now(UTC).isoformat(),
664664
"model_used": "combined/text_image_ml",
665665
"total_domains": len(combined_results),
666666
"successful": len(

piedomains/content_processor.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -107,8 +107,6 @@ def extract_all_content(
107107
fetch_results.append(result)
108108
except Exception as e:
109109
# Create a failed FetchResult for network errors
110-
from ..fetchers import FetchResult
111-
112110
failed_result = FetchResult(
113111
url=url, success=False, error=f"Network error: {str(e)}"
114112
)
@@ -334,6 +332,6 @@ def _parse_domain_name(self, url_or_domain: str) -> str:
334332
str: Clean domain name
335333
"""
336334
# Import here to avoid circular imports
337-
from ..piedomain import Piedomain
335+
from .piedomain import Piedomain
338336

339337
return Piedomain.parse_url_to_domain(url_or_domain)

piedomains/data_collector.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
import json
1010
import uuid
11-
from datetime import datetime
11+
from datetime import UTC, datetime
1212
from pathlib import Path
1313

1414
from .config import get_config
@@ -96,7 +96,7 @@ def collect(
9696
raise ValueError("domains list cannot be empty")
9797

9898
collection_id = collection_id or str(uuid.uuid4())
99-
collection_start = datetime.utcnow()
99+
collection_start = datetime.now(UTC)
100100

101101
logger.info(f"Starting data collection for {len(domains)} domains")
102102

@@ -147,7 +147,7 @@ def _collect_single_domain(self, domain: str, use_cache: bool) -> dict:
147147
Dictionary with domain collection results
148148
"""
149149
domain_name = self._parse_domain_name(domain)
150-
collection_time = datetime.utcnow()
150+
collection_time = datetime.now(UTC)
151151

152152
# Define file paths
153153
html_file = self.html_dir / f"{domain_name}.html"
@@ -246,7 +246,7 @@ def collect_batch(
246246
raise ValueError("domains list cannot be empty")
247247

248248
collection_id = collection_id or str(uuid.uuid4())
249-
collection_start = datetime.utcnow()
249+
collection_start = datetime.now(UTC)
250250

251251
logger.info(f"Starting batch data collection for {len(domains)} domains")
252252

@@ -273,7 +273,7 @@ def collect_batch(
273273
"domain": domain_name,
274274
"text_path": None,
275275
"image_path": None,
276-
"date_time_collected": datetime.utcnow().isoformat() + "Z",
276+
"date_time_collected": datetime.now(UTC).isoformat(),
277277
"fetch_success": False,
278278
"cached": False,
279279
"error": f"Batch processing failed: {e}",
@@ -341,7 +341,7 @@ def _collect_batch_parallel(
341341
"domain": domain_name,
342342
"text_path": str(html_file.relative_to(self.cache_dir)),
343343
"image_path": str(image_file.relative_to(self.cache_dir)),
344-
"date_time_collected": datetime.utcnow().isoformat() + "Z",
344+
"date_time_collected": datetime.now(UTC).isoformat(),
345345
"fetch_success": True,
346346
"cached": True,
347347
"error": None,
@@ -363,7 +363,7 @@ def _collect_batch_parallel(
363363
fetched_results = []
364364
for result in fetch_results:
365365
domain_name = self._parse_domain_name(result.url)
366-
collection_time = datetime.utcnow()
366+
collection_time = datetime.now(UTC)
367367

368368
if result.success:
369369
# Save HTML content

piedomains/image.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import os
7+
from datetime import UTC
78
from pathlib import Path
89

910
import numpy as np
@@ -239,7 +240,7 @@ def classify_from_paths(
239240

240241
# Add metadata
241242
output_data = {
242-
"inference_timestamp": datetime.utcnow().isoformat() + "Z",
243+
"inference_timestamp": datetime.now(UTC).isoformat(),
243244
"model_used": "image/shallalist_ml",
244245
"total_domains": len(data_paths),
245246
"successful": len([r for r in results if r["category"] is not None]),

piedomains/llm_classifier.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import base64
66
import json
77
import time
8-
from datetime import datetime
8+
from datetime import UTC, datetime
99
from io import BytesIO
1010
from pathlib import Path
1111
from typing import Any
@@ -557,7 +557,7 @@ def classify_from_paths(
557557

558558
# Add metadata
559559
output_data = {
560-
"inference_timestamp": datetime.utcnow().isoformat() + "Z",
560+
"inference_timestamp": datetime.now(UTC).isoformat(),
561561
"model_used": f"{mode}/llm_{self.config.provider}_{self.config.model}",
562562
"total_domains": len(data_paths),
563563
"successful": len([r for r in results if r["category"] is not None]),

piedomains/text.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
"""
55

66
import os
7+
from datetime import UTC
78
from pathlib import Path
89

910
import numpy as np
@@ -304,7 +305,7 @@ def classify_from_paths(
304305

305306
# Add metadata
306307
output_data = {
307-
"inference_timestamp": datetime.utcnow().isoformat() + "Z",
308+
"inference_timestamp": datetime.now(UTC).isoformat(),
308309
"model_used": "text/shallalist_ml",
309310
"total_domains": len(data_paths),
310311
"successful": len([r for r in results if r["category"] is not None]),

0 commit comments

Comments
 (0)