88
99import json
1010import uuid
11- from datetime import datetime
11+ from datetime import UTC , datetime
1212from pathlib import Path
1313
1414from .config import get_config
@@ -96,7 +96,7 @@ def collect(
9696 raise ValueError ("domains list cannot be empty" )
9797
9898 collection_id = collection_id or str (uuid .uuid4 ())
99- collection_start = datetime .utcnow ( )
99+ collection_start = datetime .now ( UTC )
100100
101101 logger .info (f"Starting data collection for { len (domains )} domains" )
102102
@@ -147,7 +147,7 @@ def _collect_single_domain(self, domain: str, use_cache: bool) -> dict:
147147 Dictionary with domain collection results
148148 """
149149 domain_name = self ._parse_domain_name (domain )
150- collection_time = datetime .utcnow ( )
150+ collection_time = datetime .now ( UTC )
151151
152152 # Define file paths
153153 html_file = self .html_dir / f"{ domain_name } .html"
@@ -246,7 +246,7 @@ def collect_batch(
246246 raise ValueError ("domains list cannot be empty" )
247247
248248 collection_id = collection_id or str (uuid .uuid4 ())
249- collection_start = datetime .utcnow ( )
249+ collection_start = datetime .now ( UTC )
250250
251251 logger .info (f"Starting batch data collection for { len (domains )} domains" )
252252
@@ -273,7 +273,7 @@ def collect_batch(
273273 "domain" : domain_name ,
274274 "text_path" : None ,
275275 "image_path" : None ,
276- "date_time_collected" : datetime .utcnow ( ).isoformat () + "Z" ,
276+ "date_time_collected" : datetime .now ( UTC ).isoformat (),
277277 "fetch_success" : False ,
278278 "cached" : False ,
279279 "error" : f"Batch processing failed: { e } " ,
@@ -341,7 +341,7 @@ def _collect_batch_parallel(
341341 "domain" : domain_name ,
342342 "text_path" : str (html_file .relative_to (self .cache_dir )),
343343 "image_path" : str (image_file .relative_to (self .cache_dir )),
344- "date_time_collected" : datetime .utcnow ( ).isoformat () + "Z" ,
344+ "date_time_collected" : datetime .now ( UTC ).isoformat (),
345345 "fetch_success" : True ,
346346 "cached" : True ,
347347 "error" : None ,
@@ -363,7 +363,7 @@ def _collect_batch_parallel(
363363 fetched_results = []
364364 for result in fetch_results :
365365 domain_name = self ._parse_domain_name (result .url )
366- collection_time = datetime .utcnow ( )
366+ collection_time = datetime .now ( UTC )
367367
368368 if result .success :
369369 # Save HTML content
0 commit comments