Skip to content

Commit 6317217

Browse files
authored
Include project and spider name in job logs on object storage (#47, PR #51)
This is the behaviour of scrapyd, and makes it much easier to find logs by accessing object storage.
1 parent 01619f8 commit 6317217

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

kubernetes.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ data:
9797
env_config = spider-example-env
9898
repository = ghcr.io/q-m/scrapyd-k8s-spider-example
9999
100-
# It is strongly recomended to set resource requests and limits on production.
100+
# It is strongly recommended to set resource requests and limits on production.
101101
# They can be overridden on the project and spider level.
102102
[default.resources]
103103
requests_cpu = 0.2

scrapyd_k8s/joblogs/log_handler_k8s.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ def handle_events(self, event):
298298
if pod.metadata.labels.get("org.scrapy.job_id"):
299299
job_id = pod.metadata.labels.get("org.scrapy.job_id")
300300
pod_name = pod.metadata.name
301+
spider = pod.metadata.labels.get("org.scrapy.spider")
302+
project = pod.metadata.labels.get("org.scrapy.project")
301303
thread_name = f"{self.namespace}_{pod_name}"
302304
if pod.status.phase == 'Running':
303305
if (thread_name in self.watcher_threads
@@ -320,7 +322,7 @@ def handle_events(self, event):
320322
logger.info(
321323
f"Removed local log file '{log_filename}' since it already exists in storage.")
322324
else:
323-
self.object_storage_provider.upload_file(log_filename)
325+
self.object_storage_provider.upload_file(project, spider, log_filename)
324326
os.remove(log_filename)
325327
logger.info(f"Removed local log file '{log_filename}' after successful upload.")
326328
else:

scrapyd_k8s/object_storage/libcloud_driver.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from libcloud.storage.providers import get_driver
1111

1212
logger = logging.getLogger(__name__)
13+
logging.basicConfig(level=logging.DEBUG)
1314

1415
class LibcloudObjectStorage:
1516
"""
@@ -113,14 +114,18 @@ def replace_var(match):
113114
result = result.replace(r'\${', '${')
114115
return result
115116

116-
def upload_file(self, local_path):
117+
def upload_file(self, project, spider, local_path):
117118
"""
118119
Uploads a file to the object storage container.
119120
120121
Parameters
121122
----------
122123
local_path : str
123-
The local file path of the file to be uploaded.
124+
The local path of the log file to upload; its base name without the '.txt' extension is used as the job id.
125+
project : str
126+
The name of the project.
127+
spider : str
128+
The name of the spider.
124129
125130
Returns
126131
-------
@@ -130,7 +135,8 @@ def upload_file(self, local_path):
130135
----
131136
Logs information about the upload status or errors encountered.
132137
"""
133-
object_name = os.path.basename(local_path)
138+
job_id = os.path.basename(local_path).replace('.txt', '')
139+
object_name = f"logs/{project}/{spider}/{job_id}.log"
134140
try:
135141
container = self.driver.get_container(container_name=self._container_name)
136142
self.driver.upload_object(

0 commit comments

Comments
 (0)