1+ """
2+ 檔案保留期限管理系統
3+ 用於自動清理超過保留期限的衛星數據檔案
4+ 支持嵌套目錄結構: Satellite/figure/file_type/年份/月份/檔案
5+ """
6+ import logging
7+ from pathlib import Path
8+ from datetime import datetime , timedelta
9+
logger = logging.getLogger(__name__)


class FileRetentionManager:
    """Delete files that have outlived a configurable retention period.

    A file is considered expired when its modification time (or, for flag
    files, the date embedded in its name) is earlier than
    ``now - retention_days``. All deletions are best-effort: a failure on
    one file or directory is logged and the run continues.
    """

    def __init__(self, retention_days):
        """Create a manager.

        Args:
            retention_days (int): Number of days a file is kept before it
                becomes eligible for deletion.
        """
        self.retention_days = retention_days

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _list_entries(directory):
        """Return the entries of *directory*, or ``[]`` if it is unreadable."""
        try:
            return list(directory.iterdir())
        except OSError as e:
            logger.error(f"讀取目錄 {directory} 時出錯: {e}")
            return []

    @staticmethod
    def _remove_if_expired(file_path, cutoff_date, removed_label):
        """Delete *file_path* if its mtime is older than *cutoff_date*.

        Args:
            file_path (Path): File to examine.
            cutoff_date (datetime): Files modified before this are deleted.
            removed_label (str): Prefix of the info message logged on
                successful deletion.

        Returns:
            bool: True if the file was deleted, False if it was kept or the
            operation failed (failures are logged, never raised).
        """
        try:
            # stat() lives inside the try: the file may disappear or become
            # unreadable between listing and inspection, and that must not
            # abort the whole cleanup run.
            modified = datetime.fromtimestamp(file_path.stat().st_mtime)
            if modified >= cutoff_date:
                return False
            file_path.unlink()
        except (OSError, OverflowError, ValueError) as e:
            logger.error(f"刪除檔案 {file_path} 時出錯: {e}")
            return False
        logger.info(f"{removed_label}: {file_path}")
        return True

    @staticmethod
    def _remove_dir_if_empty(directory, removed_label):
        """Remove *directory* when it has no entries; log instead of raising.

        Returns:
            bool: True if the directory was removed.
        """
        try:
            if any(directory.iterdir()):
                return False
            directory.rmdir()
        except OSError as e:
            logger.error(f"刪除目錄 {directory} 時出錯: {e}")
            return False
        logger.info(f"{removed_label}: {directory}")
        return True

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def clean_directories(self, base_dir, subdirs=None):
        """Delete expired files located directly inside the given directories.

        The cleanup is not recursive: only regular files that are immediate
        children of each target directory are examined.

        Args:
            base_dir (str or Path): Base directory.
            subdirs (list): Names of sub-directories of ``base_dir`` to clean.
                When ``None`` or empty, ``base_dir`` itself is cleaned.
                Sub-directories that do not exist are skipped.

        Returns:
            int: Number of files deleted.
        """
        base_path = Path(base_dir)

        # is_dir() rather than exists(): a regular file passed as base_dir
        # would otherwise make iterdir() raise NotADirectoryError.
        if not base_path.is_dir():
            logger.warning(f"目錄不存在: {base_path}")
            return 0

        if subdirs:
            candidates = (base_path / subdir for subdir in subdirs)
            dirs_to_clean = [path for path in candidates if path.is_dir()]
        else:
            dirs_to_clean = [base_path]

        cutoff_date = datetime.now() - timedelta(days=self.retention_days)
        return sum(
            self._clean_directory(directory, cutoff_date)
            for directory in dirs_to_clean
        )

    def _clean_directory(self, directory, cutoff_date):
        """Delete expired regular files that are direct children of *directory*.

        Args:
            directory (Path): Directory to clean.
            cutoff_date (datetime): Files modified before this are deleted.

        Returns:
            int: Number of files deleted.
        """
        logger.info(f"開始清理目錄: {directory}")
        removed_count = 0
        for entry in self._list_entries(directory):
            if entry.is_file() and self._remove_if_expired(
                entry, cutoff_date, "已刪除舊檔案"
            ):
                removed_count += 1
        return removed_count

    def clean_satellite_figure_data(self, data_root, file_types=None):
        """Delete expired PNG images from the nested figure tree.

        Walks ``<data_root>/figure/<file_type>/<year>/<month>/*.png`` and
        removes month and year directories that are empty afterwards.

        NOTE(review): the documented layout is ``Satellite/figure/...`` and
        ``clean_all_satellite_data`` resolves its other directories under
        ``data_root / "Satellite"``, yet figures are looked up directly under
        ``data_root / "figure"``. Confirm which location is intended.

        Args:
            data_root (str or Path): Data root directory.
            file_types (list): File-type directory names, e.g.
                ``['NO2____', 'CO_____']``. When ``None``, every
                sub-directory of the figure directory is processed.

        Returns:
            dict: Maps each file type to the number of files deleted
            (0 for file types whose directory does not exist).
        """
        figure_path = Path(data_root) / "figure"

        if not figure_path.is_dir():
            logger.warning(f"衛星圖像目錄不存在: {figure_path}")
            return {}

        if file_types is None:
            file_types = [
                entry.name
                for entry in self._list_entries(figure_path)
                if entry.is_dir()
            ]

        cutoff_date = datetime.now() - timedelta(days=self.retention_days)
        results = {}

        for file_type in file_types:
            file_type_dir = figure_path / file_type
            if not file_type_dir.is_dir():
                logger.warning(f"文件類型目錄不存在: {file_type_dir}")
                results[file_type] = 0
                continue

            removed_count = 0
            for year_dir in self._list_entries(file_type_dir):
                if not year_dir.is_dir():
                    continue
                for month_dir in self._list_entries(year_dir):
                    if not month_dir.is_dir():
                        continue
                    for png_file in list(month_dir.glob("*.png")):
                        # The glob can also match directories named *.png;
                        # skip them rather than log a spurious unlink error.
                        if png_file.is_file() and self._remove_if_expired(
                            png_file, cutoff_date, "已刪除舊圖像檔案"
                        ):
                            removed_count += 1
                    self._remove_dir_if_empty(month_dir, "已刪除空月份目錄")
                self._remove_dir_if_empty(year_dir, "已刪除空年份目錄")

            results[file_type] = removed_count

        return results

    def clean_all_satellite_data(self, data_root, file_types=None):
        """Run every cleanup step for satellite data under *data_root*.

        Steps: figure images, ``Satellite/data``, ``Satellite/processed``
        and the ``processed_*.flag`` marker files in ``Satellite``.

        Args:
            data_root (str or Path): Data root directory.
            file_types (list): File-type directory names passed through to
                ``clean_satellite_figure_data``.

        Returns:
            dict: Deleted-file counts. Keys are ``figure_<file_type>`` for
            each figure type plus ``data_files``, ``processed_files`` and
            ``flag_files``; the latter three are present only when the
            corresponding directory exists.
        """
        data_root_path = Path(data_root)
        satellite_dir = data_root_path / "Satellite"

        figure_results = self.clean_satellite_figure_data(
            data_root_path, file_types
        )
        results = {f"figure_{k}": v for k, v in figure_results.items()}

        # Downloaded raw data and processed data: direct children only.
        for key, name in (("data_files", "data"), ("processed_files", "processed")):
            target = satellite_dir / name
            if target.exists():
                results[key] = self.clean_directories(target)

        if satellite_dir.exists():
            cutoff_date = datetime.now() - timedelta(days=self.retention_days)
            results["flag_files"] = self._clean_flag_files(
                satellite_dir, cutoff_date=cutoff_date
            )

        return results

    def _clean_flag_files(self, directory, cutoff_date):
        """Delete ``processed_<YYYY-MM-DD>.flag`` files dated before the cutoff.

        The date is taken from the file name, not from the file's mtime.
        Files whose name does not contain a valid date are logged and kept.

        Args:
            directory (Path): Directory containing the flag files.
            cutoff_date (datetime): Flags dated before this are deleted.

        Returns:
            int: Number of flag files deleted.
        """
        prefix, suffix = "processed_", ".flag"
        removed_count = 0

        for flag_file in list(Path(directory).glob(f"{prefix}*{suffix}")):
            # Slice off exactly one prefix and one suffix; str.replace would
            # also strip occurrences from the middle of the name.
            date_str = flag_file.name[len(prefix):-len(suffix)]
            try:
                file_date = datetime.strptime(date_str, "%Y-%m-%d")
            except ValueError as e:
                logger.error(f"處理標記檔案 {flag_file} 時出錯: {e}")
                continue

            if file_date >= cutoff_date:
                continue

            try:
                flag_file.unlink()
            except OSError as e:
                logger.error(f"處理標記檔案 {flag_file} 時出錯: {e}")
                continue

            logger.info(f"已刪除舊標記檔案: {flag_file}")
            removed_count += 1

        return removed_count
0 commit comments