Alex870521
diff --git a/‎main.py‎
Lines changed: 2 additions & 2 deletions b/‎main.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎main_earthdata.py‎
Lines changed: 91 additions & 0 deletions b/‎main_earthdata.py‎
Lines changed: 91 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 3 additions & 1 deletion b/‎requirements.txt‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎run_pipeline.py‎
Lines changed: 32 additions & 5 deletions b/‎run_pipeline.py‎
Lines changed: 32 additions & 5 deletions
diff --git a/‎src/api/earthdata_api.py‎
Lines changed: 146 additions & 0 deletions b/‎src/api/earthdata_api.py‎
Lines changed: 146 additions & 0 deletions
@@ -89,8 +89,8 @@ def main():
     # 步驟：
     # 1. 前往src.config.settings中更改輸出路徑（硬碟路徑）
     # 2. 設定參數
-    start, end = '2025-02-20', '2025-03-06'
-    file_class: ClassInput = 'OFFL'
+    start, end = '2025-03-01', '2025-03-13'
+    file_class: ClassInput = 'NRTI'
     file_type: TypeInput = 'NO2___'
 
     # 3. 設定輸入輸出配置
 
@@ -0,0 +1,91 @@
+"""主程式"""
+import logging
+import asyncio
+from datetime import datetime
+from pathlib import Path
+
+from src.api.earthdata_api import EARTHDATAFetcher
+from src.processing.modis_processor import MODISProcessor
+
+from src.config.richer import rich_print
+from src.config.catalog import ClassInput, TypeInput, PRODUCT_CONFIGS
+from src.config.setup import setup, setup_nasa
+
+
+logger = logging.getLogger(__name__)
+
+
+async def fetch_data(file_type,
+                     start_date: str | datetime,
+                     end_date: str | datetime) -> bool:
+    """Search for and download Earthdata (MODIS) products for a date range.
+
+    Args:
+        file_type: Product identifier handed to ``EARTHDATAFetcher``
+            (``main`` in this module uses ``"MYD04"``).
+        start_date: First day of the search window.
+        end_date: Last day of the search window.
+
+    Returns:
+        True when matching products were found and the download step ran.
+        False when nothing matched or when any step raised. Errors are
+        reported and logged here rather than propagated, so callers can
+        use the result directly as a "has data" flag.
+    """
+    try:
+        rich_print(
+            f"Fetching Earthdata (MODIS) products from {start_date} to {end_date} ...")
+
+        fetcher = EARTHDATAFetcher()
+
+        products = await fetcher.fetch_data(
+            file_type=file_type,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Guard clause: nothing to download for this window.
+        if not products:
+            rich_print("No data matching the criteria was found")
+            return False
+
+        rich_print(f"Start download Earthdata (MODIS) products from {start_date} to {end_date} ...")
+        fetcher.download(products)
+        rich_print("Data download completed！")
+        return True
+
+    except Exception as e:
+        error_message = f"Failed to download data: {str(e)}"
+        rich_print(error_message)
+        # logger.exception keeps the traceback, which logger.error dropped.
+        logger.exception(error_message)
+        return False
+
+
+def process_data(file_type,
+                 start_date: str | datetime,
+                 end_date: str | datetime) -> bool:
+    """Process the downloaded Earthdata (MODIS) files for a date range.
+
+    Args:
+        file_type: Product identifier; also the top-level folder name used
+            in the file search pattern (``"<file_type>/**/*.hdf"``).
+        start_date: First day of the processing window.
+        end_date: Last day of the processing window.
+
+    Returns:
+        True when processing finished without raising, False otherwise.
+        Callers that branch on the result (for example
+        ``success = process_data(...)``) rely on this; without an explicit
+        return value every run would be treated as a failure.
+    """
+    try:
+        rich_print(
+            f"Processing Earthdata (MODIS) products from {start_date} to {end_date} ...")
+
+        processor = MODISProcessor(file_type)
+
+        # Process every file found in the organised directory structure.
+        processor.process_all_files(
+            pattern=f"{file_type}/**/*.hdf",
+            start_date=start_date,
+            end_date=end_date
+        )
+
+        rich_print("Data processing completed")
+        return True
+
+    except Exception as e:
+        error_message = f"Failed to process data: {str(e)}"
+        rich_print(error_message)
+        # logger.exception keeps the traceback, which logger.error dropped.
+        logger.exception(error_message)
+        return False
+
+
+def main():
+    """Run the Earthdata (MODIS) workflow once: configure, download, process."""
+    # Before running: set the output (disk) path in src.config.settings.
+
+    # Parameters shared by every stage of the run.
+    run_args = {
+        "file_type": "MYD04",
+        "start_date": '2025-03-01',
+        "end_date": '2025-03-12',
+    }
+
+    # Configure input/output for this run.
+    setup_nasa(**run_args)
+
+    # Download the data (needs a .env file containing EARTHDATA_USERNAME
+    # and EARTHDATA_PASSWORD).
+    asyncio.run(fetch_data(**run_args))
+
+    # Process and plot the data.
+    process_data(**run_args)
+
+
+if __name__ == "__main__":
+    main()
@@ -21,4 +21,6 @@ pillow~=10.4.0
 pytest~=8.3.4
 tkcalendar~=1.6.1
 certifi~=2024.8.30
-schedule~=1.2.2
+schedule~=1.2.2
+earthaccess~=0.14.0
+pyhdf~=0.11.6
@@ -6,15 +6,17 @@
 import logging
 import asyncio
 import time
-from datetime import datetime
+from datetime import datetime, timedelta
 import schedule
 
 from src.api.sentinel_api import S5PFetcher
 from src.processing.data_processor import S5Processor
 from src.config.catalog import ClassInput, TypeInput, PRODUCT_CONFIGS
-from src.config.setup import setup
+from src.config.setup import setup, setup_nasa
 from src.config.settings import FILTER_BOUNDARY, DATA_RETENTION_DAYS, LOGS_DIR, BASE_DIR
 
+from main_earthdata import fetch_data, process_data
+
 # 導入檔案保留管理器
 from file_retention_manager import FileRetentionManager
 
@@ -134,26 +136,51 @@ async def daily_task():
 
     # 設定參數 - 只處理當天的數據
     today = datetime.now().strftime('%Y-%m-%d')
+    two_days_ago = (datetime.now() - timedelta(days=2)).strftime('%Y-%m-%d')
+    seven_days_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
+
+    # 開始執行 Sentinel-5P
     file_class: ClassInput = 'NRTI'
     file_type: list[TypeInput] = ['NO2___', 'HCHO__', 'CO____']
 
     for file_tp in file_type:
         # 設定輸入輸出配置
-        setup(file_type=file_tp, start_date=today, end_date=today)
+        setup(file_type=file_tp, start_date=two_days_ago, end_date=today)
+
+        # Check for and download data from the past two days (two_days_ago through today)
+        has_data = await fetch_data_auto(file_class=file_class, file_type=file_tp, start_date=two_days_ago, end_date=today)
+
+        # 如果有數據，則處理並繪製
+        if has_data:
+            success = process_data_auto(file_class=file_class, file_type=file_tp, start_date=two_days_ago, end_date=today)
+            if success:
+                logger.info(f"每日衛星數據處理pipeline執行完成 - {today}")
+            else:
+                logger.error(f"處理數據失敗 - {today}")
+        else:
+            logger.info(f"今日({today})無可用的衛星數據")
+
+    # 開始執行 MODIS
+    file_type: list[str] = ['MYD04', 'MOD04']
+
+    for file_tp in file_type:
+        # 設定輸入輸出配置
+        setup_nasa(file_type=file_tp, start_date=seven_days_ago, end_date=today)
 
         # 檢查並下載當天的數據
-        has_data = await fetch_data_auto(file_class=file_class, file_type=file_tp, start_date=today, end_date=today)
+        has_data = await fetch_data(file_type=file_tp, start_date=seven_days_ago, end_date=today)
 
         # 如果有數據，則處理並繪製
         if has_data:
-            success = process_data_auto(file_class=file_class, file_type=file_tp, start_date=today, end_date=today)
+            success = process_data(file_type=file_tp, start_date=seven_days_ago, end_date=today)
             if success:
                 logger.info(f"每日衛星數據處理pipeline執行完成 - {today}")
             else:
                 logger.error(f"處理數據失敗 - {today}")
         else:
             logger.info(f"今日({today})無可用的衛星數據")
 
+
     # 執行舊檔案清理任務
     # logger.info("執行舊檔案清理任務")
     # clean_old_files()
 
@@ -0,0 +1,146 @@
+import re
+import shutil
+import earthaccess
+
+from datetime import datetime
+from pathlib import Path
+from dotenv import load_dotenv
+from src.config.richer import console, rich_print, DisplayManager
+from src.config.settings import MODIS_RAW_DATA_DIR
+
+
+# 加載環境變數
+load_dotenv()
+
+# 地理範圍設定
+SEARCH_BOUNDARY = (119.0, 21.0, 123.0, 26.0)  # 搜索數據的邊界 (west_lon, south_lat, east_lon, north_lat)
+
+
+class EARTHDATAFetcher:
+    """Search and download MODIS Level-2 granules from NASA Earthdata via earthaccess.
+
+    Downloaded files are stored as
+    ``<MODIS_RAW_DATA_DIR>/<file_type>/<YYYY>/<MM>/<filename>``.
+    Files whose name contains ``.NRT.hdf`` are always excluded.
+    """
+
+    # File names carry the acquisition date as ".A<YYYY><DDD>." (year + day of year).
+    _DATE_PATTERN = re.compile(r'\.A(\d{7})\.')
+
+    def __init__(self):
+        # Log in through earthaccess using the "environment" strategy
+        self.auth = earthaccess.login(strategy="environment")
+
+        # Root directory for downloaded files
+        self.download_dir = MODIS_RAW_DATA_DIR
+
+        # Set by fetch_data(); download() needs it to build the target directory
+        self.file_type = None
+
+    @staticmethod
+    def _as_date_string(value):
+        """Return *value* as ``YYYY-MM-DD``; non-datetime values go through ``str()``."""
+        if isinstance(value, datetime):
+            return value.strftime('%Y-%m-%d')
+        return str(value)
+
+    @staticmethod
+    def _first_link(product):
+        """Return the first download link of *product*, or None when it has none."""
+        data_links = getattr(product, 'data_links', None)
+        if not callable(data_links):
+            return None
+        links = data_links()
+        return links[0] if links else None
+
+    async def fetch_data(self, file_type, start_date: str | datetime, end_date: str | datetime,
+                         boundary: tuple = SEARCH_BOUNDARY):
+        """Search Earthdata for ``<file_type>_L2`` products and drop NRT files.
+
+        Args:
+            file_type: Product prefix; ``_L2`` is appended to form the short name.
+            start_date: First day of the window, as ``YYYY-MM-DD`` text or a datetime.
+            end_date: Last day of the window (inclusive up to 23:59:59.999Z).
+            boundary: Bounding box ``(west_lon, south_lat, east_lon, north_lat)``.
+
+        Returns:
+            List of matching products that have a download link and are not
+            NRT files; an empty list when there are none.
+        """
+        self.file_type = file_type
+
+        # A datetime formatted directly into the f-string would include its own
+        # time part and yield a malformed timestamp, so normalise to a date first.
+        first_day = self._as_date_string(start_date)
+        last_day = self._as_date_string(end_date)
+
+        products = earthaccess.search_data(
+            short_name=f"{file_type}_L2",
+            temporal=(f'{first_day}T00:00:00.000Z', f'{last_day}T23:59:59.999Z'),
+            bounding_box=boundary,
+        )
+
+        # Keep only products with a download link whose file name is not an NRT file
+        filtered_products = []
+        for product in products:
+            link = self._first_link(product)
+            if link is None:
+                continue
+            if '.NRT.hdf' not in Path(link).name:
+                filtered_products.append(product)
+
+        if not filtered_products:
+            print("No valid products found (NRT files excluded)")
+            return []
+
+        # Display product info
+        DisplayManager().display_products_nasa(filtered_products)
+        return filtered_products
+
+    def download(self, products):
+        """Download *products* into the year/month directory layout.
+
+        Each product is downloaded into a temporary folder and then moved to
+        its final location. Files that already exist there are not downloaded
+        again. A failure on one product is printed and does not stop the rest.
+
+        Args:
+            products: Products as returned by ``fetch_data``.
+
+        Returns:
+            List of file paths (as strings) now available locally, including
+            files that were already present before this call.
+        """
+        if not products:
+            print("沒有找到符合條件的數據")
+            return []
+
+        if self.file_type is None:
+            # Without a file type the target directory cannot be built; report
+            # it once instead of failing separately for every product.
+            print("file_type is not set; call fetch_data() before download()")
+            return []
+
+        temp_dir = self.download_dir / "temp"
+
+        # Every file available locally after this call (new + already present)
+        downloaded_files = []
+        # Only the files actually fetched during this call
+        new_count = 0
+
+        for result in products:
+            try:
+                file_url = self._first_link(result)
+                if file_url is None:
+                    continue
+
+                filename = Path(file_url).name
+
+                # Skip NRT files
+                if '.NRT.hdf' in filename:
+                    print(f"跳過 NRT 文件: {filename}")
+                    continue
+
+                # Extract the acquisition date from the file name
+                date_match = self._DATE_PATTERN.search(filename)
+                if not date_match:
+                    print(f"無法從文件名提取日期: {filename}")
+                    continue
+
+                # Convert year + day-of-year into a year/month folder
+                file_date = datetime.strptime(date_match.group(1), "%Y%j")
+                target_dir = self.download_dir / self.file_type / file_date.strftime("%Y/%m")
+                target_dir.mkdir(parents=True, exist_ok=True)
+
+                # Skip the download when the file is already in place
+                target_file = target_dir / filename
+                if target_file.exists():
+                    downloaded_files.append(str(target_file))
+                    continue
+
+                # Download into the temporary location first
+                temp_files = earthaccess.download([result], temp_dir)
+                temp_file = Path(temp_files[0]) if temp_files else None
+                if temp_file is None or not temp_file.exists():
+                    print(f"下載失敗: {filename}")
+                    continue
+
+                # shutil.move also works when temp and target are on different filesystems
+                shutil.move(str(temp_file), str(target_file))
+                downloaded_files.append(str(target_file))
+                new_count += 1
+
+            except Exception as e:
+                print(f"處理文件時發生錯誤: {str(e)}")
+
+        # Remove the temporary directory; cleanup is best-effort
+        if temp_dir.exists():
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+        # Report what really happened: new downloads, everything already
+        # present, or nothing obtained at all.
+        if new_count:
+            print(f"成功下載 {new_count} 個檔案")
+        elif downloaded_files:
+            print("所有檔案已經存在，無需下載")
+        else:
+            print("沒有成功下載任何檔案")
+
+        return downloaded_files