resolve conflicts

TushigBili · TushigBili · commit e7a2ec95d977 · 2025-11-19T18:27:31.000-05:00
diff --git a/src/mutations/create_article.py b/src/mutations/create_article.py
@@ -14,11 +14,10 @@ class Arguments:
     article = Field(lambda: ArticleType)
 
     def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
-        from datetime import datetime
         article_data = {
             "title": title,
             "sports_type": sports_type,
-            "published_at": datetime.fromisoformat(published_at),
+            "published_at": published_at,  # Already in ISO 8601 format
             "url": url,
             "slug": slug,
             "image": image
diff --git a/src/repositories/article_repository.py b/src/repositories/article_repository.py
@@ -1,7 +1,7 @@
 from src.database import daily_sun_db
 from src.models.article import Article
 from pymongo import UpdateOne
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 
 class ArticleRepository:
     @staticmethod
@@ -52,7 +52,9 @@ def find_recent(limit_days=3):
         Retrieve articles from the last N days, sorted by published_at descending.
         """
         article_collection = daily_sun_db["news_articles"]
-        query = {"published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}}
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
+        query = {"published_at": {"$gte": threshold}}
         articles = article_collection.find(query).sort("published_at", -1)
         return [Article.from_dict(article) for article in articles]
 
@@ -62,9 +64,11 @@ def find_by_sports_type(sports_type, limit_days=3):
         Retrieve articles by sports_type from the last N days, sorted by published_at descending.
         """
         article_collection = daily_sun_db["news_articles"]
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
         query = {
             "sports_type": sports_type,
-            "published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}
+            "published_at": {"$gte": threshold}
         }
         articles = article_collection.find(query).sort("published_at", -1)
         return [Article.from_dict(article) for article in articles]
@@ -75,5 +79,7 @@ def delete_not_recent(limit_days=3):
         Delete articles older than N days, sorted by published_at descending.
         """
         article_collection = daily_sun_db["news_articles"]
-        query = {"published_at": {"$lt": datetime.now() - timedelta(days=limit_days)}}
+        # Calculate threshold as ISO 8601 string
+        threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
+        query = {"published_at": {"$lt": threshold}}
         article_collection.delete_many(query)
diff --git a/src/scrapers/daily_sun_scrape.py b/src/scrapers/daily_sun_scrape.py
@@ -1,9 +1,10 @@
 import os
 import requests
-from datetime import datetime, timedelta
+from datetime import datetime, timedelta, timezone
 from dotenv import load_dotenv
 from ..services import ArticleService
 from ..utils.constants import ARTICLE_IMG_TAG
+from ..utils.helpers import extract_sport_type_from_title
 import logging
 from bs4 import BeautifulSoup
 import base64
@@ -23,20 +24,22 @@ def fetch_news():
         response.raise_for_status()
         data = response.json()
 
-        # Current date and 3-day threshold
-        current_date = datetime.now()
+        # Current date and 3-day threshold (in UTC)
+        current_date = datetime.now(timezone.utc)
         three_days_ago = current_date - timedelta(days=3)
 
         # Process articles
         articles_to_store = []
         for article in data.get("articles", []):
-            published_at = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S")
+            published_at_dt = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S")
+            # Assume the timezone is UTC and convert to ISO 8601 format string
+            published_at_dt = published_at_dt.replace(tzinfo=timezone.utc)
+            published_at = published_at_dt.isoformat().replace('+00:00', 'Z')
             
             if published_at >= three_days_ago:
-                sports_type = next(
-                    (tag["name"] for tag in article["tags"] if tag["name"] not in ["Sports", "Top Stories"]),
-                    "General"
-                )
+                # Extract sport type from title
+                title = article["headline"]
+                sports_type = extract_sport_type_from_title(title)
                 article_url = f"https://cornellsun.com/article/{article['slug']}"
 
                 article_image = None
@@ -61,7 +64,7 @@ def fetch_news():
                     "published_at": published_at,
                     "url": article_url,
                     "slug": article["slug"],
-                    "created_at": datetime.now()
+                    "created_at": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
                 }
                 articles_to_store.append(article_doc)
              
diff --git a/src/types.py b/src/types.py
@@ -199,7 +199,4 @@ class ArticleType(ObjectType):
 
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
-            if key == "published_at" and isinstance(value, datetime):
-                setattr(self, key, value.isoformat())
-            else:
-                setattr(self, key, value)
+            setattr(self, key, value)
diff --git a/src/utils/helpers.py b/src/utils/helpers.py
@@ -144,3 +144,46 @@ def extract_sport_from_title(title):
     
     return None
 
+def extract_sport_type_from_title(title: str):
+    """
+    Extract the sport type from an article title by matching against known sports.
+
+    Names come from the "sport" field of each SPORT_URLS entry and are matched
+    case-insensitively as whole words ("Rowing" is not found in "growing").
+    Longer names are tried first; equal-length names in alphabetical order.
+
+    Args:
+        title (str): The article title to analyze
+
+    Returns:
+        str: The sport name if found, otherwise "sports" as default
+    """
+    import re
+    from .constants import SPORT_URLS
+
+    if not title:
+        return "sports"
+
+    title_lower = title.lower()
+
+    def mentions(phrase):
+        # Lookarounds rather than \b so names containing "&" or "'" still anchor.
+        return re.search(r"(?<!\w)" + re.escape(phrase.lower()) + r"(?!\w)", title_lower)
+
+    # Unique, non-empty sport names from SPORT_URLS.
+    sport_names = {sport_data["sport"].strip() for sport_data in SPORT_URLS.values()}
+    sport_names.discard("")
+
+    # Longest first ("Swimming & Diving" before "Swimming"); ties break by name
+    # because raw set order can differ between runs under hash randomization.
+    for sport_name in sorted(sport_names, key=lambda name: (-len(name), name)):
+        if mentions(sport_name):
+            return sport_name
+
+    # Title shorthand, tried only when no name matched: "Men's Hockey" -> "Ice Hockey".
+    special_mappings = {"hockey": "Ice Hockey"}
+    for keyword, sport_name in special_mappings.items():
+        if sport_name in sport_names and mentions(keyword):
+            return sport_name
+
+    return "sports"