Skip to content

Commit e7a2ec9

Browse files
committed
resolve conflicts
2 parents 0a892fa + 8ab9357 commit e7a2ec9

File tree

5 files changed

+67
-19
lines changed

5 files changed

+67
-19
lines changed

src/mutations/create_article.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,10 @@ class Arguments:
1414
article = Field(lambda: ArticleType)
1515

1616
def mutate(self, info, title, sports_type, published_at, url, slug, image=None):
17-
from datetime import datetime
1817
article_data = {
1918
"title": title,
2019
"sports_type": sports_type,
21-
"published_at": datetime.fromisoformat(published_at),
20+
"published_at": published_at, # Already in ISO 8601 format
2221
"url": url,
2322
"slug": slug,
2423
"image": image

src/repositories/article_repository.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from src.database import daily_sun_db
22
from src.models.article import Article
33
from pymongo import UpdateOne
4-
from datetime import datetime, timedelta
4+
from datetime import datetime, timedelta, timezone
55

66
class ArticleRepository:
77
@staticmethod
@@ -52,7 +52,9 @@ def find_recent(limit_days=3):
5252
Retrieve articles from the last N days, sorted by published_at descending.
5353
"""
5454
article_collection = daily_sun_db["news_articles"]
55-
query = {"published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}}
55+
# Calculate threshold as ISO 8601 string
56+
threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
57+
query = {"published_at": {"$gte": threshold}}
5658
articles = article_collection.find(query).sort("published_at", -1)
5759
return [Article.from_dict(article) for article in articles]
5860

@@ -62,9 +64,11 @@ def find_by_sports_type(sports_type, limit_days=3):
6264
Retrieve articles by sports_type from the last N days, sorted by published_at descending.
6365
"""
6466
article_collection = daily_sun_db["news_articles"]
67+
# Calculate threshold as ISO 8601 string
68+
threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
6569
query = {
6670
"sports_type": sports_type,
67-
"published_at": {"$gte": datetime.now() - timedelta(days=limit_days)}
71+
"published_at": {"$gte": threshold}
6872
}
6973
articles = article_collection.find(query).sort("published_at", -1)
7074
return [Article.from_dict(article) for article in articles]
@@ -75,5 +79,7 @@ def delete_not_recent(limit_days=3):
7579
Delete articles older than N days.
7680
"""
7781
article_collection = daily_sun_db["news_articles"]
78-
query = {"published_at": {"$lt": datetime.now() - timedelta(days=limit_days)}}
82+
# Calculate threshold as ISO 8601 string
83+
threshold = (datetime.now(timezone.utc) - timedelta(days=limit_days)).isoformat().replace('+00:00', 'Z')
84+
query = {"published_at": {"$lt": threshold}}
7985
article_collection.delete_many(query)

src/scrapers/daily_sun_scrape.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import os
22
import requests
3-
from datetime import datetime, timedelta
3+
from datetime import datetime, timedelta, timezone
44
from dotenv import load_dotenv
55
from ..services import ArticleService
66
from ..utils.constants import ARTICLE_IMG_TAG
7+
from ..utils.helpers import extract_sport_type_from_title
78
import logging
89
from bs4 import BeautifulSoup
910
import base64
@@ -23,20 +24,22 @@ def fetch_news():
2324
response.raise_for_status()
2425
data = response.json()
2526

26-
# Current date and 3-day threshold
27-
current_date = datetime.now()
27+
# Current date and 3-day threshold (in UTC)
28+
current_date = datetime.now(timezone.utc)
2829
three_days_ago = current_date - timedelta(days=3)
2930

3031
# Process articles
3132
articles_to_store = []
3233
for article in data.get("articles", []):
33-
published_at = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S")
34+
published_at_dt = datetime.strptime(article["published_at"], "%Y-%m-%d %H:%M:%S")
35+
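# The feed timestamp carries no UTC offset; assume it is UTC and store it as an ISO 8601 string with a 'Z' suffix.
# NOTE(review): published_at is a str from here on, so the `published_at >= three_days_ago` check below compares str with datetime (TypeError); it should compare published_at_dt.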
36+
published_at_dt = published_at_dt.replace(tzinfo=timezone.utc)
37+
published_at = published_at_dt.isoformat().replace('+00:00', 'Z')
3438

3539
if published_at >= three_days_ago:
36-
sports_type = next(
37-
(tag["name"] for tag in article["tags"] if tag["name"] not in ["Sports", "Top Stories"]),
38-
"General"
39-
)
40+
# Extract sport type from title
41+
title = article["headline"]
42+
sports_type = extract_sport_type_from_title(title)
4043
article_url = f"https://cornellsun.com/article/{article['slug']}"
4144

4245
article_image = None
@@ -61,7 +64,7 @@ def fetch_news():
6164
"published_at": published_at,
6265
"url": article_url,
6366
"slug": article["slug"],
64-
"created_at": datetime.now()
67+
"created_at": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
6568
}
6669
articles_to_store.append(article_doc)
6770

src/types.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,4 @@ class ArticleType(ObjectType):
199199

200200
def __init__(self, **kwargs):
201201
for key, value in kwargs.items():
202-
if key == "published_at" and isinstance(value, datetime):
203-
setattr(self, key, value.isoformat())
204-
else:
205-
setattr(self, key, value)
202+
setattr(self, key, value)

src/utils/helpers.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,3 +144,46 @@ def extract_sport_from_title(title):
144144

145145
return None
146146

147+
def extract_sport_type_from_title(title: str) -> str:
    """
    Extract the sport type from an article title by matching against known sports.

    Sport names are taken from the ``sport`` field of every entry in
    ``SPORT_URLS``. Matching is case-insensitive, and a name only matches when
    it starts at a word boundary in the title, so "Rowing" is not found inside
    "growing" while suffixed forms such as "Golfers" still match "Golf".

    Args:
        title (str): The article title to analyze

    Returns:
        str: The sport name if found, otherwise "sports" as default
    """
    # Nothing to analyze: answer before paying for any imports.
    if not title:
        return "sports"

    import re

    from .constants import SPORT_URLS

    def starts_word(needle: str, haystack: str) -> bool:
        # (?<!\w) rejects hits that begin in the middle of another word.
        return re.search(r"(?<!\w)" + re.escape(needle), haystack) is not None

    # Get all unique, non-blank sport names from SPORT_URLS
    sport_names = {
        name
        for name in (sport_data["sport"].strip() for sport_data in SPORT_URLS.values())
        if name
    }

    title_lower = title.lower()

    # Longest first to match "Swimming & Diving" before "Swimming". Names of
    # equal length are ordered alphabetically so the result does not depend on
    # set iteration order.
    for sport_name in sorted(sport_names, key=lambda name: (-len(name), name)):
        if starts_word(sport_name.lower(), title_lower):
            return sport_name

    # Special mappings for common variations in titles
    # Only checked if no direct match was found above
    # e.g., "Hockey" in title should match "Ice Hockey" in sport names
    special_mappings = {
        "hockey": "Ice Hockey",  # "Men's Hockey" or "Women's Hockey" → "Ice Hockey"
    }

    for keyword, sport_name in special_mappings.items():
        if sport_name in sport_names and starts_word(keyword, title_lower):
            return sport_name

    return "sports"

0 commit comments

Comments
 (0)