Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion handler/feed_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,22 @@ func (h *Handler) OnCheck() {
lastEntry = &abonnement.LastEntry.String
}

feed, err := abonnement.Feed.Check(lastEntry)
result, err := abonnement.Feed.Check(lastEntry)
if err != nil {
log.Printf("%s: %s", abonnement.Feed.Url, err)
return
}

// Update cache headers if we got new ones
if result.ETag != nil || result.LastModified != nil {
err = h.DB.Abonnements.SetCacheHeaders(abonnement.Feed.Url, result.ETag, result.LastModified)
if err != nil {
log.Printf("%s: failed to update cache headers: %s", abonnement.Feed.Url, err)
}
}

feed := result.Feed

for _, entry := range reverse(feed.Items) {
templateData := &TemplateData{}
if entry.Title != "" {
Expand Down
162 changes: 152 additions & 10 deletions storage/abonnements.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
package storage

import (
"compress/gzip"
"context"
"database/sql"
"fmt"
"io"
"net/http"
"time"

"github.com/jmoiron/sqlx"
"github.com/mmcdole/gofeed"
"time"
)

type (
Expand All @@ -17,6 +22,7 @@ type (
GetByUser(chatId int64) ([]Feed, error)
GetAll() ([]Abonnement, error)
SetLastEntry(feedUrl string, lastEntry *string) error
SetCacheHeaders(feedUrl string, etag *string, lastModified *string) error
}

Abonnements struct {
Expand All @@ -35,11 +41,20 @@ type (
}

// Feed mirrors one row of the feeds table. ETag and LastModified hold the
// HTTP cache validators stored for the feed so that later checks can issue
// conditional requests; they are NULL-able because a server may not send them.
Feed struct {
	ID           int64          `db:"id"`
	Url          string         `db:"url"`
	LastEntry    sql.NullString `db:"last_entry"`
	ETag         sql.NullString `db:"etag"`
	LastModified sql.NullString `db:"last_modified"`
	CreatedAt    time.Time      `db:"created_at"`
	UpdatedAt    sql.NullTime   `db:"updated_at"`
}

// FeedCheckResult contains the parsed feed and caching headers
// produced by Feed.Check.
FeedCheckResult struct {
// Feed is the parsed feed. Check trims already-seen items from it and
// returns a feed with an empty Items slice on HTTP 304.
Feed *gofeed.Feed
// ETag is the ETag response header, or the previously stored value on
// HTTP 304. It is nil when no value is available.
ETag *string
// LastModified is the Last-Modified response header, or the previously
// stored value on HTTP 304. It is nil when no value is available.
LastModified *string
}
)

Expand Down Expand Up @@ -214,20 +229,147 @@ WHERE feeds.url = ?`
return err
}

func (feedToCheck Feed) Check(lastEntry *string) (*gofeed.Feed, error) {
feed, err := gofeed.NewParser().ParseURL(feedToCheck.Url)
// SetCacheHeaders writes the given ETag and Last-Modified validators to the
// feeds row whose url equals feedUrl. Both pointers are handed to the query
// unchanged as bound parameters; the error from Exec is returned as is.
func (db *Abonnements) SetCacheHeaders(feedUrl string, etag *string, lastModified *string) error {
	const updateCacheHeaders = `UPDATE feeds
SET feeds.etag = ?, feeds.last_modified = ?
WHERE feeds.url = ?`

	if _, execErr := db.Exec(updateCacheHeaders, etag, lastModified, feedUrl); execErr != nil {
		return execErr
	}
	return nil
}

// feedHTTPClient is the single client used for every feed download. The
// timeout bounds the whole exchange, including reading the response body.
var feedHTTPClient = &http.Client{Timeout: 30 * time.Second}

// fetchFeedWithCaching issues a conditional GET for feedURL.
//
// etag and lastModified are the validators remembered from a previous
// response; each one that is non-nil and non-empty is sent as If-None-Match
// or If-Modified-Since respectively, so the server can answer 304.
//
// The response is returned for any status code; the caller must inspect
// resp.StatusCode and close resp.Body. An error is returned only when the
// request cannot be built or the round trip itself fails.
func fetchFeedWithCaching(feedURL string, etag *string, lastModified *string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, feedURL, nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	// Set User-Agent to identify the bot
	req.Header.Set("User-Agent", "RSSBot/2.0 (+https://github.com/Brawl345/rssbot)")

	// Setting Accept-Encoding by hand turns off the transport's transparent
	// decompression, so only advertise what the body reader can decode.
	// "deflate" is deliberately not offered: nothing in this file inflates it.
	req.Header.Set("Accept-Encoding", "gzip")

	// Add caching headers if we have them
	if etag != nil && *etag != "" {
		req.Header.Set("If-None-Match", *etag)
	}
	if lastModified != nil && *lastModified != "" {
		req.Header.Set("If-Modified-Since", *lastModified)
	}

	resp, err := feedHTTPClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch feed: %w", err)
	}

	return resp, nil
}

// readResponseBody picks the reader the feed should be parsed from.
//
// When the response declares Content-Encoding "gzip" the body is wrapped in
// a gzip reader; for any other value the body itself is returned. If the
// gzip header cannot be read, the body is closed and an error is returned.
func readResponseBody(resp *http.Response) (io.ReadCloser, error) {
	if resp.Header.Get("Content-Encoding") != "gzip" {
		return resp.Body, nil
	}

	zr, gzErr := gzip.NewReader(resp.Body)
	if gzErr != nil {
		resp.Body.Close()
		return nil, fmt.Errorf("failed to create gzip reader: %w", gzErr)
	}
	return zr, nil
}

// Check downloads the feed with a conditional GET and parses it.
//
// The ETag and Last-Modified values stored on feedToCheck (when valid) are
// sent as validators. Outcomes:
//   - 304 Not Modified: a result with an empty Items slice and the stored
//     validators is returned.
//   - 429 / 503: an error is returned, including the Retry-After header when
//     the server supplied one.
//   - any other non-200 status: an error is returned.
//   - 200: the body is parsed and returned together with the validators from
//     the response headers (nil for a header the server did not send).
//
// When lastEntry is non-nil, the item list is cut at the first item whose GUID
// equals *lastEntry, so only the items listed before it remain.
func (feedToCheck Feed) Check(lastEntry *string) (*FeedCheckResult, error) {
	var etag, lastModified *string
	if feedToCheck.ETag.Valid {
		etag = &feedToCheck.ETag.String
	}
	if feedToCheck.LastModified.Valid {
		lastModified = &feedToCheck.LastModified.String
	}

	resp, err := fetchFeedWithCaching(feedToCheck.Url, etag, lastModified)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK:
		// Fall through to parsing below.
	case http.StatusNotModified:
		// Nothing new: hand back an empty feed and keep the validators the
		// server has just confirmed.
		return &FeedCheckResult{
			Feed:         &gofeed.Feed{Items: []*gofeed.Item{}},
			ETag:         etag,
			LastModified: lastModified,
		}, nil
	case http.StatusTooManyRequests, http.StatusServiceUnavailable:
		if retryAfter := resp.Header.Get("Retry-After"); retryAfter != "" {
			return nil, fmt.Errorf("server returned %d, retry after: %s", resp.StatusCode, retryAfter)
		}
		return nil, fmt.Errorf("server returned %d", resp.StatusCode)
	default:
		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Extract new caching headers from the response.
	var newETag, newLastModified *string
	if etagHeader := resp.Header.Get("ETag"); etagHeader != "" {
		newETag = &etagHeader
	}
	if lastModHeader := resp.Header.Get("Last-Modified"); lastModHeader != "" {
		newLastModified = &lastModHeader
	}

	// Read and decompress body if needed.
	reader, err := readResponseBody(resp)
	if err != nil {
		return nil, err
	}
	defer reader.Close()

	feed, err := gofeed.NewParser().Parse(reader)
	if err != nil {
		return nil, fmt.Errorf("failed to parse feed: %w", err)
	}

	// Filter out items we've already seen. Break rather than return so the
	// cache headers below are always part of the result.
	if lastEntry != nil {
		for i, item := range feed.Items {
			if item.GUID == *lastEntry {
				feed.Items = feed.Items[:i]
				break
			}
		}
	}

	return &FeedCheckResult{
		Feed:         feed,
		ETag:         newETag,
		LastModified: newLastModified,
	}, nil
}
Loading