service-podcasts/src/perun/rss_feed_handler.py


import time
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from urllib.error import URLError

import feedparser

from simple_logger_handler import setup_logger


@dataclass
class EpisodeData:
    """Metadata for the latest episode found in a YouTube channel RSS feed."""

    episode_date: str
    episode_description: str
    episode_link: str
    episode_number: str
    episode_title: str
    episode_ytid: str


logger = setup_logger(__name__)


def grab_latest_chapter_information(channel_id: str, max_retries: int = 3) -> Optional[EpisodeData]:
    """
    Fetch the latest episode's information from a YouTube RSS feed, retrying on network-related errors.

    Parameters:
        channel_id: YouTube channel ID as a string.
        max_retries: Number of attempts to make if fetching the feed fails due to network issues.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            episode_date: Publication date in ISO format (e.g. 2025-11-30).
            episode_description: Episode description.
            episode_link: YouTube link.
            episode_number: Episode number (always empty; the feed does not provide one).
            episode_title: Episode title.
            episode_ytid: Episode YouTube ID.
        Returns None if the feed has no entries or all retries are exhausted;
        no exception is propagated to the caller.
    """
    rss_feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"
    for attempt in range(1, max_retries + 1):
        logger.debug(f"[Feed] Parsing feed URL: {rss_feed_url} (attempt {attempt}/{max_retries})")
        try:
            feed = feedparser.parse(rss_feed_url)
            # feedparser swallows fetch errors instead of raising them, so
            # re-raise network failures to trigger the retry logic below.
            if feed.bozo and isinstance(feed.get("bozo_exception"), (URLError, OSError)):
                raise feed.bozo_exception
            if not feed.entries:
                logger.warning(f"[Feed] No entries found for feed {channel_id}")
                return None
            # Entries are ordered newest-first, so the first one is the latest episode.
            latest_chapter_data = feed.entries[0]
            episode_link = latest_chapter_data["link"]
            episode_title = latest_chapter_data["title"]
            episode_description = latest_chapter_data["summary"]
            # "published" is an ISO 8601 timestamp; keep only the date part.
            episode_date = datetime.fromisoformat(latest_chapter_data["published"]).date().isoformat()
            episode_ytid = latest_chapter_data["yt_videoid"]
            logger.info(f"[Feed] Latest episode '{episode_title}': {episode_link}")
            logger.debug(
                f"[Feed] Latest episode '{episode_title}' (YouTubeId {episode_ytid}): "
                f"{episode_link} -> {episode_description}"
            )
            return EpisodeData(
                episode_date=episode_date,
                episode_description=episode_description,
                episode_link=episode_link,
                episode_number="",  # not provided by the YouTube RSS feed
                episode_title=episode_title,
                episode_ytid=episode_ytid,
            )
        except (URLError, OSError) as e:
            logger.warning(f"[Feed] Network error on attempt {attempt} for feed {channel_id}: {e}")
            if attempt == max_retries:
                logger.error(f"[Feed] All {max_retries} attempts failed for feed {channel_id}")
                return None
            # Exponential backoff: 1s, 2s, 4s, ...
            backoff = 2 ** (attempt - 1)
            logger.debug(f"[Feed] Retrying in {backoff} seconds...")
            time.sleep(backoff)
    return None
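

# A minimal usage sketch, not part of the service: a caller that reacts only
# when the feed shows an episode it has not seen before. The helper name and
# the `last_seen_ytid` parameter are hypothetical, for illustration only.
def check_for_new_episode(channel_id: str, last_seen_ytid: str) -> Optional[EpisodeData]:
    """Return the latest episode if its YouTube ID differs from last_seen_ytid."""
    episode = grab_latest_chapter_information(channel_id)
    if episode is not None and episode.episode_ytid != last_seen_ytid:
        return episode
    return None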


if __name__ == "__main__":
    # Quick manual check against a real channel ID.
    print(grab_latest_chapter_information("UCC3ehuUksTyQ7bbjGntmx3Q"))