Merge pull request 'Perun: Changed fetching YouTube data from using yt-dlp to an RSS feed' (#2) from feature/rss-feed into main
All checks were successful
Build image with python3,get-iplayer / Build Docker Image (push) Successful in 18s
Reviewed-on: #2
This commit is contained in: commit e81fe01a01
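For context: the change replaces yt-dlp metadata lookups with YouTube's per-channel RSS/Atom feed (https://www.youtube.com/feeds/videos.xml?channel_id=<channel id>), parsed with feedparser, which is why feedparser==6.0.12 and sgmllib3k (a feedparser dependency) show up in requirements.txt below. A minimal sketch of the idea follows; it is not the project's code, the channel ID is the one used in the diff, and the entry fields are the ones feedparser exposes for YouTube feeds:

# Sketch only: grab the newest upload for a channel from its RSS feed.
import feedparser

feed = feedparser.parse("https://www.youtube.com/feeds/videos.xml?channel_id=UCC3ehuUksTyQ7bbjGntmx3Q")
if feed.entries:
    latest = feed.entries[0]
    # yt_videoid is what the new code compares against the ID stored in Audiobookshelf,
    # and link is what gets handed to yt-dlp for the actual download.
    print(latest["yt_videoid"], latest["link"])
    print(latest["title"], latest["published"])
    print(latest["summary"][:80])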
@@ -4,9 +4,9 @@ import os
 import time
 from dotenv import load_dotenv
 from ssh_helper import upload_via_sftp, send_notification_via_ssh
-from youtube_handler import get_url_for_latest_video, get_youtube_data, return_download_options, check_for_sponsorblock_segments
+from youtube_handler import return_download_options, check_for_sponsorblock_segments
 from simple_logger_handler import setup_logger
+from rss_feed_handler import grab_latest_chapter_information, EpisodeData
 logger = setup_logger(__name__)

 load_dotenv()
@@ -14,14 +14,14 @@ PODCAST_AUTHORIZATION_TOKEN = os.getenv("PODCAST_AUTHORIZATION_TOKEN")
 PODCAST_API_URL = os.getenv("PODCAST_API_URL")


-def get_audiobookshelf_data() -> tuple[int | None, str | None]:
+def get_audiobookshelf_data() -> tuple[str | None, str | None]:
     """
     Fetches the latest episode data from the Audiobookshelf API.

     Returns:
         tuple[int | None, str | None]:
-            - The track number as an integer (or None if data could not be fetched due to retryable errors).
-            - The episode title as a string (or None if data could not be fetched due to retryable errors).
+            - The track number as a string (or None if data could not be fetched due to retryable errors).
+            - The YouTube episode id as a string (or None if data could not be fetched due to retryable errors).

     Raises:
         requests.exceptions.HTTPError:
@@ -40,10 +40,10 @@ def get_audiobookshelf_data() -> tuple[int | None, str | None]:
         result = response.json()

         audiobookshelf_track = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTrack"]
-        audiobookshelf_title = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTitle"]
+        audiobookshelf_ytid = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagDescription"]

-        logger.debug(f"[Audiobookshelf] Fetched Audiobookshelf data: track={audiobookshelf_track}, title={audiobookshelf_title}")
-        return (audiobookshelf_track, audiobookshelf_title)
+        logger.debug(f"[Audiobookshelf] Fetched Audiobookshelf data: track={audiobookshelf_track}, ytid={audiobookshelf_ytid}")
+        return (audiobookshelf_track, audiobookshelf_ytid)

     except requests.exceptions.ConnectionError as e:
         logger.warning(f"[Audiobookshelf] Connection error, will retry: {e}")
@@ -60,57 +60,56 @@ def get_audiobookshelf_data() -> tuple[int | None, str | None]:
         logger.error(f"[Audiobookshelf] HTTP error {status}, not retrying: {e}")
         raise

-def check_until_new_episode_gets_released() -> tuple[int | None, dict | None, str | None]:
+def check_until_new_episode_gets_released() -> tuple[EpisodeData | None, str | None]:
     """
     Polls YouTube every hour for a new episode and compares it to the available episode on Audiobookshelf.
     Stops after 72 hours.

     Returns:
-        tuple[int | None, dict | None, str | None]:
+        tuple[EpisodeData | None, str | None]:
+            - EpisodeData with information about the date,description,link,title and YouTube ID
             - Track number from Audiobookshelf
-            - Episode info dictionary from YouTube
-            - Episode URL
-        Returns (None, None, None) if no new episode found within timeout
+        Returns (None, None) if no new episode found within timeout
     """
     CHECK_INTERVAL_HOURS = 1
     MAX_HOURS = 72
     for attempt in range(1, MAX_HOURS + 1):
         logger.debug(f"[EpisodeCheck] Waiting for a new episode to be released, attempt: {attempt}/{MAX_HOURS}")
-        audiobookshelf_track, audiobookshelf_title = get_audiobookshelf_data()
+        audiobookshelf_track, audiobookshelf_ytid = get_audiobookshelf_data()

-        if audiobookshelf_track is None or audiobookshelf_title is None:
+        if audiobookshelf_track is None or audiobookshelf_ytid is None:
             logger.warning("[EpisodeCheck] Unable to fetch Audiobookshelf data, retrying in 1 hour.")
             time.sleep(CHECK_INTERVAL_HOURS * 3600)
             continue

-        episode_url = get_url_for_latest_video()
-        if episode_url is None:
-            logger.warning("[EpisodeCheck] Unable to fetch latest video URL, retrying in 1 hour.")
+        try:
+            episode_data = grab_latest_chapter_information("UCC3ehuUksTyQ7bbjGntmx3Q")
+        except Exception as e:
+            logger.warning(f"[EpisodeCheck] Failed to fetch latest video data: {e}, retrying in 1 hour.")
             time.sleep(CHECK_INTERVAL_HOURS * 3600)
             continue

-        episode_info = get_youtube_data(episode_url)
-        if not episode_info:
-            logger.warning("[EpisodeCheck] Unable to fetch video metadata, retrying in 1 hour.")
+        if episode_data is None:
+            logger.warning("[EpisodeCheck] Unable to fetch latest video data, retrying in 1 hour.")
             time.sleep(CHECK_INTERVAL_HOURS * 3600)
             continue

-        if audiobookshelf_title != episode_info["title"]:
-            logger.info(f"[EpisodeCheck] Latest YouTube episode: {episode_info['title']}")
-            return (audiobookshelf_track,episode_info,episode_url)
+        if audiobookshelf_ytid != episode_data.episode_ytid:
+            logger.info(f"[EpisodeCheck] Latest YouTube episode: {episode_data.episode_title}")
+            return episode_data, audiobookshelf_track

-        logger.debug("[EpisodeCheck] No new episode found, retrying in 1 hour.")
+        logger.info("[EpisodeCheck] No new episode found, retrying in 1 hour.")
         time.sleep(CHECK_INTERVAL_HOURS * 3600)

     logger.warning("[EpisodeCheck] No new episode found after maximum attempts.")
-    return (None, None, None)
+    return None, None

-def wait_for_sponsorblock_segments_to_be_added(episode_url) -> bool:
+def wait_for_sponsorblock_segments_to_be_added(episode_link) -> bool:
     """
     Polls SponsorBlock for segments on the current video until found or until max attempts.

     Args:
-        episode_url: YouTube video URL to check for SponsorBlock segments
+        episode_link: YouTube video URL to check for SponsorBlock segments

     Returns:
         True if segments found, False otherwise
@@ -119,7 +118,7 @@ def wait_for_sponsorblock_segments_to_be_added(episode_url) -> bool:
     MAX_HOURS = 24
     for attempt in range(1, MAX_HOURS + 1):
         logger.debug(f"[SponsorBlock] Waiting for SponsorBlock to be added, attempt: {attempt}/{MAX_HOURS} ")
-        segments = check_for_sponsorblock_segments(episode_url)
+        segments = check_for_sponsorblock_segments(episode_link)

         if segments:
             logger.debug("[SponsorBlock] Segments found, exiting loop.")
@@ -138,9 +137,9 @@ def download_episode() -> None:
     logger.info("[App] Starting Perun")

     try:
-        audiobookshelf_track,episode_info,episode_url = check_until_new_episode_gets_released()
+        episode_data,audiobookshelf_track = check_until_new_episode_gets_released()

-        if audiobookshelf_track is None or episode_info is None or episode_url is None:
+        if episode_data is None or audiobookshelf_track is None:
             logger.error("[App] Failed to find new episode within timeout period")
             return

@@ -150,23 +149,22 @@ def download_episode() -> None:
         return

     try:
-        episode_description = episode_info.get("description", "")
-        if "sponsored" in episode_description.lower():
+        if "sponsored" in episode_data.episode_description.lower():
             logger.debug("[App] Sponsored segments found in description, waiting for SponsorBlock")
-            wait_for_sponsorblock_segments_to_be_added(episode_url)
+            wait_for_sponsorblock_segments_to_be_added(episode_data.episode_link)
         else:
             logger.debug("[App] No sponsored segments found in description")
     except Exception as e:
         logger.warning(f"[App] Failed during SponsorBlock wait: {e}", exc_info=True)

     try:
-        track = str(int(audiobookshelf_track) + 1).zfill(4)
+        episode_data.episode_number = str(int(audiobookshelf_track) + 1).zfill(4)
     except (ValueError,TypeError) as e:
         logger.warning(f"[App] Failed incrementing audiobookshelf track: {e}", exc_info=True)
         return

     try:
-        options = return_download_options(episode_info,track)
+        options = return_download_options(episode_data)
     except Exception as e:
         logger.error(f"[App] Failed to generate download options: {e}", exc_info=True)
         return
@@ -174,17 +172,17 @@ def download_episode() -> None:
     logger.info("[App] Downloading new episode")
     try:
         with yt_dlp.YoutubeDL(options) as episode:
-            episode.download(episode_url)
+            episode.download(episode_data.episode_link)
         logger.debug("[App] Download completed successfully")
     except Exception as e:
         logger.error(f"[App] Failed to download episode: {e}", exc_info=True)
         return

     logger.info("[App] Uploading episode via SFTP")
-    upload_via_sftp(f"perun-{episode_info['date']}.mp3")
+    upload_via_sftp(f"perun-{episode_data.episode_date}.mp3")

     logger.info("[App] Sending release notification")
-    send_notification_via_ssh(f"Perun episode {track} has been released",episode_info["title"])
+    send_notification_via_ssh(f"Perun episode {episode_data.episode_number} has been released",episode_data.episode_title)
     logger.info("[App] Workflow complete")
@@ -6,6 +6,7 @@ cffi==2.0.0
 charset-normalizer==3.4.3
 cryptography==46.0.2
 dotenv==0.9.9
+feedparser==6.0.12
 idna==3.10
 invoke==2.2.0
 mutagen==1.47.0
@@ -15,7 +16,8 @@ pycryptodomex==3.23.0
 PyNaCl==1.6.0
 python-dotenv==1.1.1
 requests==2.32.5
+sgmllib3k==1.0.0
+simple-logger-handler==0.1.0
 sponsorblock.py==0.2.3
 urllib3==2.5.0
 websockets==15.0.1
-simple-logger-handler==0.1.0
src/perun/rss_feed_handler.py (new file, 85 lines added)
@@ -0,0 +1,85 @@
import feedparser
from simple_logger_handler import setup_logger
import time
from urllib.error import URLError
from typing import Optional
from dataclasses import dataclass
from datetime import datetime

@dataclass
class EpisodeData:
    episode_date: str
    episode_description: str
    episode_link: str
    episode_number: str
    episode_title: str
    episode_ytid: str

logger = setup_logger(__name__)

def grab_latest_chapter_information(id: str, max_retries: int = 3) -> Optional[EpisodeData]:
    """
    Fetches the latest episodes information from a Youtube RSS feed, with retries on network-related errors.

    Parameters:
        id: Youtube channel ID as a string.
        max_retries: Number of retry attempts if fetching the feed fails due to network issues.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            episode_date: Date when it was published in iso format (2025-11-30).
            episode_description: Episode description.
            episode_link: YouTube link.
            episode_number: Episode number.
            episode_title: Episode title.
            episode_ytid: Episode YouTube ID.
        Returns None if the feed has no entries or all retries are exhausted.

    Raises:
        ValueError: If the feed has no entries.
        Other network-related exceptions: If fetching fails after retries.
    """

    rss_feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={id}"
    attempt = 1

    while attempt <= max_retries:
        logger.debug(f"[Feed] Parsing feed URL: {rss_feed_url} (attempt {attempt}/{max_retries})")
        try:
            feed = feedparser.parse(rss_feed_url)

            if not feed.entries:
                logger.warning(f"[Feed] No entries found for feed {id}")
                return None

            latest_chapter_data = feed["entries"][0]
            episode_link = latest_chapter_data["link"]
            episode_title = latest_chapter_data["title"]
            episode_description = latest_chapter_data["summary"]
            episode_date = latest_chapter_data["published"]
            episode_date = datetime.fromisoformat(episode_date).date().isoformat()
            episode_ytid = latest_chapter_data["yt_videoid"]

            logger.info(f"[Feed] Latest episode '{episode_title}': {episode_link}")
            logger.debug(f"[Feed] Latest episode '{episode_title}' (YouTubeId {episode_ytid}): {episode_link} -> {episode_description}")
            return EpisodeData(
                episode_date=episode_date,
                episode_description=episode_description,
                episode_link=episode_link,
                episode_number="",
                episode_title=episode_title,
                episode_ytid=episode_ytid
            )

        except (URLError, OSError) as e:
            logger.warning(f"[Feed] Network error on attempt {attempt} for feed {id}: {e}")
            if attempt == max_retries:
                logger.error(f"[Feed] All {max_retries} attempts failed for feed {id}")
                return None
            backoff = 2 ** (attempt - 1)
            logger.debug(f"[Feed] Retrying in {backoff} seconds...")
            time.sleep(backoff)
            attempt += 1

if __name__ == "__main__":
    print(grab_latest_chapter_information("UCC3ehuUksTyQ7bbjGntmx3Q"))
@@ -13,59 +13,6 @@ logger = setup_logger(__name__)
 load_dotenv()
-YOUTUBE_CHANNEL_URL = os.getenv("YOUTUBE_CHANNEL_URL")
-
-
-def get_url_for_latest_video():
-    """
-    Fetch the URL of the latest video from a YouTube channel.
-    """
-    logger.info("[YouTube] Fetching latest video URL from YouTube channel")
-    options = {
-        "extract_flat": True,
-        "playlist_items": "1",
-        "quiet": True,
-        "forcejson": True,
-        "simulate": True,
-    }
-
-    try:
-        with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
-            with yt_dlp.YoutubeDL(options) as video:
-                info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download=False)
-    except Exception as e:
-        logger.error(f"[YouTube] Failed to fetch latest video info: {e}", exc_info=True)
-        return None
-
-    if "entries" in info_dict and len(info_dict["entries"]) > 0:
-        latest_url = info_dict["entries"][0]["url"]
-        logger.debug(f"[YouTube] Latest video URL found: {latest_url}")
-        return latest_url
-    else:
-        logger.warning("[YouTube] No entries found in channel feed")
-        return None
-
-def get_youtube_data(url: str) -> dict:
-    """
-    Fetch metadata for a given YouTube video URL.
-    """
-    logger.info(f"Fetching YouTube metadata for video: {url}")
-    try:
-        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as video:
-            info_dict = video.extract_info(url, download=False)
-    except Exception as e:
-        logger.error(f"[YouTube] Failed to fetch YouTube video info for {url}: {e}", exc_info=True)
-        return {}
-
-    video_data = {
-        "date": datetime.datetime.fromtimestamp(
-            info_dict["timestamp"], datetime.timezone.utc
-        ).strftime("%Y-%m-%d"),
-        "title": info_dict["title"],
-        "description": info_dict.get("description", "")
-    }
-
-    logger.debug(f"[YouTube] Fetched video data: {json.dumps(video_data, indent=4)}")
-    return video_data

 def check_for_sponsorblock_segments(youtube_video:str) -> bool:
     client = sb.Client()
     try:
@@ -78,14 +25,14 @@ def check_for_sponsorblock_segments(youtube_video:str) -> bool:
     logger.info(f"[SponsorBlock] SponsorBlock segments found for video: {youtube_video}")
     return True

-def return_download_options(information:dict,track:str)->dict:
+def return_download_options(episode_data)->dict:
     download_options = {
         "quiet": True,
         "noprogress": True,
         "format": "bestaudio/best",
         "extract_audio": True,
         "audio_format": "mp3",
-        "outtmpl": f"perun-{information['date']}.%(ext)s",
+        "outtmpl": f"perun-{episode_data.episode_date}.%(ext)s",
         "addmetadata": True,
         "postprocessors":[
             {"api": "https://sponsor.ajay.app",
@@ -109,12 +56,12 @@ def return_download_options(information:dict,track:str)->dict:
             "key": "FFmpegMetadata",
         }],
         "postprocessor_args": [
-            "-metadata", f"title={information['title']}",
+            "-metadata", f"title={episode_data.episode_title}",
             "-metadata", "artist=Perun",
-            "-metadata", f"track={track}",
-            "-metadata", f"date={information['date']}",
-            "-metadata", f"comment={return_string_as_html(information['description'])}",
-            "-metadata", f"description={return_string_as_html(information['description'])}",
+            "-metadata", f"track={episode_data.episode_number}",
+            "-metadata", f"date={episode_data.episode_date}",
+            "-metadata", f"comment={return_string_as_html(episode_data.episode_description)}",
+            "-metadata", f"description={episode_data.episode_ytid}",
         ],
         "merge_output_format": "mp3"
     }