service-podcasts/src/perun/youtube_handler.py
2025-11-03 18:58:44 +01:00

126 lines
3.7 KiB
Python

import yt_dlp
import datetime
import contextlib
from dotenv import load_dotenv
import os
from helper import return_string_as_html
from simple_logger_handler import setup_logger
import json
import sponsorblock as sb
# Module-level logger used by the functions below.
logger = setup_logger(__name__)
# Load variables from a .env file into the environment before they are read.
load_dotenv()
# Channel address queried by get_url_for_latest_video(); None when the
# environment variable is not set.
YOUTUBE_CHANNEL_URL = os.getenv("YOUTUBE_CHANNEL_URL")
def get_url_for_latest_video():
    """
    Fetch the URL of the latest video from a YouTube channel.

    The channel is read from the module-level ``YOUTUBE_CHANNEL_URL`` and
    only the first playlist item is requested; nothing is downloaded.

    Returns:
        The ``url`` field of the first entry, or ``None`` when the channel
        URL is not configured, the lookup fails, no entries come back, or
        the first entry carries no URL.
    """
    logger.info("[YouTube] Fetching latest video URL from YouTube channel")
    if not YOUTUBE_CHANNEL_URL:
        # Fail with a clear message instead of an opaque yt-dlp traceback.
        logger.error("[YouTube] YOUTUBE_CHANNEL_URL is not configured")
        return None

    options = {
        "extract_flat": True,
        "playlist_items": "1",
        "quiet": True,
        "forcejson": True,
        "simulate": True,
    }
    try:
        # stdout is discarded for the duration of the call, presumably
        # because "forcejson" makes yt-dlp print the info JSON there.
        with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
            with yt_dlp.YoutubeDL(options) as video:
                info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download=False)
    except Exception as e:
        logger.error(f"[YouTube] Failed to fetch latest video info: {e}", exc_info=True)
        return None

    # extract_info may hand back None, and "entries" may be absent or lazy;
    # normalise to a plain list before indexing.
    entries = list((info_dict or {}).get("entries") or [])
    if not entries:
        logger.warning("[YouTube] No entries found in channel feed")
        return None

    first_entry = entries[0]
    latest_url = first_entry.get("url") if first_entry else None
    if not latest_url:
        logger.warning("[YouTube] Latest channel entry has no URL")
        return None

    logger.debug(f"[YouTube] Latest video URL found: {latest_url}")
    return latest_url
def _extract_video_date(info_dict: dict):
    """Return the video's date as ``YYYY-MM-DD``, or ``None`` if unavailable."""
    timestamp = info_dict.get("timestamp")
    if timestamp is not None:
        try:
            return datetime.datetime.fromtimestamp(
                timestamp, datetime.timezone.utc
            ).strftime("%Y-%m-%d")
        except (OverflowError, OSError, TypeError, ValueError):
            # Unusable timestamp: fall through to the upload_date field.
            pass
    # yt-dlp documents upload_date as a YYYYMMDD string.
    upload_date = info_dict.get("upload_date")
    if upload_date:
        try:
            return datetime.datetime.strptime(upload_date, "%Y%m%d").strftime("%Y-%m-%d")
        except (TypeError, ValueError):
            # Malformed upload_date: report the date as unavailable.
            pass
    return None


def get_youtube_data(url: str) -> dict:
    """
    Fetch metadata for a given YouTube video URL.

    Args:
        url: Address of the video to look up.

    Returns:
        A dict with the keys ``date`` (``YYYY-MM-DD``, UTC), ``title`` and
        ``description``. An empty dict is returned when the metadata cannot
        be fetched or contains neither a usable date nor a title.
    """
    logger.info(f"Fetching YouTube metadata for video: {url}")
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as video:
            info_dict = video.extract_info(url, download=False)
    except Exception as e:
        logger.error(f"[YouTube] Failed to fetch YouTube video info for {url}: {e}", exc_info=True)
        return {}

    if not info_dict:
        logger.error(f"[YouTube] No metadata returned for {url}")
        return {}

    date = _extract_video_date(info_dict)
    title = info_dict.get("title")
    if date is None or title is None:
        # Keep the failure contract of this function (empty dict) rather than
        # raising KeyError/TypeError on optional yt-dlp fields.
        logger.error(f"[YouTube] Metadata for {url} is missing a title or upload date")
        return {}

    video_data = {
        "date": date,
        "title": title,
        # The key can be present with a None value; always hand out a string.
        "description": info_dict.get("description") or "",
    }
    logger.debug(f"[YouTube] Fetched video data: {json.dumps(video_data, indent=4)}")
    return video_data
def check_for_sponsorblock_segments(youtube_video:str) -> bool:
    """
    Report whether SponsorBlock returns any skip segments for a video.

    Args:
        youtube_video: Video reference passed unchanged to
            ``Client.get_skip_segments``.

    Returns:
        ``True`` when at least one segment is returned; ``False`` when the
        lookup raises ``NotFoundException`` or yields an empty result.
        Other errors raised by the client are not handled here.
    """
    client = sb.Client()
    try:
        segments = client.get_skip_segments(youtube_video)
    except sb.errors.NotFoundException:
        logger.debug(f"[SponsorBlock] No SponsorBlock information for video:{youtube_video}")
        return False
    if segments:
        logger.debug(f"[SponsorBlock] SponsorBlock segments found for video: {youtube_video}")
        return True
    # An empty result must still produce a real bool, as the signature promises.
    return False
def return_download_options(information:dict,track:str)->dict:
    """
    Build the yt-dlp option dict used to download one episode as MP3.

    Args:
        information: Video metadata; the ``date``, ``title`` and
            ``description`` keys are read.
        track: Value written into the ``track`` metadata tag.

    Returns:
        The assembled option dict (also logged at debug level).
    """
    def metadata_tag(name, value):
        # One "-metadata name=value" argument pair.
        return ["-metadata", f"{name}={value}"]

    output_template = f"perun-{information['date']}.%(ext)s"

    # Fetch sponsor segments, cut them out, convert to MP3, then write tags.
    sponsorblock_step = {
        "api": "https://sponsor.ajay.app",
        "categories": ["sponsor"],
        "key": "SponsorBlock",
        "when": "after_filter",
    }
    chapter_step = {
        "force_keyframes": False,
        "key": "ModifyChapters",
        "remove_chapters_patterns": [],
        "remove_ranges": [],
        "remove_sponsor_segments": ["sponsor"],
        "sponsorblock_chapter_title": "[SponsorBlock]: %(category_names)l",
    }
    audio_step = {"key": "FFmpegExtractAudio", "preferredcodec": "mp3"}
    metadata_step = {"key": "FFmpegMetadata"}

    metadata_args = [
        *metadata_tag("title", information['title']),
        *metadata_tag("artist", "Perun"),
        *metadata_tag("track", track),
        *metadata_tag("date", information['date']),
        *metadata_tag("comment", return_string_as_html(information['description'])),
        *metadata_tag("description", return_string_as_html(information['description'])),
    ]

    # Key order is kept stable because the dict is dumped to the debug log.
    download_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": output_template,
        "addmetadata": True,
        "postprocessors": [sponsorblock_step, chapter_step, audio_step, metadata_step],
        "postprocessor_args": metadata_args,
        "merge_output_format": "mp3",
    }
    logger.debug(f"[YouTube] Created download options:\n {json.dumps(download_options, indent=4)}")
    return download_options
if __name__ == "__main__":
    # Manual smoke test: print the SponsorBlock lookup result for one video.
    sample_video = "https://www.youtube.com/watch?v=M0t8UYZ9rrQ"
    has_segments = check_for_sponsorblock_segments(sample_video)
    print(has_segments)