From a47d6667bc40167e88a19e74d789237525974099 Mon Sep 17 00:00:00 2001 From: Florian Date: Fri, 10 Oct 2025 17:29:42 +0200 Subject: [PATCH] Imported BBC Radio1 handler and heavily rewrote it. Refactored metadata extraction -> created an EpisodeData dataclass to represent metadata clearly. Modularized download process: Broke down huge main function into several parts. Added error handling for yt_dlp. Added docstrings to functions for ease of understanding. --- src/bbcr1/__pycache__/helper.cpython-312.pyc | Bin 0 -> 3095 bytes src/bbcr1/config.py | 26 +++ src/bbcr1/get_episode.py | 162 +++++++++++++++++++ src/bbcr1/helper.py | 52 ++++++ src/bbcr1/ytdlp_helper.py | 47 ++++++ 5 files changed, 287 insertions(+) create mode 100644 src/bbcr1/__pycache__/helper.cpython-312.pyc create mode 100644 src/bbcr1/config.py create mode 100644 src/bbcr1/get_episode.py create mode 100644 src/bbcr1/helper.py create mode 100644 src/bbcr1/ytdlp_helper.py diff --git a/src/bbcr1/__pycache__/helper.cpython-312.pyc b/src/bbcr1/__pycache__/helper.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..97dec813dfa9e877f9a5a46f4c84bbb036bbdb57 GIT binary patch literal 3095 zcmai0O-vlg74E-jW*TP9-+=jBwedP;Y?uaYv>O}P#Rm4rF^cePq_85RW~v4n&!4Jl zU>Ob3M9RS{SqeFs&1w}6In2sAaOB8wrASe?Sqaoa+QVMLZM5WMDRN%*%&_1fr4aS= z-m6!yUw!qx`A0CQ3DEpqCo6wx5QHE3qTbZnaO4_&J^VV`oT~LvN8z-I`jm>H2=8OBfY z!U(2tW|^(TSvH=?Gs;iW_1C{NRWP52Z3Q*96klLkU0A$Se$+DJS^ZE#iFo!Ue7l`LlTl-*79_zUO~RPzQFBO-a}p* z<_hw7Zt}IHX$Akdk(<R||D*3_9sj&7J|S0N!{r%y1)B2+SUryu(D7D0!@#C^4lClSQq`aD5g_L! 
z#}>3r2}wn1mEYN0?PYDv@rC39JhM%?D6v4@tSJ9ospF34cvC5gMTJ|h@&vrqzym}L zhz0qkujqpt@y(s^w4U$?Pj~>3a|M|jG@=q7of}M&Y?{N2GBcfXl?`kXw_zRQr(i`m zU`RLLrn+r#N5o7pjN(7rONhBa*h*MDn~Zy#JGuLKKEte3JZTZi;z?qG%x$``3RR^v zo---Mh;oY2y;g-l%QCL-jA!!cB}~Y>V8X#qGtdF>)zG2Z>!`hU-|&GtQffVJ>Duo9 z>&>rj?u_i12Q34ozN6m0o#3H9=ICPw`g>0AdwU~J&$umKJ!#28{3s)$xEjqb0`J7XQ3>7#?uK1LgLEaI{RE z@aSRqeJA|>UfV%v6poWsf3Ss z8Q|4qP5?@^z{6~24HHK7T2ge6J9L)SlgV@rFApWM>72=|r4-iFn3+)1O+A?HW-NDcm0grx#As2cXRJ3T|@uQ)xi6cK~<8KLx72&UsY#ybtPD zMz0lX9_PIG!@~=eGICFUSc6CHGl5Wblh)F4mIH_Zo{2}S{1n$;548}yKRf$)CN?!a z6`LB-3fkunKc4z^Y-WB@D>TPGc|7xA>dDN+h%queJggPeC$Xvd*o6073(Cyg^h81R zK61kKc_V3o8;^+IkBW#w?!wB0t?R=XMmFH}SEG$r?}YL~!dbfd^19PmWF*uxBbGAZhF(L-)zt93_87r-7~x^*==_m>26gK+rF`*Io-GIuEFvHyUlQ< z;h%Th{A$kbxm~_%w+%be$RDDWMmVJs5(1sJ*y+9V$Q7@G5}pySP`WDH$N^XhFJ*)m zTQbBuUa7tOx-#6q_v^(gWgauRJ`Yx2w!RfS$6jh*`^O=-`4pZcAHsgl`1C1s6NsE1wE5@OezEE@EQEuZo@J#Hj#7rCAmqiskDSfseb_Prb3C bb%|HY?Pc_BcSV5d{){(#m-shddBOY_ 2: + filename_timestamp = sys.argv[2] + else: + logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.") + filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d") + else: + filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d") + + if "calculate_amount_of_fridays" in configuration_settings: + track = get_friday_number(episode_data.extracted_timestamp) + else: + track = filename_timestamp + + return episode_data, filename_timestamp, track + + +def _prepare_ffmpeg_chapters(episode_data, configuration_settings): + """ + Prepare chapters for FFmpeg if cutting intro is requested. 
+ """ + if not episode_data.chapters or len(episode_data.chapters) < 2: + logger.warning("Cutting intro requested but no chapters found.") + return None + + return modify_chapters_for_ffmpeg( + episode_data.chapters[1:], episode_data.chapters[0]["end_time"] + ) + + +def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str): + """ + Download episode audio using yt_dlp with metadata. + """ + ytdl_options = { + "quiet": True, + "noprogress": True, + "format": "bestaudio/best", + "extract_audio": True, + "audio_format": "mp3", + "outtmpl": f"{filename_timestamp}.%(ext)s", + "addmetadata": True, + "postprocessors": [ + { + "key": "FFmpegExtractAudio", + "preferredcodec": "mp3", + }, + { + "key": "FFmpegMetadata", + } + ], + "postprocessor_args": [ + "-metadata", f"title={episode_data.extracted_title}", + "-metadata", f"artist={artist}", + "-metadata", f"track={track}", + "-metadata", f"date={filename_timestamp}", + "-metadata", f"comment={episode_data.extracted_description}" + ], + "merge_output_format": "mp3" + } + + with yt_dlp.YoutubeDL(ytdl_options) as episode: + episode.download(episode_url) + + +def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str): + """ + Cut the intro from the episode using FFmpeg and apply metadata. 
+ """ + logger.info("Fixing chapters and metadata with FFmpeg") + + temp_metadata_path = None + try: + with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt") as temp_file: + temp_file.write(ffmpeg_chapters) + temp_metadata_path = temp_file.name + + ffmpeg_command = [ + "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]), + "-hide_banner", "-loglevel", "error", + "-i", f"{filename_timestamp}.mp3", + "-i", temp_metadata_path, + "-map_metadata", "1", + "-metadata", f"title={episode_data.extracted_title}", + "-metadata", f"artist={artist}", + "-metadata", f"track={track}", + "-metadata", f"date={filename_timestamp}", + "-metadata", f"comment={episode_data.extracted_description}", + "-codec", "copy", + f"{filename_timestamp}-{episode_data.extracted_id}.mp3" + ] + + subprocess.run(ffmpeg_command, check=True) + remove(f"{filename_timestamp}.mp3") + except subprocess.CalledProcessError as e: + logger.error(f"Error running FFmpeg: {e}") + finally: + if temp_metadata_path and remove: + try: + remove(temp_metadata_path) + except Exception as ex: + logger.warning(f"Could not remove temp metadata file: {ex}") + + +def download_episode(configuration_settings: dict, episode_url: str): + logger.info("Extracting metadata") + episode_data = return_episode_data(episode_url) + + episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data) + + artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist") + + ffmpeg_chapters = None + if configuration_settings.get("cut_intro"): + ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings) + + logger.info("Downloading episode") + _download_audio(episode_url, episode_data, filename_timestamp, track, artist) + + if ffmpeg_chapters: + _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist) + else: + rename(f"{filename_timestamp}.mp3", 
f"{filename_timestamp}-{episode_data.extracted_id}.mp3") + + logger.info("Finished") + + + + +if __name__ == "__main__": + show_name = sys.argv[1] + logger.info (f"Processing {show_name}") + episode_url = return_url_of_latest_episode(settings[show_name]["base_url"]) + download_episode(settings[show_name],episode_url) diff --git a/src/bbcr1/helper.py b/src/bbcr1/helper.py new file mode 100644 index 0000000..def8157 --- /dev/null +++ b/src/bbcr1/helper.py @@ -0,0 +1,52 @@ +from datetime import datetime, timezone, timedelta +from typing import List, Dict + +import subprocess + + +def time_to_milliseconds(time,length_to_cut) -> int: + return int(time * 1000 - length_to_cut * 1000) + +def add_html_tags_to_description(input_text) -> str: + return("<p>"+input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>")+"</p>") + + +def get_friday_number(extracted_timestamp) -> int: + dt = datetime.fromtimestamp(extracted_timestamp) + start_of_year = datetime(dt.year, 1, 1) + days_until_first_friday = (4 - start_of_year.weekday()) % 7 + first_friday = start_of_year + timedelta(days=days_until_first_friday) + fridays_passed = (dt - first_friday).days // 7 + 1 + return fridays_passed + +def return_url_of_latest_episode(base_url:str) -> str: + result = subprocess.run(["get_iplayer","--pid-recursive-list",base_url], capture_output=True, text=True) + latest_episode_id = result.stdout.split("\n")[-3].split(",")[-1][1:] + return (f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}") + +def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str: + """ + Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut. + + Args: + chapters (list): List of chapter dicts with "start_time", "end_time", and "title". + length_to_cut (int/float): Amount of time to cut from start, in seconds. + + Returns: + str: Chapters formatted as ffmpeg metadata. 
+ """ + for entry in chapters: + if "start_time" in entry: + entry["start_time"]=time_to_milliseconds(entry["start_time"],length_to_cut) + if "end_time" in entry: + entry["end_time"]=time_to_milliseconds(entry["end_time"],length_to_cut) + + chapter_format = ";FFMETADATA1\n" + for entry in chapters: + chapter_format+=("[CHAPTER]\n") + chapter_format+=("TIMEBASE=1/1000\n") + chapter_format+=(f"START={entry['start_time']}\n") + chapter_format+=(f"END={entry['end_time']}\n") + chapter_format+=(f"title={entry['title']}\n\n") + + return(chapter_format) \ No newline at end of file diff --git a/src/bbcr1/ytdlp_helper.py b/src/bbcr1/ytdlp_helper.py new file mode 100644 index 0000000..5f0c121 --- /dev/null +++ b/src/bbcr1/ytdlp_helper.py @@ -0,0 +1,47 @@ +import yt_dlp +from helper import add_html_tags_to_description +from typing import List, Optional +from dataclasses import dataclass + +@dataclass +class EpisodeData: + chapters: List + extracted_description: str + extracted_id: str + extracted_title: str + extracted_timestamp: Optional[int] + + +def return_episode_data(episode_url: str) -> EpisodeData: + """ + Quietly extracts meta information about a given radio show. + + Args: + episode_url (str): The URL of the episode. + + Returns: + EpisodeData: A dataclass containing episode metadata: + - chapters (List): Chapters in JSON format. + - extracted_description (str): HTML-wrapped description of the episode. + - extracted_id (str): Unique episode ID. + - extracted_title (str): Episode title. + - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available. 
+ + """ + try: + with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl: + info_dict = ydl.extract_info(episode_url, download=False) + except Exception as e: + return {"error": f"Failed to extract info: {e}"} + + return EpisodeData( + chapters=info_dict.get("chapters", []), + extracted_description=add_html_tags_to_description(info_dict.get("description", "")), + extracted_id=info_dict.get("id", ""), + extracted_title=info_dict.get("title", ""), + extracted_timestamp=info_dict.get("timestamp"), + ) + + +if __name__ == "__main__": + print(return_episode_data("https://www.bbc.co.uk/sounds/play/m002jtcqyt ")) \ No newline at end of file