"""BBC Radio 1 episode downloader (bbcr1).

Reconstructed from a whitespace-collapsed git patch that added four modules
under ``src/bbcr1/``: ``config.py``, ``helper.py``, ``ytdlp_helper.py`` and
``get_episode.py``. They are laid out below as consecutive sections in
dependency order, so the sibling imports of the original files
(``from config import settings`` and friends) resolve to the definitions
above their point of use.

NOTE(review): the same patch also adds
``src/bbcr1/__pycache__/helper.cpython-312.pyc``. Compiled bytecode should not
be committed; drop it from the change and ignore ``__pycache__/``.
"""

import logging
import subprocess
import sys
import tempfile
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from os import remove, rename
from typing import Dict, List, Optional

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# src/bbcr1/config.py
# ---------------------------------------------------------------------------

# Per-show settings, keyed by the show name passed as the first CLI argument.
# Keys read by the code below: artist, base_url, cut_intro, modify_timestamp,
# calculate_amount_of_fridays, use_different_release_date,
# remove_amount_of_characters_from_title.
settings = {
    "Pete Tong": {
        "artist": "Pete Tong",
        "base_url": "https://www.bbc.co.uk/programmes/b006ww0v",
        "cut_intro": True,
        "modify_timestamp": 7200,
        "calculate_amount_of_fridays": True,
    },
    "Radio 1s Classic Essential Mix": {
        "artist": "Radio 1s Classic Essential Mix",
        "use_different_release_date": True,
        "base_url": "https://www.bbc.co.uk/programmes/b00f3pc4",
        "cut_intro": True,
        "remove_amount_of_characters_from_title": -5,
    },
    "Defected on Radio 1 Dance": {
        "artist": "Defected on Radio 1 Dance",
        "base_url": "https://www.bbc.co.uk/programmes/m00287n1",
        "remove_amount_of_characters_from_title": -10,
    },
    "Radio 1s Essential Mix": {
        "artist": "Radio 1s Essential Mix",
        "base_url": "https://www.bbc.co.uk/programmes/b006wkfp",
        "cut_intro": True,
    },
}


# ---------------------------------------------------------------------------
# src/bbcr1/helper.py
# ---------------------------------------------------------------------------

# Characters that the FFMETADATA format requires to be backslash-escaped.
_FFMETADATA_ESCAPES = str.maketrans({char: "\\" + char for char in "=;#\\\n"})


def time_to_milliseconds(time, length_to_cut) -> int:
    """Return ``time - length_to_cut`` (both in seconds) as whole milliseconds.

    The difference is rounded rather than truncated so that float noise
    (e.g. ``1.001 * 1000 == 1000.9999999999999``) cannot lose a millisecond.
    """
    return round((time - length_to_cut) * 1000)


def add_html_tags_to_description(input_text) -> str:
    """Wrap a plain-text description in HTML paragraph and line-break tags.

    Blank-line separated paragraphs each become ``<p>...</p>`` (joined by a
    newline); single newlines inside a paragraph become ``<br>``.

    NOTE(review): the tag literals were stripped from the patch as received;
    ``<p>``, ``</p>`` and ``<br>`` are reconstructed from the line-break
    pattern they left behind - confirm against the real file. The original
    chained ``replace`` calls also converted the paragraph separator it had
    just inserted into ``<br>``; paragraphs are handled one at a time here so
    that no longer happens.
    """
    paragraphs = (
        paragraph.replace("\n", "<br>") for paragraph in input_text.split("\n\n")
    )
    return "<p>" + "</p>\n<p>".join(paragraphs) + "</p>"


def get_friday_number(extracted_timestamp) -> int:
    """Return the 1-based index of the Friday-started week within the year.

    The epoch timestamp is interpreted in UTC, matching how the release date
    is derived elsewhere in this module (the original used naive local time,
    so the result depended on the host time zone). Dates before the first
    Friday of the year yield 0.
    """
    moment = datetime.fromtimestamp(extracted_timestamp, tz=timezone.utc)
    year_start = datetime(moment.year, 1, 1, tzinfo=timezone.utc)
    # weekday(): Monday == 0 ... Friday == 4.
    first_friday = year_start + timedelta(days=(4 - year_start.weekday()) % 7)
    return (moment - first_friday).days // 7 + 1


def return_url_of_latest_episode(base_url: str) -> str:
    """Ask ``get_iplayer`` for a programme's episodes and return the newest URL.

    The episode id is taken from the last comma-separated field of the
    third-from-last output line, as in the original implementation.

    Raises:
        RuntimeError: if the output is too short or yields an empty id.
    """
    result = subprocess.run(
        ["get_iplayer", "--pid-recursive-list", base_url],
        capture_output=True,
        text=True,
    )
    output_lines = result.stdout.split("\n")
    if len(output_lines) < 3:
        raise RuntimeError(
            f"Unexpected get_iplayer output for {base_url}: {result.stderr.strip()}"
        )
    # Fields are separated by ", ", so the id carries leading whitespace.
    latest_episode_id = output_lines[-3].split(",")[-1].strip()
    if not latest_episode_id:
        raise RuntimeError(f"Could not find an episode id for {base_url}")
    return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"


def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
    """Render chapters as FFmpeg metadata, shifted earlier by ``length_to_cut``.

    Args:
        chapters: Chapter dicts with "start_time", "end_time" (seconds) and
            "title". The dicts are not modified.
        length_to_cut: Seconds removed from the start of the audio.

    Returns:
        An ``;FFMETADATA1`` document with one ``[CHAPTER]`` section per entry,
        using a millisecond timebase.

    Raises:
        KeyError: if a chapter lacks one of the three keys.
    """
    sections = [";FFMETADATA1\n"]
    for entry in chapters:
        start = time_to_milliseconds(entry["start_time"], length_to_cut)
        end = time_to_milliseconds(entry["end_time"], length_to_cut)
        # '=', ';', '#', '\' and newline are syntax in this format.
        title = str(entry["title"]).translate(_FFMETADATA_ESCAPES)
        sections.append(
            "[CHAPTER]\n"
            "TIMEBASE=1/1000\n"
            f"START={start}\n"
            f"END={end}\n"
            f"title={title}\n\n"
        )
    return "".join(sections)


# ---------------------------------------------------------------------------
# src/bbcr1/ytdlp_helper.py
# ---------------------------------------------------------------------------

@dataclass
class EpisodeData:
    """Metadata extracted for one episode."""

    chapters: List  # chapter dicts as reported by yt-dlp (may be empty)
    extracted_description: str  # description wrapped in HTML tags
    extracted_id: str
    extracted_title: str
    extracted_timestamp: Optional[int]  # epoch seconds, None if not reported


def return_episode_data(episode_url: str) -> EpisodeData:
    """Quietly extract metadata for an episode without downloading it.

    Args:
        episode_url: The URL of the episode.

    Returns:
        An ``EpisodeData`` built from the yt-dlp info dict.

    Raises:
        RuntimeError: if yt-dlp cannot extract the info. The original returned
            an ``{"error": ...}`` dict here, which every caller then failed on
            with an ``AttributeError``.
    """
    # Imported lazily: only the extract/download paths need yt-dlp.
    import yt_dlp

    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
            info_dict = ydl.extract_info(episode_url, download=False)
    except yt_dlp.utils.DownloadError as e:
        raise RuntimeError(f"Failed to extract info: {e}") from e

    # `or` also covers keys that are present but set to None.
    return EpisodeData(
        chapters=info_dict.get("chapters") or [],
        extracted_description=add_html_tags_to_description(
            info_dict.get("description") or ""
        ),
        extracted_id=info_dict.get("id", ""),
        extracted_title=info_dict.get("title", ""),
        extracted_timestamp=info_dict.get("timestamp"),
    )


# ---------------------------------------------------------------------------
# src/bbcr1/get_episode.py
# ---------------------------------------------------------------------------

def _require_timestamp(episode_data) -> int:
    """Return the episode timestamp, failing clearly when it is missing."""
    if episode_data.extracted_timestamp is None:
        raise ValueError(
            "Episode metadata has no timestamp; cannot derive date or track."
        )
    return episode_data.extracted_timestamp


def _apply_configurations(configuration_settings: dict, episode_data):
    """Apply per-show settings to the episode data.

    Flags are evaluated by value, so setting one to ``False`` (or a numeric
    option to 0) disables it instead of merely being present.

    Returns:
        tuple: (episode_data, filename_timestamp, track), where
        ``filename_timestamp`` is a ``YYYY-MM-DD`` string (or ``sys.argv[2]``
        verbatim when a different release date is requested) and ``track`` is
        either the Friday number or ``filename_timestamp``.

    Raises:
        ValueError: if the timestamp is needed but the episode has none.
    """
    amount_to_remove = configuration_settings.get(
        "remove_amount_of_characters_from_title"
    )
    if amount_to_remove:
        episode_data.extracted_title = episode_data.extracted_title[:amount_to_remove]

    timestamp_offset = configuration_settings.get("modify_timestamp")
    if timestamp_offset:
        episode_data.extracted_timestamp = (
            _require_timestamp(episode_data) - timestamp_offset
        )

    filename_timestamp = None
    if configuration_settings.get("use_different_release_date"):
        if len(sys.argv) > 2:
            filename_timestamp = sys.argv[2]
        else:
            logger.warning(
                "Use_different_release_date set but missing sys.argv[2]. "
                "Falling back to default."
            )
    if filename_timestamp is None:
        filename_timestamp = datetime.fromtimestamp(
            _require_timestamp(episode_data), tz=timezone.utc
        ).strftime("%Y-%m-%d")

    if configuration_settings.get("calculate_amount_of_fridays"):
        track = get_friday_number(_require_timestamp(episode_data))
    else:
        track = filename_timestamp

    return episode_data, filename_timestamp, track


def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
    """Build FFmpeg chapter metadata with the first chapter (the intro) removed.

    Returns None, after logging a warning, when there are fewer than two
    chapters. ``configuration_settings`` is accepted for interface
    compatibility and is not read.
    """
    if not episode_data.chapters or len(episode_data.chapters) < 2:
        logger.warning("Cutting intro requested but no chapters found.")
        return None

    intro_end = episode_data.chapters[0]["end_time"]
    return modify_chapters_for_ffmpeg(episode_data.chapters[1:], intro_end)


def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """Download the episode as ``<filename_timestamp>.mp3`` with tags applied."""
    # Imported lazily: only the extract/download paths need yt-dlp.
    import yt_dlp

    # NOTE(review): "extract_audio", "audio_format" and "addmetadata" look like
    # CLI flag names rather than YoutubeDL API options - confirm whether they
    # have any effect; the postprocessors below do the actual work.
    ytdl_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": f"{filename_timestamp}.%(ext)s",
        "addmetadata": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            },
            {
                "key": "FFmpegMetadata",
            },
        ],
        "postprocessor_args": [
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
        ],
        "merge_output_format": "mp3",
    }

    with yt_dlp.YoutubeDL(ytdl_options) as episode:
        # download() is documented to take a list of URLs.
        episode.download([episode_url])


def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """Cut the first chapter off the download and write the final tagged file.

    Reads ``<filename_timestamp>.mp3`` and writes
    ``<filename_timestamp>-<episode id>.mp3``; the input file is deleted only
    after FFmpeg succeeds.

    Returns:
        bool: True on success, False if FFmpeg exited with an error (which is
        logged; the uncut download is left in place).
    """
    logger.info("Fixing chapters and metadata with FFmpeg")

    source_path = f"{filename_timestamp}.mp3"
    temp_metadata_path = None
    try:
        # FFmpeg reads the metadata file as UTF-8; do not rely on the locale.
        with tempfile.NamedTemporaryFile(
            delete=False, mode="w", suffix=".txt", encoding="utf-8"
        ) as temp_file:
            temp_file.write(ffmpeg_chapters)
            temp_metadata_path = temp_file.name

        ffmpeg_command = [
            "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
            "-hide_banner", "-loglevel", "error",
            "-i", source_path,
            "-i", temp_metadata_path,
            "-map_metadata", "1",
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
            "-codec", "copy",
            f"{filename_timestamp}-{episode_data.extracted_id}.mp3",
        ]

        subprocess.run(ffmpeg_command, check=True)
        remove(source_path)
        return True
    except subprocess.CalledProcessError as e:
        logger.error("Error running FFmpeg: %s", e)
        return False
    finally:
        if temp_metadata_path:
            try:
                remove(temp_metadata_path)
            except OSError as ex:
                logger.warning("Could not remove temp metadata file: %s", ex)


def download_episode(configuration_settings: dict, episode_url: str):
    """Fetch metadata, download the audio and produce the final tagged MP3.

    The final file is ``<date>-<episode id>.mp3``. When ``cut_intro`` is set
    and chapters are available, the first chapter is cut with FFmpeg;
    otherwise the download is simply renamed.
    """
    logger.info("Extracting metadata")
    episode_data = return_episode_data(episode_url)

    episode_data, filename_timestamp, track = _apply_configurations(
        configuration_settings, episode_data
    )

    artist = configuration_settings.get(
        "artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist"
    )

    ffmpeg_chapters = None
    if configuration_settings.get("cut_intro"):
        ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)

    logger.info("Downloading episode")
    _download_audio(episode_url, episode_data, filename_timestamp, track, artist)

    if ffmpeg_chapters:
        cut_ok = _cut_intro_with_ffmpeg(
            ffmpeg_chapters, episode_data, filename_timestamp, track, artist
        )
        if not cut_ok:
            # Do not report success when the final file was never written.
            logger.error(
                "Intro cut failed; uncut download kept as %s.mp3", filename_timestamp
            )
            return
    else:
        rename(
            f"{filename_timestamp}.mp3",
            f"{filename_timestamp}-{episode_data.extracted_id}.mp3",
        )

    logger.info("Finished")


def main() -> None:
    """CLI entry point: ``<show name> [release date]``."""
    # Without a handler the INFO progress messages above are never shown.
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) < 2 or sys.argv[1] not in settings:
        known_shows = ", ".join(repr(name) for name in settings)
        sys.exit(f"Usage: get_episode.py <show name> [release date]\nKnown shows: {known_shows}")

    show_name = sys.argv[1]
    logger.info("Processing %s", show_name)
    episode_url = return_url_of_latest_episode(settings[show_name]["base_url"])
    download_episode(settings[show_name], episode_url)


if __name__ == "__main__":
    main()