From 9686ae26e415dff1486f6e47b30899a9926f283e Mon Sep 17 00:00:00 2001 From: Florian Date: Sun, 19 Oct 2025 20:40:27 +0200 Subject: [PATCH] Remade generic BBC R1 downloader into one tailored for Pete Tong - `yt-dlp` stopped working so a switch to `get_iplayer` was necessary - Added sending a notification to the backend api - Added logging and general error handling --- .gitignore | 3 +- README.md | 13 +++ src/bbcr1/config.py | 26 ----- src/bbcr1/get_episode.py | 162 ------------------------------ src/bbcr1/helper.py | 52 ---------- src/bbcr1/ytdlp_helper.py | 47 --------- src/petetong/README.md | 75 ++++++++++++++ src/petetong/download_episode.py | 141 ++++++++++++++++++++++++++ src/petetong/grabEpisode.sh | 4 + src/petetong/logger_handler.py | 19 ++++ src/petetong/send_notification.py | 60 +++++++++++ 11 files changed, 314 insertions(+), 288 deletions(-) delete mode 100644 src/bbcr1/config.py delete mode 100644 src/bbcr1/get_episode.py delete mode 100644 src/bbcr1/helper.py delete mode 100644 src/bbcr1/ytdlp_helper.py create mode 100644 src/petetong/README.md create mode 100644 src/petetong/download_episode.py create mode 100644 src/petetong/grabEpisode.sh create mode 100644 src/petetong/logger_handler.py create mode 100644 src/petetong/send_notification.py diff --git a/.gitignore b/.gitignore index 0dbf2f2..414da72 100644 --- a/.gitignore +++ b/.gitignore @@ -167,4 +167,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ - +*.mp3 +*.m4a diff --git a/README.md b/README.md index e69de29..8bd3739 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,13 @@ +# Media Download Scripts + +A collection of Python scripts for automatically downloading and processing podcast episodes from various sources. 
+ +### [Perun YouTube Downloader](src/perun/) +Downloads the latest video from the Perun YouTube channel, converts to MP3 with metadata and sponsor segment removal, and uploads to a podcast server. + +### [BBC Radio 1 Pete Tong Downloader](src/petetong/) +Downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts to MP3 with metadata, and sends notifications. + +## Setup + +Each project has its own README with detailed installation and configuration instructions. Navigate to the respective directories for more information. \ No newline at end of file diff --git a/src/bbcr1/config.py b/src/bbcr1/config.py deleted file mode 100644 index 9fbaf1a..0000000 --- a/src/bbcr1/config.py +++ /dev/null @@ -1,26 +0,0 @@ -settings = { - "Pete Tong":{ - "artist": "Pete Tong", - "base_url":"https://www.bbc.co.uk/programmes/b006ww0v", - "cut_intro":True, - "modify_timestamp":7200, - "calculate_amount_of_fridays":True - }, - "Radio 1s Classic Essential Mix":{ - "artist":"Radio 1s Classic Essential Mix", - "use_different_release_date":True, - "base_url":"https://www.bbc.co.uk/programmes/b00f3pc4", - "cut_intro":True, - "remove_amount_of_characters_from_title":-5 - }, - "Defected on Radio 1 Dance":{ - "artist": "Defected on Radio 1 Dance", - "base_url":"https://www.bbc.co.uk/programmes/m00287n1", - "remove_amount_of_characters_from_title":-10 - }, - "Radio 1s Essential Mix":{ - "artist":"Radio 1s Essential Mix", - "base_url":"https://www.bbc.co.uk/programmes/b006wkfp", - "cut_intro":True - } -} \ No newline at end of file diff --git a/src/bbcr1/get_episode.py b/src/bbcr1/get_episode.py deleted file mode 100644 index 2b9e658..0000000 --- a/src/bbcr1/get_episode.py +++ /dev/null @@ -1,162 +0,0 @@ -import yt_dlp -import subprocess -import tempfile -import sys -from datetime import datetime, timezone -from config import settings -from os import rename, remove -from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode -import logging 
-from ytdlp_helper import return_episode_data - -logger = logging.getLogger(__name__) - - -def _apply_configurations(configuration_settings: dict, episode_data): - """ - Apply configuration settings to episode data. - - Returns: - tuple: (episode_data, filename_timestamp, track) - """ - if "remove_amount_of_characters_from_title" in configuration_settings: - amount_to_remove = configuration_settings["remove_amount_of_characters_from_title"] - episode_data.extracted_title = episode_data.extracted_title[:amount_to_remove] - - if "modify_timestamp" in configuration_settings: - episode_data.extracted_timestamp -= configuration_settings["modify_timestamp"] - - if "use_different_release_date" in configuration_settings: - if len(sys.argv) > 2: - filename_timestamp = sys.argv[2] - else: - logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.") - filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d") - else: - filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d") - - if "calculate_amount_of_fridays" in configuration_settings: - track = get_friday_number(episode_data.extracted_timestamp) - else: - track = filename_timestamp - - return episode_data, filename_timestamp, track - - -def _prepare_ffmpeg_chapters(episode_data, configuration_settings): - """ - Prepare chapters for FFmpeg if cutting intro is requested. - """ - if not episode_data.chapters or len(episode_data.chapters) < 2: - logger.warning("Cutting intro requested but no chapters found.") - return None - - return modify_chapters_for_ffmpeg( - episode_data.chapters[1:], episode_data.chapters[0]["end_time"] - ) - - -def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str): - """ - Download episode audio using yt_dlp with metadata. 
- """ - ytdl_options = { - "quiet": True, - "noprogress": True, - "format": "bestaudio/best", - "extract_audio": True, - "audio_format": "mp3", - "outtmpl": f"{filename_timestamp}.%(ext)s", - "addmetadata": True, - "postprocessors": [ - { - "key": "FFmpegExtractAudio", - "preferredcodec": "mp3", - }, - { - "key": "FFmpegMetadata", - } - ], - "postprocessor_args": [ - "-metadata", f"title={episode_data.extracted_title}", - "-metadata", f"artist={artist}", - "-metadata", f"track={track}", - "-metadata", f"date={filename_timestamp}", - "-metadata", f"comment={episode_data.extracted_description}" - ], - "merge_output_format": "mp3" - } - - with yt_dlp.YoutubeDL(ytdl_options) as episode: - episode.download(episode_url) - - -def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str): - """ - Cut the intro from the episode using FFmpeg and apply metadata. - """ - logger.info("Fixing chapters and metadata with FFmpeg") - - temp_metadata_path = None - try: - with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt") as temp_file: - temp_file.write(ffmpeg_chapters) - temp_metadata_path = temp_file.name - - ffmpeg_command = [ - "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]), - "-hide_banner", "-loglevel", "error", - "-i", f"{filename_timestamp}.mp3", - "-i", temp_metadata_path, - "-map_metadata", "1", - "-metadata", f"title={episode_data.extracted_title}", - "-metadata", f"artist={artist}", - "-metadata", f"track={track}", - "-metadata", f"date={filename_timestamp}", - "-metadata", f"comment={episode_data.extracted_description}", - "-codec", "copy", - f"{filename_timestamp}-{episode_data.extracted_id}.mp3" - ] - - subprocess.run(ffmpeg_command, check=True) - remove(f"{filename_timestamp}.mp3") - except subprocess.CalledProcessError as e: - logger.error(f"Error running FFmpeg: {e}") - finally: - if temp_metadata_path and remove: - try: - remove(temp_metadata_path) - except Exception as ex: - 
logger.warning(f"Could not remove temp metadata file: {ex}") - - -def download_episode(configuration_settings: dict, episode_url: str): - logger.info("Extracting metadata") - episode_data = return_episode_data(episode_url) - - episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data) - - artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist") - - ffmpeg_chapters = None - if configuration_settings.get("cut_intro"): - ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings) - - logger.info("Downloading episode") - _download_audio(episode_url, episode_data, filename_timestamp, track, artist) - - if ffmpeg_chapters: - _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist) - else: - rename(f"{filename_timestamp}.mp3", f"{filename_timestamp}-{episode_data.extracted_id}.mp3") - - logger.info("Finished") - - - - -if __name__ == "__main__": - show_name = sys.argv[1] - logger.info (f"Processing {show_name}") - episode_url = return_url_of_latest_episode(settings[show_name]["base_url"]) - download_episode(settings[show_name],episode_url) diff --git a/src/bbcr1/helper.py b/src/bbcr1/helper.py deleted file mode 100644 index def8157..0000000 --- a/src/bbcr1/helper.py +++ /dev/null @@ -1,52 +0,0 @@ -from datetime import datetime, timezone, timedelta -from typing import List, Dict - -import subprocess - - -def time_to_milliseconds(time,length_to_cut) -> int: - return int(time * 1000 - length_to_cut * 1000) - -def add_html_tags_to_description(input_text) -> str: - return("

"+input_text.replace("\n\n", "

\n

").replace("\n", "
")+"

") - - -def get_friday_number(extracted_timestamp) -> int: - dt = datetime.fromtimestamp(extracted_timestamp) - start_of_year = datetime(dt.year, 1, 1) - days_until_first_friday = (4 - start_of_year.weekday()) % 7 - first_friday = start_of_year + timedelta(days=days_until_first_friday) - fridays_passed = (dt - first_friday).days // 7 + 1 - return fridays_passed - -def return_url_of_latest_episode(base_url:str) -> str: - result = subprocess.run(["get_iplayer","--pid-recursive-list",base_url], capture_output=True, text=True) - latest_episode_id = result.stdout.split("\n")[-3].split(",")[-1][1:] - return (f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}") - -def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str: - """ - Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut. - - Args: - chapters (list): List of chapter dicts with "start_time", "end_time", and "title". - length_to_cut (int/float): Amount of time to cut from start, in seconds. - - Returns: - str: Chapters formatted as ffmpeg metadata. 
- """ - for entry in chapters: - if "start_time" in entry: - entry["start_time"]=time_to_milliseconds(entry["start_time"],length_to_cut) - if "end_time" in entry: - entry["end_time"]=time_to_milliseconds(entry["end_time"],length_to_cut) - - chapter_format = ";FFMETADATA1\n" - for entry in chapters: - chapter_format+=("[CHAPTER]\n") - chapter_format+=("TIMEBASE=1/1000\n") - chapter_format+=(f"START={entry['start_time']}\n") - chapter_format+=(f"END={entry['end_time']}\n") - chapter_format+=(f"title={entry['title']}\n\n") - - return(chapter_format) \ No newline at end of file diff --git a/src/bbcr1/ytdlp_helper.py b/src/bbcr1/ytdlp_helper.py deleted file mode 100644 index 5f0c121..0000000 --- a/src/bbcr1/ytdlp_helper.py +++ /dev/null @@ -1,47 +0,0 @@ -import yt_dlp -from helper import add_html_tags_to_description -from typing import List, Optional -from dataclasses import dataclass - -@dataclass -class EpisodeData: - chapters: List - extracted_description: str - extracted_id: str - extracted_title: str - extracted_timestamp: Optional[int] - - -def return_episode_data(episode_url: str) -> EpisodeData: - """ - Quietly extracts meta information about a given radio show. - - Args: - episode_url (str): The URL of the episode. - - Returns: - EpisodeData: A dataclass containing episode metadata: - - chapters (List): Chapters in JSON format. - - extracted_description (str): HTML-wrapped description of the episode. - - extracted_id (str): Unique episode ID. - - extracted_title (str): Episode title. - - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available. 
- - """ - try: - with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl: - info_dict = ydl.extract_info(episode_url, download=False) - except Exception as e: - return {"error": f"Failed to extract info: {e}"} - - return EpisodeData( - chapters=info_dict.get("chapters", []), - extracted_description=add_html_tags_to_description(info_dict.get("description", "")), - extracted_id=info_dict.get("id", ""), - extracted_title=info_dict.get("title", ""), - extracted_timestamp=info_dict.get("timestamp"), - ) - - -if __name__ == "__main__": - print(return_episode_data("https://www.bbc.co.uk/sounds/play/m002jtcqyt ")) \ No newline at end of file diff --git a/src/petetong/README.md b/src/petetong/README.md new file mode 100644 index 0000000..ff19f9c --- /dev/null +++ b/src/petetong/README.md @@ -0,0 +1,75 @@ +# Pete Tong BBC Radio Episode Downloader + +A Python script that automatically downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts it to MP3 with metadata, and sends a push notification when complete. 
+
+## Features
+
+- **Automatic Detection**: Finds the latest Pete Tong episode from BBC iPlayer
+- **Audio Download**: Uses `get_iplayer` to download BBC Radio episodes
+- **MP3 Conversion**: Converts to MP3 format with ffmpeg
+- **Metadata Injection**: Adds title, artist, track number (Friday number of the year), date, and description
+- **Push Notifications**: Sends notification to backend service when new episode is ready
+
+## Prerequisites
+
+- Python 3.8+
+- `get_iplayer` (BBC iPlayer downloader)
+- `ffmpeg` and `ffprobe` (audio processing)
+- Backend notification service
+
+## Installation
+
+### Install Python Dependencies
+
+```bash
+pip install requests python-dotenv
+```
+
+### Install System Dependencies
+
+**Ubuntu/Debian:**
+```bash
+sudo apt install get-iplayer ffmpeg
+```
+
+**macOS:**
+```bash
+brew install get-iplayer ffmpeg
+```
+
+## Configuration
+
+Create a `.env` file with the following variables:
+
+```env
+# Backend notification service
+BACKEND_API_URL=http://localhost:30101/internal/receive-notifications
+BACKEND_API_KEY=your_api_key_here
+```
+
+## Usage
+
+Run the script manually:
+```bash
+python download_episode.py
+```
+Or schedule it with cron using the provided `grabEpisode.sh` (Saturday mornings at 9 AM):
+```bash
+0 9 * * 6 /path/to/script/grabEpisode.sh
+```
+
+## Output
+
+MP3 files are named: `YYYY-MM-DD-{episode_id}.mp3`
+
+Example: `2025-10-17-m00258br.mp3`
+
+## Metadata Structure
+
+| Field | Value | Example |
+|-------|-------|---------|
+| Title | Featured artist | "Solomun" |
+| Artist | Pete Tong | "Pete Tong" |
+| Track | Friday number | 42 (42nd Friday of year) |
+| Date | ISO date | "2025-10-17" |
+| Comment | Episode description | HTML formatted text |
\ No newline at end of file
diff --git a/src/petetong/download_episode.py b/src/petetong/download_episode.py
new file mode 100644
index 0000000..13b20dc
--- /dev/null
+++ b/src/petetong/download_episode.py
@@ -0,0 +1,208 @@
+from datetime import datetime, timedelta
+import os
+import subprocess
+from dataclasses import dataclass
+import json
+from typing import Optional
+from logger_handler import setup_logger
+from send_notification import send_notification
+
+logger = setup_logger("PeteTongDownloader")
+
+
+@dataclass
+class EpisodeData:
+    """Metadata of one downloaded episode, used to tag and name the MP3."""
+    description: str  # HTML-wrapped episode description
+    title: str
+    timestamp: str  # date part (before the "T") of the source file's "date" tag
+    track: int  # result of get_friday_number() for that date
+    id: str  # episode ID as reported by get_iplayer
+
+
+def add_html_tags_to_description(input_text) -> str:
+    """Wrap a plain-text description in HTML paragraph and line-break tags.
+
+    Returns an empty string for empty or missing input.
+    """
+    if not input_text:
+        return ""
+    # NOTE(review): confirm the exact tag literals against the committed file.
+    return "<p>" + input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>") + "</p>"
+
+
+def get_friday_number(iso_timestamp: str) -> int:
+    """Return the ordinal of the most recent Friday within the timestamp's year.
+
+    The first Friday of the year counts as 1; earlier dates yield 0.
+    Logs an error and returns 0 when the timestamp cannot be parsed.
+    """
+    try:
+        # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11 on.
+        if iso_timestamp.endswith("Z"):
+            iso_timestamp = iso_timestamp[:-1] + "+00:00"
+        dt = datetime.fromisoformat(iso_timestamp)
+        # Reuse dt's tzinfo so aware and naive datetimes are never subtracted.
+        start_of_year = datetime(dt.year, 1, 1, tzinfo=dt.tzinfo)
+        days_until_first_friday = (4 - start_of_year.weekday()) % 7
+        first_friday = start_of_year + timedelta(days=days_until_first_friday)
+        return (dt - first_friday).days // 7 + 1
+    except Exception as e:
+        logger.error(f"Failed to calculate Friday number from {iso_timestamp}: {e}")
+        return 0
+
+
+def find_downloaded_file_name_via_id(directory: str, latest_episode_id: str) -> Optional[str]:
+    """Return the name of the first entry in `directory` containing the episode ID.
+
+    The bare file name is returned, not a path; None when nothing matches.
+    """
+    for filename in os.listdir(directory):
+        if latest_episode_id in filename:
+            return filename
+    logger.warning(f"No file found containing episode ID {latest_episode_id} in {directory}")
+    return None
+
+
+def extract_metadata_from_downloaded_episode(file_name: str, episode_id: str) -> EpisodeData:
+    """Read the tags of a downloaded file with ffprobe and build an EpisodeData.
+
+    Raises:
+        FileNotFoundError: if file_name is empty or does not exist.
+        subprocess.CalledProcessError: if ffprobe exits non-zero.
+        json.JSONDecodeError: if ffprobe's output is not valid JSON.
+    """
+    if not file_name or not os.path.exists(file_name):
+        logger.error(f"File not found: {file_name}")
+        raise FileNotFoundError(f"File not found: {file_name}")
+
+    try:
+        result = subprocess.run(
+            ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", file_name],
+            capture_output=True, text=True, check=True
+        )
+        ffprobe_data = json.loads(result.stdout)
+        metadata = ffprobe_data.get("format", {}).get("tags", {})
+
+        iso_timestamp = metadata.get("date", "1970-01-01T00:00:00")
+        return EpisodeData(
+            description=add_html_tags_to_description(metadata.get("lyrics", "")),
+            title=metadata.get("title", "Unknown Title"),
+            timestamp=iso_timestamp.split("T")[0],
+            track=get_friday_number(iso_timestamp),
+            id=episode_id
+        )
+    except subprocess.CalledProcessError as e:
+        logger.error(f"ffprobe failed for {file_name}: {e.stderr}")
+        raise
+    except json.JSONDecodeError as e:
+        logger.error(f"Failed to parse ffprobe output for {file_name}: {e}")
+        raise
+
+
+def get_id_of_the_latest_episode(base_url: str) -> str:
+    """Ask get_iplayer for the programme's episode list and return the latest ID.
+
+    NOTE(review): relies on the second-to-last output line describing the
+    newest episode, with the ID as its last comma-separated field - confirm
+    against the installed get_iplayer version.
+
+    Raises:
+        ValueError: if the output is too short or no ID can be extracted.
+        subprocess.CalledProcessError: if get_iplayer exits non-zero.
+    """
+    try:
+        result = subprocess.run(
+            ["get_iplayer", "--pid-recursive-list", base_url],
+            capture_output=True, text=True, check=True
+        )
+        lines = result.stdout.strip().split("\n")
+        if len(lines) < 3:
+            raise ValueError("get_iplayer output too short to find latest episode ID")
+        latest_episode_id = lines[-2].split(",")[-1].strip()
+        if not latest_episode_id:
+            raise ValueError("get_iplayer output did not contain an episode ID")
+        logger.info(f"Latest episode ID: {latest_episode_id}")
+        return latest_episode_id
+    except subprocess.CalledProcessError as e:
+        logger.error(f"get_iplayer failed: {e.stderr}")
+        raise
+
+
+def download_episode_via_episode_id(episode_id: str) -> str:
+    """Download the episode with get_iplayer into this script's directory.
+
+    Returns the directory the download was started in.
+
+    Raises:
+        subprocess.CalledProcessError: if get_iplayer exits non-zero.
+    """
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    try:
+        logger.info(f"Downloading episode {episode_id}")
+        subprocess.run(
+            ["get_iplayer", f"--pid={episode_id}", "--type=radio"],
+            cwd=script_dir, check=True
+        )
+    except subprocess.CalledProcessError as e:
+        # Output is not captured here, so e.stderr is always None; log the
+        # exception itself (command and exit status) instead.
+        logger.error(f"Download failed for {episode_id}: {e}")
+        raise
+    return script_dir
+
+
+def convert_episode_to_mp3(episode_data: EpisodeData, file_name: str):
+    """Convert the downloaded file to a tagged MP3 and delete the source file.
+
+    The MP3 is named "<timestamp>-<id>.mp3" and written next to file_name.
+
+    Raises:
+        subprocess.CalledProcessError: if ffmpeg exits non-zero.
+    """
+    output_file = os.path.join(
+        os.path.dirname(file_name), f"{episode_data.timestamp}-{episode_data.id}.mp3"
+    )
+    ffmpeg_command = [
+        "ffmpeg", "-i", file_name,
+        "-metadata", f"title={episode_data.title}",
+        "-metadata", "artist=Pete Tong",
+        "-metadata", f"track={episode_data.track}",
+        "-metadata", f"date={episode_data.timestamp}",
+        "-metadata", f"comment={episode_data.description}",
+        output_file
+    ]
+
+    try:
+        logger.info(f"Converting {file_name} to {output_file}")
+        subprocess.run(ffmpeg_command, check=True)
+        os.remove(file_name)
+    except subprocess.CalledProcessError as e:
+        logger.error(f"ffmpeg conversion failed: {e}")
+        raise
+
+
+def download_latest_pete_tong_episode():
+    """Download, tag and convert the latest episode, then send a notification.
+
+    Any failure is logged with its traceback and not re-raised.
+    """
+    try:
+        base_url = "https://www.bbc.co.uk/programmes/b006ww0v"
+        episode_id = get_id_of_the_latest_episode(base_url)
+        # Download exactly once; the call returns the download directory.
+        script_dir = download_episode_via_episode_id(episode_id)
+        file_name = find_downloaded_file_name_via_id(script_dir, episode_id)
+        # Use the full path so the script does not depend on the working directory.
+        file_path = os.path.join(script_dir, file_name) if file_name else None
+        episode_data = extract_metadata_from_downloaded_episode(file_path, episode_id)
+        convert_episode_to_mp3(episode_data, file_path)
+        logger.info("Episode download and conversion completed successfully")
+        # send_notification() logs its own outcome, so no success message here.
+        send_notification(episode_data.title)
+    except Exception as e:
+        logger.error(f"Failed to download latest Pete Tong episode: {e}", exc_info=True)
+
+
+if __name__ == "__main__":
+    download_latest_pete_tong_episode()
\ No newline at end of file
diff --git a/src/petetong/grabEpisode.sh b/src/petetong/grabEpisode.sh
new file mode 100644
index 0000000..f86e5ab
--- /dev/null
+++ b/src/petetong/grabEpisode.sh
@@ -0,0 +1,4 @@
+#!/bin/bash -e
+
+docker run --network host --rm -v /home/florian/github/service-podcasts/src/petetong:/app ytdlp:latest python3 /app/download_episode.py
+mv /home/florian/github/scripts/audiobookshelf/bbc-downloader/*.mp3 "/var/lib/audiobookshelf/music/Pete Tong/"
diff --git a/src/petetong/logger_handler.py b/src/petetong/logger_handler.py
new file mode 100644
index 0000000..3911736
--- /dev/null
+++ b/src/petetong/logger_handler.py
@@ -0,0 +1,19 @@
+import logging
+import os
+
+LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
+if LOG_LEVEL not in {"ERROR", "DEBUG", "INFO", "WARNING", "CRITICAL"}:
+    LOG_LEVEL = "INFO"
+
+def setup_logger(name: str) -> logging.Logger:
+    logger = logging.getLogger(name)
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+        logger.setLevel(getattr(logging, LOG_LEVEL))
+        logger.debug(f"Logger {name} initialized with level {LOG_LEVEL}")
+    return logger
diff --git a/src/petetong/send_notification.py b/src/petetong/send_notification.py
new file mode 100644
index 0000000..e7bd8ee
--- /dev/null
+++ b/src/petetong/send_notification.py
@@ -0,0 +1,60 @@
+import requests
+from requests.exceptions import RequestException, Timeout, ConnectionError
+import os
+import time
+from logger_handler import setup_logger
+from dotenv import load_dotenv
+
+load_dotenv()
+backend_api_url=os.getenv("BACKEND_API_URL","http://localhost:30101/internal/receive-notifications")
+api_key= os.getenv("BACKEND_API_KEY")
+logger = setup_logger(__name__)
+
+
+def send_notification(body: str,max_retries: int = 5,timeout: int = 5) -> bool:
+    """
+    Send a "new Pete Tong episode" notification to the internal backend service.
+
+    Timeouts and connection errors are retried with exponential backoff
+    (1, 2, 4, ... seconds); any other request failure aborts immediately.
+
+    Parameters:
+        body: Featured artist
+        max_retries: Maximum number of attempts
+        timeout: Timeout in seconds passed to each request
+
+    Returns:
+        True if a request completed without an HTTP error status, False otherwise.
+    """
+    headers = {"X-API-Key-Internal": api_key, "Content-Type": "application/json"}
+    title = "New Pete Tong episode is available"
+    data = {
+        "receipent_user_id": 1,
+        "message": {
+            "title": title,
+            "body": f"Featured artist: {body}",
+            "category":"mixtapes",
+            "timestamp": int(time.time())
+        }
+    }
+    logger.debug(f"[Notify] Preparing to send notification: title='{title}', body={body}")
+    with requests.Session() as session:
+        for attempt in range(1, max_retries + 1):
+            try:
+                logger.debug(f"[Notify] Sending request to backend (attempt {attempt}/{max_retries})")
+                response = session.post(backend_api_url, headers=headers, json=data, timeout=timeout)
+                response.raise_for_status()
+                logger.info(f"[Notify] Notification sent successfully for '{title}' (body {body})")
+                return True
+            except (Timeout, ConnectionError) as e:
+                logger.warning(f"[Notify] Attempt {attempt}/{max_retries} failed: {type(e).__name__}")
+                if attempt == max_retries:
+                    logger.error(f"[Notify] All retry attempts failed for '{title}'")
+                else:
+                    sleep_time = 2 ** (attempt - 1)
+                    logger.debug(f"[Notify] Retrying in {sleep_time} seconds...")
+                    time.sleep(sleep_time)
+            except RequestException as e:
+                logger.error(f"[Notify] Unexpected request failure: {e}")
+                return False
+    return False