Remade generic BBC R1 downloader into one tailored for Pete Tong
- `yt-dlp` stopped working, so a switch to `get_iplayer` was necessary
- Added sending a notification to the backend API
- Added logging and general error handling
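
For reference, the new script shells out to `get_iplayer` roughly like this (a sketch of the two calls added in `src/petetong/download_episode.py`; `<episode_pid>` is a placeholder):

```bash
# List the episode PIDs published under the Pete Tong programme page
get_iplayer --pid-recursive-list https://www.bbc.co.uk/programmes/b006ww0v

# Fetch a single episode by PID as a radio programme
get_iplayer --pid=<episode_pid> --type=radio
```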
parent 7da6b09981
commit 9686ae26e4
3 .gitignore vendored
@@ -167,4 +167,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.mp3
*.m4a
13 README.md
@@ -0,0 +1,13 @@
# Media Download Scripts

A collection of Python scripts for automatically downloading and processing podcast episodes from various sources.

### [Perun YouTube Downloader](src/perun/)
Downloads the latest video from the Perun YouTube channel, converts to MP3 with metadata and sponsor segment removal, and uploads to a podcast server.

### [BBC Radio 1 Pete Tong Downloader](src/petetong/)
Downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts to MP3 with metadata, and sends notifications.

## Setup

Each project has its own README with detailed installation and configuration instructions. Navigate to the respective directories for more information.
@@ -1,26 +0,0 @@
settings = {
    "Pete Tong": {
        "artist": "Pete Tong",
        "base_url": "https://www.bbc.co.uk/programmes/b006ww0v",
        "cut_intro": True,
        "modify_timestamp": 7200,
        "calculate_amount_of_fridays": True
    },
    "Radio 1s Classic Essential Mix": {
        "artist": "Radio 1s Classic Essential Mix",
        "use_different_release_date": True,
        "base_url": "https://www.bbc.co.uk/programmes/b00f3pc4",
        "cut_intro": True,
        "remove_amount_of_characters_from_title": -5
    },
    "Defected on Radio 1 Dance": {
        "artist": "Defected on Radio 1 Dance",
        "base_url": "https://www.bbc.co.uk/programmes/m00287n1",
        "remove_amount_of_characters_from_title": -10
    },
    "Radio 1s Essential Mix": {
        "artist": "Radio 1s Essential Mix",
        "base_url": "https://www.bbc.co.uk/programmes/b006wkfp",
        "cut_intro": True
    }
}
@@ -1,162 +0,0 @@
import yt_dlp
import subprocess
import tempfile
import sys
from datetime import datetime, timezone
from config import settings
from os import rename, remove
from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode
import logging
from ytdlp_helper import return_episode_data

logger = logging.getLogger(__name__)


def _apply_configurations(configuration_settings: dict, episode_data):
    """
    Apply configuration settings to episode data.

    Returns:
        tuple: (episode_data, filename_timestamp, track)
    """
    if "remove_amount_of_characters_from_title" in configuration_settings:
        amount_to_remove = configuration_settings["remove_amount_of_characters_from_title"]
        episode_data.extracted_title = episode_data.extracted_title[:amount_to_remove]

    if "modify_timestamp" in configuration_settings:
        episode_data.extracted_timestamp -= configuration_settings["modify_timestamp"]

    if "use_different_release_date" in configuration_settings:
        if len(sys.argv) > 2:
            filename_timestamp = sys.argv[2]
        else:
            logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.")
            filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d")
    else:
        filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d")

    if "calculate_amount_of_fridays" in configuration_settings:
        track = get_friday_number(episode_data.extracted_timestamp)
    else:
        track = filename_timestamp

    return episode_data, filename_timestamp, track


def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
    """
    Prepare chapters for FFmpeg if cutting intro is requested.
    """
    if not episode_data.chapters or len(episode_data.chapters) < 2:
        logger.warning("Cutting intro requested but no chapters found.")
        return None

    return modify_chapters_for_ffmpeg(
        episode_data.chapters[1:], episode_data.chapters[0]["end_time"]
    )


def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Download episode audio using yt_dlp with metadata.
    """
    ytdl_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": f"{filename_timestamp}.%(ext)s",
        "addmetadata": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            },
            {
                "key": "FFmpegMetadata",
            }
        ],
        "postprocessor_args": [
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}"
        ],
        "merge_output_format": "mp3"
    }

    with yt_dlp.YoutubeDL(ytdl_options) as episode:
        episode.download(episode_url)


def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Cut the intro from the episode using FFmpeg and apply metadata.
    """
    logger.info("Fixing chapters and metadata with FFmpeg")

    temp_metadata_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt") as temp_file:
            temp_file.write(ffmpeg_chapters)
            temp_metadata_path = temp_file.name

        ffmpeg_command = [
            "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
            "-hide_banner", "-loglevel", "error",
            "-i", f"{filename_timestamp}.mp3",
            "-i", temp_metadata_path,
            "-map_metadata", "1",
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
            "-codec", "copy",
            f"{filename_timestamp}-{episode_data.extracted_id}.mp3"
        ]

        subprocess.run(ffmpeg_command, check=True)
        remove(f"{filename_timestamp}.mp3")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running FFmpeg: {e}")
    finally:
        if temp_metadata_path and remove:
            try:
                remove(temp_metadata_path)
            except Exception as ex:
                logger.warning(f"Could not remove temp metadata file: {ex}")


def download_episode(configuration_settings: dict, episode_url: str):
    logger.info("Extracting metadata")
    episode_data = return_episode_data(episode_url)

    episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data)

    artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist")

    ffmpeg_chapters = None
    if configuration_settings.get("cut_intro"):
        ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)

    logger.info("Downloading episode")
    _download_audio(episode_url, episode_data, filename_timestamp, track, artist)

    if ffmpeg_chapters:
        _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist)
    else:
        rename(f"{filename_timestamp}.mp3", f"{filename_timestamp}-{episode_data.extracted_id}.mp3")

    logger.info("Finished")


if __name__ == "__main__":
    show_name = sys.argv[1]
    logger.info(f"Processing {show_name}")
    episode_url = return_url_of_latest_episode(settings[show_name]["base_url"])
    download_episode(settings[show_name], episode_url)
@@ -1,52 +0,0 @@
from datetime import datetime, timezone, timedelta
from typing import List, Dict

import subprocess


def time_to_milliseconds(time, length_to_cut) -> int:
    return int(time * 1000 - length_to_cut * 1000)

def add_html_tags_to_description(input_text) -> str:
    return "<p>" + input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>") + "</p>"


def get_friday_number(extracted_timestamp) -> int:
    dt = datetime.fromtimestamp(extracted_timestamp)
    start_of_year = datetime(dt.year, 1, 1)
    days_until_first_friday = (4 - start_of_year.weekday()) % 7
    first_friday = start_of_year + timedelta(days=days_until_first_friday)
    fridays_passed = (dt - first_friday).days // 7 + 1
    return fridays_passed

def return_url_of_latest_episode(base_url: str) -> str:
    result = subprocess.run(["get_iplayer", "--pid-recursive-list", base_url], capture_output=True, text=True)
    latest_episode_id = result.stdout.split("\n")[-3].split(",")[-1][1:]
    return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"

def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
    """
    Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut.

    Args:
        chapters (list): List of chapter dicts with "start_time", "end_time", and "title".
        length_to_cut (int/float): Amount of time to cut from start, in seconds.

    Returns:
        str: Chapters formatted as ffmpeg metadata.
    """
    for entry in chapters:
        if "start_time" in entry:
            entry["start_time"] = time_to_milliseconds(entry["start_time"], length_to_cut)
        if "end_time" in entry:
            entry["end_time"] = time_to_milliseconds(entry["end_time"], length_to_cut)

    chapter_format = ";FFMETADATA1\n"
    for entry in chapters:
        chapter_format += "[CHAPTER]\n"
        chapter_format += "TIMEBASE=1/1000\n"
        chapter_format += f"START={entry['start_time']}\n"
        chapter_format += f"END={entry['end_time']}\n"
        chapter_format += f"title={entry['title']}\n\n"

    return chapter_format
@@ -1,47 +0,0 @@
import yt_dlp
from helper import add_html_tags_to_description
from typing import List, Optional
from dataclasses import dataclass

@dataclass
class EpisodeData:
    chapters: List
    extracted_description: str
    extracted_id: str
    extracted_title: str
    extracted_timestamp: Optional[int]


def return_episode_data(episode_url: str) -> EpisodeData:
    """
    Quietly extracts meta information about a given radio show.

    Args:
        episode_url (str): The URL of the episode.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            - chapters (List): Chapters in JSON format.
            - extracted_description (str): HTML-wrapped description of the episode.
            - extracted_id (str): Unique episode ID.
            - extracted_title (str): Episode title.
            - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available.
    """
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
            info_dict = ydl.extract_info(episode_url, download=False)
    except Exception as e:
        return {"error": f"Failed to extract info: {e}"}

    return EpisodeData(
        chapters=info_dict.get("chapters", []),
        extracted_description=add_html_tags_to_description(info_dict.get("description", "")),
        extracted_id=info_dict.get("id", ""),
        extracted_title=info_dict.get("title", ""),
        extracted_timestamp=info_dict.get("timestamp"),
    )


if __name__ == "__main__":
    print(return_episode_data("https://www.bbc.co.uk/sounds/play/m002jtcqyt"))
75 src/petetong/README.md Normal file
@@ -0,0 +1,75 @@
# Pete Tong BBC Radio Episode Downloader

A Python script that automatically downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts it to MP3 with metadata, and sends a push notification when complete.

## Features

- **Automatic Detection**: Finds the latest Pete Tong episode from BBC iPlayer
- **Audio Download**: Uses `get_iplayer` to download BBC Radio episodes
- **MP3 Conversion**: Converts to MP3 format with ffmpeg
- **Metadata Injection**: Adds title, artist, track number (Friday number of the year), date, and description
- **Push Notifications**: Sends a notification to the backend service when a new episode is ready

## Prerequisites

- Python 3.8+
- `get_iplayer` (BBC iPlayer downloader)
- `ffmpeg` and `ffprobe` (audio processing)
- Backend notification service

## Installation

### Install Python Dependencies

```bash
pip install requests python-dotenv
```

### Install System Dependencies

**Ubuntu/Debian:**
```bash
sudo apt install get-iplayer ffmpeg
```

**macOS:**
```bash
brew install get-iplayer ffmpeg
```

## Configuration

Create a `.env` file with the following variables:

```env
# Backend notification service
BACKEND_API_URL=http://localhost:30101/internal/receive-notifications
BACKEND_API_KEY=your_api_key_here
```
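
`send_notification.py` (below) loads these at import time with `python-dotenv`; roughly:

```python
import os
from dotenv import load_dotenv

load_dotenv()  # reads the .env file from the working directory
backend_api_url = os.getenv("BACKEND_API_URL", "http://localhost:30101/internal/receive-notifications")
api_key = os.getenv("BACKEND_API_KEY")  # no default; the request is sent without a valid key if unset
```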

## Usage

Run the script manually:
```bash
python download_episode.py
```
Or schedule it with cron using the provided `grabEpisode.sh` (Saturday mornings at 9 AM):
```bash
0 9 * * 6 /path/to/script/grabEpisode.sh
```

## Output

MP3 files are named: `YYYY-MM-DD-{episode_id}.mp3`

Example: `2025-10-17-m00258br.mp3`
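
To double-check the tags on a finished file, the same `ffprobe` invocation the script uses for metadata extraction can be run by hand:

```bash
ffprobe -v quiet -print_format json -show_format 2025-10-17-m00258br.mp3
```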

## Metadata Structure

| Field   | Value               | Example                  |
|---------|---------------------|--------------------------|
| Title   | Featured artist     | "Solomun"                |
| Artist  | Pete Tong           | "Pete Tong"              |
| Track   | Friday number       | 42 (42nd Friday of year) |
| Date    | ISO date            | "2025-10-17"             |
| Comment | Episode description | HTML formatted text      |
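
The track number mirrors the logic of `get_friday_number()` in `download_episode.py`; as a quick sanity check of the 2025-10-17 example above:

```python
from datetime import datetime, timedelta

dt = datetime.fromisoformat("2025-10-17")
start_of_year = datetime(dt.year, 1, 1)                      # 2025-01-01 falls on a Wednesday
days_until_first_friday = (4 - start_of_year.weekday()) % 7  # 2 days, so the first Friday is Jan 3
first_friday = start_of_year + timedelta(days=days_until_first_friday)
print((dt - first_friday).days // 7 + 1)                     # 42
```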
141 src/petetong/download_episode.py Normal file
@@ -0,0 +1,141 @@
from datetime import datetime, timedelta
import os
import subprocess
from dataclasses import dataclass
import json
from logger_handler import setup_logger
from send_notification import send_notification

logger = setup_logger("PeteTongDownloader")


@dataclass
class EpisodeData:
    description: str
    title: str
    timestamp: str
    track: int
    id: str

def add_html_tags_to_description(input_text) -> str:
    if not input_text:
        return ""
    return "<p>" + input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>") + "</p>"

def get_friday_number(iso_timestamp: str) -> int:
    """
    Returns the week number of the Friday in the year for a given ISO timestamp string.
    """
    try:
        dt = datetime.fromisoformat(iso_timestamp)
        start_of_year = datetime(dt.year, 1, 1, tzinfo=dt.tzinfo)
        days_until_first_friday = (4 - start_of_year.weekday()) % 7
        first_friday = start_of_year + timedelta(days=days_until_first_friday)
        fridays_passed = (dt - first_friday).days // 7 + 1
        return fridays_passed
    except Exception as e:
        logger.error(f"Failed to calculate Friday number from {iso_timestamp}: {e}")
        return 0

def find_downloaded_file_name_via_id(directory: str, latest_episode_id: str) -> str | None:
    for filename in os.listdir(directory):
        if latest_episode_id in filename:
            return filename
    logger.warning(f"No file found containing episode ID {latest_episode_id} in {directory}")
    return None

def extract_metadata_from_downloaded_episode(file_name: str, episode_id: str) -> EpisodeData:
    if not file_name or not os.path.exists(file_name):
        logger.error(f"File not found: {file_name}")
        raise FileNotFoundError(f"File not found: {file_name}")

    try:
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", file_name],
            capture_output=True, text=True, check=True
        )
        ffprobe_data = json.loads(result.stdout)
        metadata = ffprobe_data.get("format", {}).get("tags", {})

        iso_timestamp = metadata.get("date", "1970-01-01T00:00:00")
        return EpisodeData(
            description=add_html_tags_to_description(metadata.get("lyrics", "")),
            title=metadata.get("title", "Unknown Title"),
            timestamp=iso_timestamp.split("T")[0],
            track=get_friday_number(iso_timestamp),
            id=episode_id
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"ffprobe failed for {file_name}: {e.stderr}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse ffprobe output for {file_name}: {e}")
        raise

def get_id_of_the_latest_episode(base_url: str) -> str:
    try:
        result = subprocess.run(
            ["get_iplayer", "--pid-recursive-list", base_url],
            capture_output=True, text=True, check=True
        )
        lines = result.stdout.strip().split("\n")
        if len(lines) < 3:
            raise ValueError("get_iplayer output too short to find latest episode ID")
        latest_episode_id = lines[-2].split(",")[-1].strip()
        logger.info(f"Latest episode ID: {latest_episode_id}")
        return latest_episode_id
    except subprocess.CalledProcessError as e:
        logger.error(f"get_iplayer failed: {e.stderr}")
        raise


def download_episode_via_episode_id(episode_id: str) -> str:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    try:
        logger.info(f"Downloading episode {episode_id}")
        subprocess.run(
            ["get_iplayer", f"--pid={episode_id}", "--type=radio"],
            cwd=script_dir, check=True
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Download failed for {episode_id}: {e.stderr}")
        raise
    return script_dir

def convert_episode_to_mp3(episode_data: EpisodeData, file_name: str):
    output_file = f"{episode_data.timestamp}-{episode_data.id}.mp3"
    ffmpeg_command = [
        "ffmpeg", "-i", file_name,
        "-metadata", f"title={episode_data.title}",
        "-metadata", f"artist=Pete Tong",
        "-metadata", f"track={episode_data.track}",
        "-metadata", f"date={episode_data.timestamp}",
        "-metadata", f"comment={episode_data.description}",
        output_file
    ]

    try:
        logger.info(f"Converting {file_name} to {output_file}")
        subprocess.run(ffmpeg_command, check=True)
        os.remove(file_name)
    except subprocess.CalledProcessError as e:
        logger.error(f"ffmpeg conversion failed: {e}")
        raise

def download_latest_pete_tong_episode():
    try:
        base_url = "https://www.bbc.co.uk/programmes/b006ww0v"
        episode_id = get_id_of_the_latest_episode(base_url)
        script_dir = download_episode_via_episode_id(episode_id)
        file_name = find_downloaded_file_name_via_id(script_dir, episode_id)
        episode_data = extract_metadata_from_downloaded_episode(file_name, episode_id)
        convert_episode_to_mp3(episode_data, file_name)
        logger.info("Episode download and conversion completed successfully")
        send_notification(episode_data.title)
        logger.info("Notification sent")
    except Exception as e:
        logger.error(f"Failed to download latest Pete Tong episode: {e}", exc_info=True)


if __name__ == "__main__":
    download_latest_pete_tong_episode()
4 src/petetong/grabEpisode.sh Normal file
@@ -0,0 +1,4 @@
#!/bin/bash -e

docker run --network host --rm -v /home/florian/github/service-podcasts/src/petetong:/app ytdlp:latest python3 /app/download_episode.py
mv /home/florian/github/scripts/audiobookshelf/bbc-downloader/*.mp3 "/var/lib/audiobookshelf/music/Pete Tong/"
19 src/petetong/logger_handler.py Normal file
@@ -0,0 +1,19 @@
import logging
import os

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
if LOG_LEVEL not in {"ERROR", "DEBUG", "INFO", "WARNING", "CRITICAL"}:
    LOG_LEVEL = "INFO"

def setup_logger(name: str) -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.setLevel(getattr(logging, LOG_LEVEL))
    logger.debug(f"Logger {name} initialized with level {LOG_LEVEL}")
    return logger
60 src/petetong/send_notification.py Normal file
@@ -0,0 +1,60 @@
import requests
from requests.exceptions import RequestException, Timeout, ConnectionError
import os
import time
from logger_handler import setup_logger
from dotenv import load_dotenv

load_dotenv()
backend_api_url = os.getenv("BACKEND_API_URL", "http://localhost:30101/internal/receive-notifications")
api_key = os.getenv("BACKEND_API_KEY")
logger = setup_logger(__name__)


def send_notification(body: str, max_retries: int = 5, timeout: int = 5):
    """
    Sends a notification to the internal backend service when a new Pete Tong episode is out.

    Parameters:
        body: Featured artist
    """

    headers = {
        "X-API-Key-Internal": api_key,
        "Content-Type": "application/json"
    }

    title = "New Pete Tong episode is available"
    data = {
        "receipent_user_id": 1,
        "message": {
            "title": title,
            "body": f"Featured artist: {body}",
            "category": "mixtapes",
            "timestamp": int(time.time())
        }
    }

    logger.debug(f"[Notify] Preparing to send notification: title='{title}', body={body}")
    with requests.Session() as session:
        for attempt in range(1, max_retries + 1):
            try:
                logger.debug(f"[Notify] Sending request to backend (attempt {attempt}/{max_retries})")
                response = session.post(backend_api_url, headers=headers, json=data, timeout=timeout)
                response.raise_for_status()
                logger.info(f"[Notify] Notification sent successfully for '{title}' (body {body})")
                return

            except (Timeout, ConnectionError) as e:
                logger.warning(f"[Notify] Attempt {attempt}/{max_retries} failed: {type(e).__name__}")
                if attempt == max_retries:
                    logger.error(f"[Notify] All retry attempts failed for '{title}'")
                else:
                    sleep_time = 2 ** (attempt - 1)
                    logger.debug(f"[Notify] Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)

            except RequestException as e:
                logger.error(f"[Notify] Unexpected request failure: {e}")
                return