Imported BBC Radio1 handler and heavily rewrote it.

Refactored metadata extraction: created an EpisodeData dataclass to represent the metadata clearly. Modularized the download process by breaking the huge main function down into several parts. Added error handling for yt_dlp. Added docstrings to functions for ease of understanding.
2025-10-10 17:29:42 +02:00 · 2025-10-10 17:29:42 +02:00 · a47d6667bc
commit a47d6667bc
parent 0e127670d9
5 changed files with 287 additions and 0 deletions
--- a/src/bbcr1/__pycache__/helper.cpython-312.pyc
+++ b/src/bbcr1/__pycache__/helper.cpython-312.pyc
--- a/src/bbcr1/config.py
+++ b/src/bbcr1/config.py
@@ -0,0 +1,26 @@
+# Common prefix of every BBC programme page referenced below.
+_PROGRAMMES_URL = "https://www.bbc.co.uk/programmes/"
+
+# Per-show options, keyed by the show name passed on the command line.
+#   artist                                  value written to the "artist" tag
+#   base_url                                programme page handed to get_iplayer
+#   cut_intro                               strip the first chapter from the audio
+#   modify_timestamp                        seconds subtracted from the episode timestamp
+#   calculate_amount_of_fridays             use the Friday-of-the-year count as track number
+#   use_different_release_date              take the release date from the second CLI argument
+#   remove_amount_of_characters_from_title  slice end applied to the title (negative = trim tail)
+settings = {
+	"Pete Tong": {
+		"artist": "Pete Tong",
+		"base_url": _PROGRAMMES_URL + "b006ww0v",
+		"cut_intro": True,
+		"modify_timestamp": 7200,
+		"calculate_amount_of_fridays": True,
+	},
+	"Radio 1s Classic Essential Mix": {
+		"artist": "Radio 1s Classic Essential Mix",
+		"use_different_release_date": True,
+		"base_url": _PROGRAMMES_URL + "b00f3pc4",
+		"cut_intro": True,
+		"remove_amount_of_characters_from_title": -5,
+	},
+	"Defected on Radio 1 Dance": {
+		"artist": "Defected on Radio 1 Dance",
+		"base_url": _PROGRAMMES_URL + "m00287n1",
+		"remove_amount_of_characters_from_title": -10,
+	},
+	"Radio 1s Essential Mix": {
+		"artist": "Radio 1s Essential Mix",
+		"base_url": _PROGRAMMES_URL + "b006wkfp",
+		"cut_intro": True,
+	},
+}
--- a/src/bbcr1/get_episode.py
+++ b/src/bbcr1/get_episode.py
@@ -0,0 +1,162 @@
+import yt_dlp
+import subprocess
+import tempfile
+import sys
+from datetime import datetime, timezone
+from config import settings
+from os import rename, remove
+from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode
+import logging
+from ytdlp_helper import return_episode_data
+
+logger = logging.getLogger(__name__)
+
+
+def _apply_configurations(configuration_settings: dict, episode_data):
+	"""
+	Apply the per-show configuration settings to the extracted episode data.
+
+	Title and timestamp adjustments are written back onto ``episode_data``.
+	Options only take effect when their value is truthy, so a flag that is
+	explicitly set to ``False`` (or a character amount of ``0``) is a no-op.
+
+	Args:
+		configuration_settings (dict): Options of a single show.
+		episode_data: Episode metadata exposing ``extracted_title`` and
+			``extracted_timestamp`` (epoch seconds).
+
+	Returns:
+		tuple: (episode_data, filename_timestamp, track). ``track`` is the
+			Friday number when "calculate_amount_of_fridays" is enabled,
+			otherwise it equals ``filename_timestamp``.
+
+	Raises:
+		ValueError: If a timestamp is needed but the episode has none.
+	"""
+	def required_timestamp():
+		# Fail with a clear message instead of a TypeError deep inside datetime.
+		if episode_data.extracted_timestamp is None:
+			raise ValueError("Episode metadata has no timestamp; cannot derive a release date.")
+		return episode_data.extracted_timestamp
+
+	amount_to_remove = configuration_settings.get("remove_amount_of_characters_from_title")
+	if amount_to_remove:
+		# Used as a slice end: a negative amount trims that many trailing characters.
+		episode_data.extracted_title = episode_data.extracted_title[:amount_to_remove]
+
+	timestamp_offset = configuration_settings.get("modify_timestamp")
+	if timestamp_offset:
+		episode_data.extracted_timestamp = required_timestamp() - timestamp_offset
+
+	wants_custom_date = bool(configuration_settings.get("use_different_release_date"))
+	if wants_custom_date and len(sys.argv) > 2:
+		filename_timestamp = sys.argv[2]
+	else:
+		if wants_custom_date:
+			logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.")
+		filename_timestamp = datetime.fromtimestamp(required_timestamp(), tz=timezone.utc).strftime("%Y-%m-%d")
+
+	if configuration_settings.get("calculate_amount_of_fridays"):
+		track = get_friday_number(required_timestamp())
+	else:
+		track = filename_timestamp
+
+	return episode_data, filename_timestamp, track
+
+
+def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
+	"""
+	Build the FFmpeg chapter metadata needed to cut the intro.
+
+	The first chapter is treated as the intro: the remaining chapters are
+	shifted by its end time. At least two chapters are required.
+
+	Args:
+		episode_data: Episode metadata exposing ``chapters``.
+		configuration_settings: Options of the show (not read here).
+
+	Returns:
+		The chapter metadata text, or None when fewer than two chapters exist.
+	"""
+	chapter_list = episode_data.chapters
+	has_intro_and_content = bool(chapter_list) and len(chapter_list) >= 2
+
+	if has_intro_and_content:
+		intro_end = chapter_list[0]["end_time"]
+		return modify_chapters_for_ffmpeg(chapter_list[1:], intro_end)
+
+	logger.warning("Cutting intro requested but no chapters found.")
+	return None
+
+
+def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
+	"""
+	Download the episode audio with yt_dlp and tag it.
+
+	The output template is ``{filename_timestamp}.%(ext)s`` and the
+	post-processors request MP3 extraction followed by metadata embedding.
+
+	Args:
+		episode_url (str): URL of the episode to download.
+		episode_data: Episode metadata exposing ``extracted_title`` and
+			``extracted_description``.
+		filename_timestamp (str): Base name of the output file and "date" tag.
+		track (str): Value of the "track" tag.
+		artist (str): Value of the "artist" tag.
+	"""
+	tag_values = {
+		"title": episode_data.extracted_title,
+		"artist": artist,
+		"track": track,
+		"date": filename_timestamp,
+		"comment": episode_data.extracted_description,
+	}
+	metadata_arguments = []
+	for tag_name, tag_value in tag_values.items():
+		metadata_arguments += ["-metadata", f"{tag_name}={tag_value}"]
+
+	audio_postprocessors = [
+		{"key": "FFmpegExtractAudio", "preferredcodec": "mp3"},
+		{"key": "FFmpegMetadata"},
+	]
+
+	# NOTE(review): "extract_audio", "audio_format" and "addmetadata" look like
+	# CLI option names; confirm that the YoutubeDL API actually honours them.
+	ytdl_options = {
+		"quiet": True,
+		"noprogress": True,
+		"format": "bestaudio/best",
+		"extract_audio": True,
+		"audio_format": "mp3",
+		"outtmpl": f"{filename_timestamp}.%(ext)s",
+		"addmetadata": True,
+		"postprocessors": audio_postprocessors,
+		"postprocessor_args": metadata_arguments,
+		"merge_output_format": "mp3",
+	}
+
+	with yt_dlp.YoutubeDL(ytdl_options) as downloader:
+		downloader.download(episode_url)
+
+
+def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
+	"""
+	Cut the intro from the episode using FFmpeg and apply metadata.
+
+	Reads ``{filename_timestamp}.mp3``, skips everything before the end of the
+	first chapter and writes ``{filename_timestamp}-{extracted_id}.mp3`` with
+	the given chapters and tags. The downloaded source file is removed only
+	after FFmpeg succeeded; an FFmpeg failure is logged and not re-raised.
+
+	Args:
+		ffmpeg_chapters (str): Chapter list in FFmpeg metadata format.
+		episode_data: Episode metadata exposing ``chapters``, ``extracted_id``,
+			``extracted_title`` and ``extracted_description``.
+		filename_timestamp (str): Base name of the input file and "date" tag.
+		track (str): Value of the "track" tag.
+		artist (str): Value of the "artist" tag.
+	"""
+	logger.info("Fixing chapters and metadata with FFmpeg")
+
+	temp_metadata_path = None
+	try:
+		# FFmpeg reads metadata files as UTF-8, so do not rely on the locale
+		# encoding when chapter titles contain non-ASCII characters.
+		with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt", encoding="utf-8") as temp_file:
+			temp_file.write(ffmpeg_chapters)
+			temp_metadata_path = temp_file.name
+
+		ffmpeg_command = [
+			# -ss before -i seeks in the input, dropping the intro chapter.
+			"ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
+			"-hide_banner", "-loglevel", "error",
+			"-i", f"{filename_timestamp}.mp3",
+			"-i", temp_metadata_path,
+			# Take global metadata/chapters from the second input (the text file).
+			"-map_metadata", "1",
+			"-metadata", f"title={episode_data.extracted_title}",
+			"-metadata", f"artist={artist}",
+			"-metadata", f"track={track}",
+			"-metadata", f"date={filename_timestamp}",
+			"-metadata", f"comment={episode_data.extracted_description}",
+			"-codec", "copy",
+			f"{filename_timestamp}-{episode_data.extracted_id}.mp3"
+		]
+
+		subprocess.run(ffmpeg_command, check=True)
+		remove(f"{filename_timestamp}.mp3")
+	except subprocess.CalledProcessError as e:
+		logger.error("Error running FFmpeg: %s", e)
+	finally:
+		# The temp file was created with delete=False, so clean it up manually.
+		if temp_metadata_path:
+			try:
+				remove(temp_metadata_path)
+			except OSError as ex:
+				logger.warning("Could not remove temp metadata file: %s", ex)
+
+
+def download_episode(configuration_settings: dict, episode_url: str):
+	"""
+	Download one episode, tag it and optionally cut its intro.
+
+	The final file is named ``{date}-{episode id}.mp3``.
+
+	Args:
+		configuration_settings (dict): Options of the show being processed.
+		episode_url (str): URL of the episode to download.
+
+	Raises:
+		RuntimeError: If the episode metadata could not be extracted.
+	"""
+	logger.info("Extracting metadata")
+	episode_data = return_episode_data(episode_url)
+
+	# The metadata helper may hand back a plain dict describing an extraction
+	# failure instead of episode data; stop here with its message rather than
+	# failing later with an unrelated AttributeError.
+	if isinstance(episode_data, dict):
+		raise RuntimeError(episode_data.get("error", "Failed to extract episode metadata"))
+
+	episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data)
+
+	# Fall back to the show name from the command line when no artist is configured.
+	artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist")
+
+	ffmpeg_chapters = None
+	if configuration_settings.get("cut_intro"):
+		ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)
+
+	logger.info("Downloading episode")
+	_download_audio(episode_url, episode_data, filename_timestamp, track, artist)
+
+	if ffmpeg_chapters:
+		_cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist)
+	else:
+		# Nothing to cut: only give the download its final name.
+		rename(f"{filename_timestamp}.mp3", f"{filename_timestamp}-{episode_data.extracted_id}.mp3")
+
+	logger.info("Finished")
+
+
+
+	
+if __name__ == "__main__":
+	# Without a configured handler the INFO-level progress messages are dropped.
+	logging.basicConfig(level=logging.INFO)
+
+	if len(sys.argv) < 2:
+		sys.exit(f"Usage: {sys.argv[0]} <show name> [release date]")
+
+	show_name = sys.argv[1]
+	if show_name not in settings:
+		sys.exit(f"Unknown show {show_name!r}. Available shows: {', '.join(settings)}")
+
+	logger.info(f"Processing {show_name}")
+	episode_url = return_url_of_latest_episode(settings[show_name]["base_url"])
+	download_episode(settings[show_name], episode_url)
--- a/src/bbcr1/helper.py
+++ b/src/bbcr1/helper.py
@@ -0,0 +1,52 @@
+from datetime import datetime, timezone, timedelta
+from typing import List, Dict
+
+import subprocess
+
+
+def time_to_milliseconds(time,length_to_cut) -> int:
+	"""
+	Convert a position in seconds to milliseconds after removing a leading cut.
+
+	Args:
+		time (int/float): Position in seconds.
+		length_to_cut (int/float): Seconds removed from the start.
+
+	Returns:
+		int: ``time - length_to_cut`` expressed in whole milliseconds.
+	"""
+	# Round instead of truncating: 1.001 * 1000 is 1000.9999999999999 in
+	# floating point and would otherwise lose a millisecond.
+	return round(time * 1000 - length_to_cut * 1000)
+
+def add_html_tags_to_description(input_text) -> str:
+	"""
+	Wrap a plain-text description in simple HTML.
+
+	Blank lines separate paragraphs (``<p>``); single newlines inside a
+	paragraph become ``<br>``.
+
+	Args:
+		input_text (str): Plain-text description.
+
+	Returns:
+		str: The description as HTML paragraphs, one per line.
+	"""
+	# Convert line breaks per paragraph so the newline placed between two
+	# paragraphs is not itself turned into a <br>.
+	paragraphs = (paragraph.replace("\n", "<br>") for paragraph in input_text.split("\n\n"))
+	return "<p>" + "</p>\n<p>".join(paragraphs) + "</p>"
+	
+
+def get_friday_number(extracted_timestamp) -> int:
+	"""
+	Count the Fridays of the year up to and including the given moment.
+
+	The timestamp is interpreted in UTC so the result does not depend on the
+	timezone of the machine running the script.
+
+	Args:
+		extracted_timestamp (int/float): Epoch seconds.
+
+	Returns:
+		int: 1 on the first Friday of the year, 2 from the second Friday on,
+			and so forth; 0 for days before the first Friday.
+	"""
+	dt = datetime.fromtimestamp(extracted_timestamp, tz=timezone.utc)
+	start_of_year = datetime(dt.year, 1, 1, tzinfo=timezone.utc)
+	# weekday() numbers Monday as 0, so Friday is 4.
+	days_until_first_friday = (4 - start_of_year.weekday()) % 7
+	first_friday = start_of_year + timedelta(days=days_until_first_friday)
+	fridays_passed = (dt - first_friday).days // 7 + 1
+	return fridays_passed
+
+def return_url_of_latest_episode(base_url:str) -> str:
+	"""
+	Ask get_iplayer for the episodes of a programme and build the newest URL.
+
+	Args:
+		base_url (str): BBC programme page passed to ``--pid-recursive-list``.
+
+	Returns:
+		str: BBC Sounds play URL of the selected episode.
+	"""
+	listing_command = ["get_iplayer", "--pid-recursive-list", base_url]
+	completed = subprocess.run(listing_command, capture_output=True, text=True)
+
+	# NOTE(review): presumably the last two output lines are a summary/blank
+	# line, which is why the third-last line is used; confirm against the
+	# actual get_iplayer output.
+	output_lines = completed.stdout.split("\n")
+	third_last_line = output_lines[-3]
+	last_field = third_last_line.split(",")[-1]
+	# Drop the first character of the field (a leading space after the comma,
+	# presumably).
+	latest_episode_id = last_field[1:]
+
+	return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"
+
+def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
+	"""
+	Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut.
+
+	The chapter dicts passed in are left untouched, so calling this function
+	repeatedly with the same list gives the same result.
+
+	Args:
+		chapters (list): List of chapter dicts with "start_time", "end_time", and "title".
+		length_to_cut (int/float): Amount of time to cut from start, in seconds.
+
+	Returns:
+		str: Chapters formatted as ffmpeg metadata.
+
+	Raises:
+		KeyError: If a chapter lacks "start_time", "end_time" or "title".
+	"""
+	def escape_value(value) -> str:
+		# The ffmpeg metadata format requires '=', ';', '#', '\' and newlines
+		# in values to be backslash-escaped. The backslash goes first so the
+		# escapes added afterwards are not escaped again.
+		escaped = str(value)
+		for special in ("\\", "=", ";", "#", "\n"):
+			escaped = escaped.replace(special, "\\" + special)
+		return escaped
+
+	sections = [";FFMETADATA1\n"]
+	for entry in chapters:
+		start = time_to_milliseconds(entry["start_time"], length_to_cut)
+		end = time_to_milliseconds(entry["end_time"], length_to_cut)
+		sections.append(
+			"[CHAPTER]\n"
+			"TIMEBASE=1/1000\n"
+			f"START={start}\n"
+			f"END={end}\n"
+			f"title={escape_value(entry['title'])}\n\n"
+		)
+
+	return "".join(sections)
--- a/src/bbcr1/ytdlp_helper.py
+++ b/src/bbcr1/ytdlp_helper.py
@@ -0,0 +1,47 @@
+import yt_dlp
+from helper import add_html_tags_to_description
+from typing import List, Optional
+from dataclasses import dataclass
+
+@dataclass
+class EpisodeData:
+	"""Metadata of a single episode as extracted by yt_dlp."""
+
+	# Chapter entries as reported by yt_dlp.
+	chapters: List
+	# Episode description wrapped in HTML tags.
+	extracted_description: str
+	# Unique episode ID.
+	extracted_id: str
+	# Episode title.
+	extracted_title: str
+	# Airing timestamp in epoch seconds, or None when unavailable.
+	extracted_timestamp: Optional[int]
+
+
+def return_episode_data(episode_url: str) -> EpisodeData:
+	"""
+	Quietly extracts meta information about a given radio show.
+
+	Args:
+		episode_url (str): The URL of the episode.
+
+	Returns:
+			EpisodeData: A dataclass containing episode metadata:
+				- chapters (List): Chapters in JSON format.
+				- extracted_description (str): HTML-wrapped description of the episode.
+				- extracted_id (str): Unique episode ID.
+				- extracted_title (str): Episode title.
+				- extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available.
+
+	Raises:
+		RuntimeError: If yt_dlp fails to extract the episode information.
+	"""
+	try:
+		with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
+			info_dict = ydl.extract_info(episode_url, download=False)
+	except Exception as e:
+		# Raise instead of returning an error dict: the annotated return type
+		# is EpisodeData and callers access its attributes right away.
+		raise RuntimeError(f"Failed to extract info: {e}") from e
+
+	# "or" also covers keys that are present but hold None.
+	return EpisodeData(
+		chapters=info_dict.get("chapters") or [],
+		extracted_description=add_html_tags_to_description(info_dict.get("description") or ""),
+		extracted_id=info_dict.get("id") or "",
+		extracted_title=info_dict.get("title") or "",
+		extracted_timestamp=info_dict.get("timestamp"),
+	)
+
+
+if __name__ == "__main__":
+	# Manual smoke test: print the metadata of one known episode.
+	# (The stray tab that used to sit inside the URL literal is removed.)
+	print(return_episode_data("https://www.bbc.co.uk/sounds/play/m002jtcqyt"))