Imported BBC Radio1 handler and heavily rewrote it.

Refactored metadata extraction -> created an EpisodeData dataclass to represent metadata clearly.
Modularized the download process: broke the huge main function down into several parts.
Added error handling for yt_dlp.
Added docstrings to functions for ease of understanding.
This commit is contained in:
Florian 2025-10-10 17:29:42 +02:00
parent 0e127670d9
commit a47d6667bc
5 changed files with 287 additions and 0 deletions

Binary file not shown.

26
src/bbcr1/config.py Normal file
View File

@ -0,0 +1,26 @@
# Per-show settings, keyed by the show name.
#
# Recognised keys:
#   artist                                  value for the "artist" metadata tag
#   base_url                                BBC programme page of the show
#   cut_intro                               cut off the first chapter of the episode
#   modify_timestamp                        seconds subtracted from the episode timestamp
#   calculate_amount_of_fridays             use the Friday count of the year as track value
#   use_different_release_date              take the release date from the command line
#   remove_amount_of_characters_from_title  end index used to slice the episode title
settings = {
    "Pete Tong": {
        "artist": "Pete Tong",
        "base_url": "https://www.bbc.co.uk/programmes/b006ww0v",
        "cut_intro": True,
        "modify_timestamp": 7200,
        "calculate_amount_of_fridays": True,
    },
    "Radio 1s Classic Essential Mix": {
        "artist": "Radio 1s Classic Essential Mix",
        "use_different_release_date": True,
        "base_url": "https://www.bbc.co.uk/programmes/b00f3pc4",
        "cut_intro": True,
        "remove_amount_of_characters_from_title": -5,
    },
    "Defected on Radio 1 Dance": {
        "artist": "Defected on Radio 1 Dance",
        "base_url": "https://www.bbc.co.uk/programmes/m00287n1",
        "remove_amount_of_characters_from_title": -10,
    },
    "Radio 1s Essential Mix": {
        "artist": "Radio 1s Essential Mix",
        "base_url": "https://www.bbc.co.uk/programmes/b006wkfp",
        "cut_intro": True,
    },
}

162
src/bbcr1/get_episode.py Normal file
View File

@ -0,0 +1,162 @@
import yt_dlp
import subprocess
import tempfile
import sys
from datetime import datetime, timezone
from config import settings
from os import rename, remove
from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode
import logging
from ytdlp_helper import return_episode_data
logger = logging.getLogger(__name__)
def _apply_configurations(configuration_settings: dict, episode_data):
    """
    Apply configuration settings to episode data.

    The title and timestamp stored on ``episode_data`` are adjusted in place;
    afterwards the date string used for the filename and the value for the
    track tag are derived from it.

    The boolean switches ``use_different_release_date`` and
    ``calculate_amount_of_fridays`` are evaluated by truthiness, so setting one
    of them to ``False`` disables it (the same way ``cut_intro`` is evaluated).

    Args:
        configuration_settings (dict): Settings of a single show.
        episode_data: Object exposing ``extracted_title`` and
            ``extracted_timestamp``; both may be modified.

    Returns:
        tuple: (episode_data, filename_timestamp, track)
    """
    title_end = configuration_settings.get("remove_amount_of_characters_from_title")
    if title_end is not None:
        episode_data.extracted_title = episode_data.extracted_title[:title_end]

    timestamp_offset = configuration_settings.get("modify_timestamp")
    if timestamp_offset:
        episode_data.extracted_timestamp -= timestamp_offset

    # A release date passed on the command line wins over the extracted one.
    filename_timestamp = None
    if configuration_settings.get("use_different_release_date"):
        if len(sys.argv) > 2:
            filename_timestamp = sys.argv[2]
        else:
            logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.")
    if filename_timestamp is None:
        release_moment = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc)
        filename_timestamp = release_moment.strftime("%Y-%m-%d")

    if configuration_settings.get("calculate_amount_of_fridays"):
        track = get_friday_number(episode_data.extracted_timestamp)
    else:
        track = filename_timestamp
    return episode_data, filename_timestamp, track
def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
    """
    Build the FFmpeg chapter metadata needed to cut the intro.

    The first chapter is treated as the intro: the remaining chapters are
    handed to ``modify_chapters_for_ffmpeg`` together with the end time of the
    first chapter as the amount to cut.

    Args:
        episode_data: Object exposing a ``chapters`` sequence of dicts.
        configuration_settings: Not used by this function.

    Returns:
        The result of ``modify_chapters_for_ffmpeg``, or ``None`` when fewer
        than two chapters are available.
    """
    chapters = episode_data.chapters
    if chapters and len(chapters) >= 2:
        intro_end = chapters[0]["end_time"]
        return modify_chapters_for_ffmpeg(chapters[1:], intro_end)

    logger.warning("Cutting intro requested but no chapters found.")
    return None
def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Download the episode audio with yt_dlp and tag it.

    The output template is ``<filename_timestamp>.<ext>``; the
    ``FFmpegExtractAudio`` postprocessor is asked for MP3 and the metadata
    tags are passed along as postprocessor arguments.

    Args:
        episode_url (str): URL of the episode to download.
        episode_data: Object exposing ``extracted_title`` and ``extracted_description``.
        filename_timestamp (str): Base name of the output file, also used as ``date`` tag.
        track (str): Value of the ``track`` tag.
        artist (str): Value of the ``artist`` tag.
    """
    tags = {
        "title": episode_data.extracted_title,
        "artist": artist,
        "track": track,
        "date": filename_timestamp,
        "comment": episode_data.extracted_description,
    }
    # Flatten to: -metadata key=value -metadata key=value ...
    metadata_arguments = []
    for tag_name, tag_value in tags.items():
        metadata_arguments.extend(["-metadata", f"{tag_name}={tag_value}"])

    postprocessors = [
        {"key": "FFmpegExtractAudio", "preferredcodec": "mp3"},
        {"key": "FFmpegMetadata"},
    ]
    ytdl_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": f"{filename_timestamp}.%(ext)s",
        "addmetadata": True,
        "postprocessors": postprocessors,
        "postprocessor_args": metadata_arguments,
        "merge_output_format": "mp3",
    }
    with yt_dlp.YoutubeDL(ytdl_options) as downloader:
        downloader.download(episode_url)
def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Cut the intro from the episode using FFmpeg and apply metadata.

    The chapter metadata is written to a temporary file that FFmpeg reads as
    second input. ``-ss`` is placed before the first input, so the audio is
    read starting at the end time of the first chapter. The streams are copied
    into ``<filename_timestamp>-<extracted_id>.mp3`` and the uncut
    ``<filename_timestamp>.mp3`` is deleted once FFmpeg succeeded.

    A failing FFmpeg run is logged and not re-raised; in that case the uncut
    file is left in place.

    Args:
        ffmpeg_chapters (str): Chapter metadata in FFMETADATA format.
        episode_data: Object exposing ``chapters``, ``extracted_title``,
            ``extracted_description`` and ``extracted_id``.
        filename_timestamp (str): Base name of the input file, also used as ``date`` tag.
        track (str): Value of the ``track`` tag.
        artist (str): Value of the ``artist`` tag.
    """
    logger.info("Fixing chapters and metadata with FFmpeg")
    source_file = f"{filename_timestamp}.mp3"
    target_file = f"{filename_timestamp}-{episode_data.extracted_id}.mp3"
    temp_metadata_path = None
    try:
        # Write UTF-8 explicitly so non-ASCII chapter titles do not depend on
        # the locale encoding of the machine.
        with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt", encoding="utf-8") as temp_file:
            # Remember the path before writing so the file is cleaned up even
            # when the write itself fails.
            temp_metadata_path = temp_file.name
            temp_file.write(ffmpeg_chapters)
        ffmpeg_command = [
            "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
            "-hide_banner", "-loglevel", "error",
            "-i", source_file,
            "-i", temp_metadata_path,
            "-map_metadata", "1",
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
            "-codec", "copy",
            target_file,
        ]
        subprocess.run(ffmpeg_command, check=True)
        remove(source_file)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running FFmpeg: {e}")
    finally:
        if temp_metadata_path:
            try:
                remove(temp_metadata_path)
            except OSError as ex:
                logger.warning(f"Could not remove temp metadata file: {ex}")
def download_episode(configuration_settings: dict, episode_url: str):
    """
    Download one episode and store it as ``<date>-<episode id>.mp3``.

    Steps: extract the metadata, apply the show configuration, download the
    audio and, when ``cut_intro`` is set and chapters are available, cut the
    intro with FFmpeg. Without intro cutting the downloaded file is only
    renamed.

    Args:
        configuration_settings (dict): Settings of a single show.
        episode_url (str): URL of the episode to download.
    """
    logger.info("Extracting metadata")
    episode_data = return_episode_data(episode_url)
    if isinstance(episode_data, dict):
        # return_episode_data reports a failed extraction as {"error": ...};
        # continuing would only fail later with an AttributeError.
        logger.error(episode_data.get("error", "Failed to extract episode metadata"))
        return

    episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data)
    artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist")

    ffmpeg_chapters = None
    if configuration_settings.get("cut_intro"):
        ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)

    logger.info("Downloading episode")
    _download_audio(episode_url, episode_data, filename_timestamp, track, artist)

    if ffmpeg_chapters:
        _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist)
    else:
        rename(f"{filename_timestamp}.mp3", f"{filename_timestamp}-{episode_data.extracted_id}.mp3")
    logger.info("Finished")
if __name__ == "__main__":
    # Usage: get_episode.py <show name> [release date]
    # Without a configured handler the INFO messages of this script are
    # dropped, so set up basic logging first.
    logging.basicConfig(level=logging.INFO)
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <show name> [release date]")
    show_name = sys.argv[1]
    if show_name not in settings:
        sys.exit(f"Unknown show {show_name!r}. Available shows: {', '.join(settings)}")
    logger.info(f"Processing {show_name}")
    episode_url = return_url_of_latest_episode(settings[show_name]["base_url"])
    download_episode(settings[show_name], episode_url)

52
src/bbcr1/helper.py Normal file
View File

@ -0,0 +1,52 @@
from datetime import datetime, timezone, timedelta
from typing import List, Dict
import subprocess
def time_to_milliseconds(time, length_to_cut) -> int:
    """
    Shift a time by ``length_to_cut`` and convert it to milliseconds.

    Both values are scaled by 1000 before the subtraction; the fractional
    part of the result is truncated by ``int()``.
    """
    scaled_time = time * 1000
    scaled_cut = length_to_cut * 1000
    return int(scaled_time - scaled_cut)
def add_html_tags_to_description(input_text) -> str:
    """
    Wrap a plain-text description in HTML paragraph tags.

    A blank line (two consecutive newlines) separates paragraphs; every
    paragraph becomes one ``<p>...</p>`` element and the elements are joined
    with a newline. A single newline inside a paragraph becomes ``<br>``.

    Args:
        input_text: Description text; ``None`` is treated like an empty string.

    Returns:
        str: The description as HTML.
    """
    # Convert each paragraph separately so the newline that joins the
    # <p> elements is not itself turned into a <br>.
    paragraphs = (input_text or "").split("\n\n")
    return "\n".join(
        "<p>" + paragraph.replace("\n", "<br>") + "</p>" for paragraph in paragraphs
    )
def get_friday_number(extracted_timestamp) -> int:
    """
    Count the Fridays of the year up to and including the given moment.

    The timestamp is interpreted in UTC, so the result does not depend on the
    time zone of the machine running the script.

    Args:
        extracted_timestamp: Epoch timestamp in seconds.

    Returns:
        int: 1 from the first Friday of the year on, 2 from the second Friday
        on, and so forth; 0 for dates before the first Friday of the year.
    """
    moment = datetime.fromtimestamp(extracted_timestamp, tz=timezone.utc)
    start_of_year = datetime(moment.year, 1, 1, tzinfo=timezone.utc)
    # weekday() == 4 is Friday
    days_until_first_friday = (4 - start_of_year.weekday()) % 7
    first_friday = start_of_year + timedelta(days=days_until_first_friday)
    return (moment - first_friday).days // 7 + 1
def return_url_of_latest_episode(base_url:str) -> str:
    """
    Determine the BBC Sounds URL of the latest episode of a programme.

    Runs ``get_iplayer --pid-recursive-list <base_url>`` and reads the episode
    id from its standard output.

    Args:
        base_url (str): URL of the programme page.

    Returns:
        str: ``https://www.bbc.co.uk/sounds/play/<episode id>``
    """
    listing = subprocess.run(
        ["get_iplayer", "--pid-recursive-list", base_url],
        capture_output=True,
        text=True,
    )
    output_lines = listing.stdout.split("\n")
    # The id is the last comma-separated field of the third-from-last line,
    # without its first character.
    # NOTE(review): presumably that character is the space after the comma
    # and the last lines are a summary - confirm against get_iplayer output.
    last_field = output_lines[-3].split(",")[-1]
    latest_episode_id = last_field[1:]
    return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"
# Characters that have to be backslash-escaped inside FFMETADATA keys/values.
_FFMETADATA_ESCAPES = str.maketrans({char: "\\" + char for char in "\\=;#\n"})


def _escape_ffmetadata(value) -> str:
    """Return ``value`` as text with FFMETADATA special characters escaped."""
    return str(value).translate(_FFMETADATA_ESCAPES)


def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
    """
    Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut.

    The chapter dicts passed in are left untouched. Chapter titles are escaped
    so that characters with a special meaning in the metadata format
    (``=``, ``;``, ``#``, backslash and newline) cannot break the file.

    Args:
        chapters (list): List of chapter dicts with "start_time", "end_time", and "title".
        length_to_cut (int/float): Amount of time to cut from start, in seconds.

    Returns:
        str: Chapters formatted as ffmpeg metadata.

    Raises:
        KeyError: If a chapter lacks "start_time", "end_time" or "title".
    """
    sections = [";FFMETADATA1\n"]
    for entry in chapters:
        start = time_to_milliseconds(entry["start_time"], length_to_cut)
        end = time_to_milliseconds(entry["end_time"], length_to_cut)
        sections.append(
            "[CHAPTER]\n"
            "TIMEBASE=1/1000\n"
            f"START={start}\n"
            f"END={end}\n"
            f"title={_escape_ffmetadata(entry['title'])}\n\n"
        )
    return "".join(sections)

47
src/bbcr1/ytdlp_helper.py Normal file
View File

@ -0,0 +1,47 @@
import yt_dlp
from helper import add_html_tags_to_description
from typing import List, Optional
from dataclasses import dataclass
@dataclass
class EpisodeData:
    """Metadata of a single episode."""

    # Chapter entries of the episode
    chapters: List
    # Episode description
    extracted_description: str
    # Identifier of the episode
    extracted_id: str
    # Title of the episode
    extracted_title: str
    # Epoch timestamp in seconds, None when not available
    extracted_timestamp: Optional[int]
def return_episode_data(episode_url: str) -> EpisodeData:
    """
    Quietly extracts meta information about a given radio show.

    Fields that are missing from the extracted information, or present with a
    value of ``None``, fall back to an empty list (chapters) or an empty
    string (description, id, title).

    Args:
        episode_url (str): The URL of the episode.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            - chapters (List): Chapters in JSON format.
            - extracted_description (str): HTML-wrapped description of the episode.
            - extracted_id (str): Unique episode ID.
            - extracted_title (str): Episode title.
            - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available.

        When the extraction fails, a dict ``{"error": "<message>"}`` is
        returned instead of an EpisodeData instance; callers have to check
        for that case.
    """
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
            info_dict = ydl.extract_info(episode_url, download=False)
    except Exception as e:
        return {"error": f"Failed to extract info: {e}"}
    # ``.get(key, default)`` only covers missing keys; ``or`` also covers keys
    # that are present with a value of None.
    return EpisodeData(
        chapters=info_dict.get("chapters") or [],
        extracted_description=add_html_tags_to_description(info_dict.get("description") or ""),
        extracted_id=info_dict.get("id") or "",
        extracted_title=info_dict.get("title") or "",
        extracted_timestamp=info_dict.get("timestamp"),
    )
if __name__ == "__main__":
    # Manual check: extract the metadata of one episode and print it.
    sample_episode_url = "https://www.bbc.co.uk/sounds/play/m002jtcqyt "
    print(return_episode_data(sample_episode_url))