Imported BBC Radio1 handler and heavily rewrote it.
Refactored metadata extraction: created an EpisodeData dataclass to represent the metadata clearly. Modularized the download process by breaking the huge main function into several smaller parts. Added error handling for yt_dlp. Added docstrings to functions for ease of understanding.
This commit is contained in:
parent
0e127670d9
commit
a47d6667bc
BIN
src/bbcr1/__pycache__/helper.cpython-312.pyc
Normal file
BIN
src/bbcr1/__pycache__/helper.cpython-312.pyc
Normal file
Binary file not shown.
26
src/bbcr1/config.py
Normal file
26
src/bbcr1/config.py
Normal file
@ -0,0 +1,26 @@
|
||||
# Per-show configuration, keyed by show name.
#
# Keys consumed by get_episode.py:
#   artist                                  - value written to the "artist" tag
#   base_url                                - programme page handed to get_iplayer
#   cut_intro                               - when truthy, the first chapter is cut off
#   modify_timestamp                        - seconds subtracted from the extracted timestamp
#   calculate_amount_of_fridays             - track becomes the Friday number of the year
#   use_different_release_date              - file date is taken from the second CLI argument
#   remove_amount_of_characters_from_title  - slice end applied to the title (title[:n])
settings = {
    "Pete Tong": {
        "artist": "Pete Tong",
        "base_url": "https://www.bbc.co.uk/programmes/b006ww0v",
        "cut_intro": True,
        "modify_timestamp": 7200,
        "calculate_amount_of_fridays": True,
    },
    "Radio 1s Classic Essential Mix": {
        "artist": "Radio 1s Classic Essential Mix",
        "use_different_release_date": True,
        "base_url": "https://www.bbc.co.uk/programmes/b00f3pc4",
        "cut_intro": True,
        "remove_amount_of_characters_from_title": -5,
    },
    "Defected on Radio 1 Dance": {
        "artist": "Defected on Radio 1 Dance",
        "base_url": "https://www.bbc.co.uk/programmes/m00287n1",
        "remove_amount_of_characters_from_title": -10,
    },
    "Radio 1s Essential Mix": {
        "artist": "Radio 1s Essential Mix",
        "base_url": "https://www.bbc.co.uk/programmes/b006wkfp",
        "cut_intro": True,
    },
}
|
||||
162
src/bbcr1/get_episode.py
Normal file
162
src/bbcr1/get_episode.py
Normal file
@ -0,0 +1,162 @@
|
||||
import yt_dlp
|
||||
import subprocess
|
||||
import tempfile
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from config import settings
|
||||
from os import rename, remove
|
||||
from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode
|
||||
import logging
|
||||
from ytdlp_helper import return_episode_data
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _apply_configurations(configuration_settings: dict, episode_data):
    """
    Adjust the episode metadata according to the per-show settings.

    Options are detected by key presence only; for the flag-style options
    the stored value is never inspected. ``episode_data`` is modified in
    place (title and timestamp) and also returned.

    Returns:
        tuple: (episode_data, filename_timestamp, track)
    """
    def _utc_date_of_episode() -> str:
        # Evaluated lazily so that it sees the (possibly shifted) timestamp
        # and is skipped entirely when a date is supplied on the command line.
        moment = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc)
        return moment.strftime("%Y-%m-%d")

    if "remove_amount_of_characters_from_title" in configuration_settings:
        slice_end = configuration_settings["remove_amount_of_characters_from_title"]
        episode_data.extracted_title = episode_data.extracted_title[:slice_end]

    if "modify_timestamp" in configuration_settings:
        episode_data.extracted_timestamp -= configuration_settings["modify_timestamp"]

    custom_date_requested = "use_different_release_date" in configuration_settings
    if custom_date_requested and len(sys.argv) > 2:
        # The release date is taken verbatim from the second CLI argument.
        filename_timestamp = sys.argv[2]
    else:
        if custom_date_requested:
            logger.warning("Use_different_release_date set but missing sys.argv[2]. Falling back to default.")
        filename_timestamp = _utc_date_of_episode()

    track = (
        get_friday_number(episode_data.extracted_timestamp)
        if "calculate_amount_of_fridays" in configuration_settings
        else filename_timestamp
    )

    return episode_data, filename_timestamp, track
|
||||
|
||||
|
||||
def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
    """
    Build the FFmpeg chapter metadata used when the intro is cut.

    The first chapter is treated as the intro: its end time is the amount
    to cut, and only the chapters after it are converted.
    ``configuration_settings`` is accepted but not read here.

    Returns:
        The result of ``modify_chapters_for_ffmpeg``, or None when fewer
        than two chapters are available.
    """
    chapter_list = episode_data.chapters

    if chapter_list and len(chapter_list) >= 2:
        remaining_chapters = chapter_list[1:]
        intro_end = chapter_list[0]["end_time"]
        return modify_chapters_for_ffmpeg(remaining_chapters, intro_end)

    logger.warning("Cutting intro requested but no chapters found.")
    return None
|
||||
|
||||
|
||||
def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Download episode audio using yt_dlp with metadata.

    The audio is extracted to MP3 and written to ``{filename_timestamp}.mp3``
    in the current working directory (via the ``outtmpl`` template).

    Args:
        episode_url: URL of the episode to download.
        episode_data: Object providing ``extracted_title`` and
            ``extracted_description`` for the tags.
        filename_timestamp: Date string used for both the file name and the
            ``date`` tag.
        track: Value written to the ``track`` tag.
        artist: Value written to the ``artist`` tag.
    """
    ytdl_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        # NOTE(review): "extract_audio", "audio_format" and "addmetadata" look
        # like CLI flag names; confirm YoutubeDL reads them as API options.
        # The postprocessors below already request both steps explicitly.
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": f"{filename_timestamp}.%(ext)s",
        "addmetadata": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            },
            {
                "key": "FFmpegMetadata",
            },
        ],
        "postprocessor_args": [
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
        ],
        "merge_output_format": "mp3",
    }

    with yt_dlp.YoutubeDL(ytdl_options) as episode:
        # download() is documented to take a list of URLs.
        episode.download([episode_url])
|
||||
|
||||
|
||||
def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Cut the intro from the episode using FFmpeg and apply metadata.

    Reads ``{filename_timestamp}.mp3``, starts at the end time of the first
    chapter, and writes ``{filename_timestamp}-{extracted_id}.mp3`` with the
    supplied chapters and tags. The source file is removed only after FFmpeg
    succeeds. A failing FFmpeg run is logged and not re-raised, so the
    source file is kept in that case.

    Args:
        ffmpeg_chapters: Chapter metadata text written to a temporary file
            and passed to FFmpeg as a second input.
        episode_data: Object providing ``chapters``, ``extracted_title``,
            ``extracted_description`` and ``extracted_id``.
        filename_timestamp: Date string used in the file names and the
            ``date`` tag.
        track: Value written to the ``track`` tag.
        artist: Value written to the ``artist`` tag.
    """
    logger.info("Fixing chapters and metadata with FFmpeg")

    temp_metadata_path = None
    try:
        # Written as UTF-8 explicitly so non-ASCII chapter titles do not
        # depend on the platform's default encoding.
        with tempfile.NamedTemporaryFile(
            delete=False, mode="w", suffix=".txt", encoding="utf-8"
        ) as temp_file:
            temp_file.write(ffmpeg_chapters)
            temp_metadata_path = temp_file.name

        ffmpeg_command = [
            # -ss ahead of the first -i skips the intro on the audio input.
            "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
            "-hide_banner", "-loglevel", "error",
            "-i", f"{filename_timestamp}.mp3",
            "-i", temp_metadata_path,
            # Take metadata (the chapters) from the second input.
            "-map_metadata", "1",
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
            "-codec", "copy",
            f"{filename_timestamp}-{episode_data.extracted_id}.mp3",
        ]

        subprocess.run(ffmpeg_command, check=True)
        remove(f"{filename_timestamp}.mp3")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running FFmpeg: {e}")
    finally:
        if temp_metadata_path:
            try:
                remove(temp_metadata_path)
            except OSError as ex:
                logger.warning(f"Could not remove temp metadata file: {ex}")
|
||||
|
||||
|
||||
def download_episode(configuration_settings: dict, episode_url: str):
    """
    Download one episode and tag it according to the show's settings.

    Steps: extract metadata, apply the per-show adjustments, download the
    audio, then either cut the intro with FFmpeg or simply rename the file
    to ``{filename_timestamp}-{extracted_id}.mp3``.

    Args:
        configuration_settings: Settings dict for the show.
        episode_url: URL of the episode to download.
    """
    logger.info("Extracting metadata")
    episode_data, filename_timestamp, track = _apply_configurations(
        configuration_settings, return_episode_data(episode_url)
    )

    # The first CLI argument is the fallback artist when none is configured.
    fallback_artist = sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist"
    artist = configuration_settings.get("artist", fallback_artist)

    if configuration_settings.get("cut_intro"):
        ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)
    else:
        ffmpeg_chapters = None

    logger.info("Downloading episode")
    _download_audio(episode_url, episode_data, filename_timestamp, track, artist)

    if not ffmpeg_chapters:
        downloaded_name = f"{filename_timestamp}.mp3"
        final_name = f"{filename_timestamp}-{episode_data.extracted_id}.mp3"
        rename(downloaded_name, final_name)
    else:
        _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist)

    logger.info("Finished")
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Usage: get_episode.py <show name> [release date]
    # Without a configured handler, the INFO messages used throughout this
    # module are dropped by the logging defaults.
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <show name> [release date]")

    show_name = sys.argv[1]
    if show_name not in settings:
        sys.exit(f"Unknown show {show_name!r}. Known shows: {', '.join(settings)}")

    logger.info(f"Processing {show_name}")
    show_settings = settings[show_name]
    episode_url = return_url_of_latest_episode(show_settings["base_url"])
    download_episode(show_settings, episode_url)
|
||||
52
src/bbcr1/helper.py
Normal file
52
src/bbcr1/helper.py
Normal file
@ -0,0 +1,52 @@
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from typing import List, Dict
|
||||
|
||||
import subprocess
|
||||
|
||||
|
||||
def time_to_milliseconds(time, length_to_cut) -> int:
    """
    Convert a position in seconds to milliseconds, shifted by a cut length.

    Both values are scaled to milliseconds first; the difference is then
    truncated with ``int``.
    """
    position_ms = time * 1000
    cut_ms = length_to_cut * 1000
    return int(position_ms - cut_ms)
|
||||
|
||||
def add_html_tags_to_description(input_text) -> str:
    """
    Wrap plain text in HTML paragraph tags.

    Blank-line separated blocks become ``<p>`` paragraphs (separated by a
    newline in the output) and single newlines inside a block become
    ``<br>``. ``None`` is treated like an empty string.

    Args:
        input_text: Plain-text description, or None.

    Returns:
        str: The description as HTML paragraphs.
    """
    text = input_text or ""
    # Convert line breaks per paragraph so the newline that separates the
    # paragraphs in the output is not itself turned into a <br>.
    paragraphs = [block.replace("\n", "<br>") for block in text.split("\n\n")]
    return "<p>" + "</p>\n<p>".join(paragraphs) + "</p>"
|
||||
|
||||
|
||||
def get_friday_number(extracted_timestamp) -> int:
    """
    Return which Friday of its year a timestamp falls on or after.

    The timestamp is interpreted in UTC so the result does not depend on
    the timezone of the machine running the script.

    Args:
        extracted_timestamp: Epoch seconds.

    Returns:
        int: 1 from the first Friday of the year onwards, increasing by one
        every seven days; 0 for dates before the year's first Friday.
    """
    dt = datetime.fromtimestamp(extracted_timestamp, tz=timezone.utc)
    start_of_year = datetime(dt.year, 1, 1, tzinfo=timezone.utc)
    # weekday() == 4 is Friday.
    days_until_first_friday = (4 - start_of_year.weekday()) % 7
    first_friday = start_of_year + timedelta(days=days_until_first_friday)
    return (dt - first_friday).days // 7 + 1
|
||||
|
||||
def return_url_of_latest_episode(base_url: str) -> str:
    """
    Look up the newest episode of a programme via ``get_iplayer``.

    Runs ``get_iplayer --pid-recursive-list`` and takes the episode id from
    the last comma-separated field of the third-from-last output line,
    dropping that field's first character.

    Args:
        base_url: Programme page URL passed to ``get_iplayer``.

    Returns:
        str: BBC Sounds play URL for the latest episode.

    Raises:
        RuntimeError: If the output is too short or yields no episode id.
    """
    result = subprocess.run(
        ["get_iplayer", "--pid-recursive-list", base_url],
        capture_output=True,
        text=True,
    )

    output_lines = result.stdout.split("\n")
    latest_episode_id = ""
    if len(output_lines) >= 3:
        latest_episode_id = output_lines[-3].split(",")[-1][1:]

    if not latest_episode_id:
        raise RuntimeError(
            f"Could not determine latest episode for {base_url} "
            f"(get_iplayer exit code {result.returncode}): {result.stderr.strip()}"
        )

    return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"
|
||||
|
||||
def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
    """
    Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut.

    The chapter dicts passed in are not modified.

    Args:
        chapters (list): List of chapter dicts with "start_time", "end_time", and "title".
        length_to_cut (int/float): Amount of time to cut from start, in seconds.

    Returns:
        str: Chapters formatted as ffmpeg metadata.

    Raises:
        KeyError: If a chapter lacks "start_time", "end_time" or "title".
    """
    def _escape(value) -> str:
        # FFMETADATA requires '=', ';', '#', '\' and newlines in values to be
        # backslash-escaped; the backslash itself must be handled first.
        text = str(value)
        for special in ("\\", "=", ";", "#", "\n"):
            text = text.replace(special, "\\" + special)
        return text

    sections = [";FFMETADATA1\n"]
    for entry in chapters:
        start_ms = time_to_milliseconds(entry["start_time"], length_to_cut)
        end_ms = time_to_milliseconds(entry["end_time"], length_to_cut)
        sections.append(
            "[CHAPTER]\n"
            "TIMEBASE=1/1000\n"
            f"START={start_ms}\n"
            f"END={end_ms}\n"
            f"title={_escape(entry['title'])}\n\n"
        )

    return "".join(sections)
|
||||
47
src/bbcr1/ytdlp_helper.py
Normal file
47
src/bbcr1/ytdlp_helper.py
Normal file
@ -0,0 +1,47 @@
|
||||
import yt_dlp
|
||||
from helper import add_html_tags_to_description
|
||||
from typing import List, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
class EpisodeData:
    """Metadata describing a single episode, as filled in by return_episode_data."""

    # Chapter entries for the episode.
    chapters: List
    # Episode description, wrapped in HTML tags.
    extracted_description: str
    # Episode identifier.
    extracted_id: str
    # Episode title.
    extracted_title: str
    # Timestamp in epoch seconds, or None when not available.
    extracted_timestamp: Optional[int]
|
||||
|
||||
|
||||
def return_episode_data(episode_url: str) -> EpisodeData:
    """
    Quietly extracts meta information about a given radio show.

    Args:
        episode_url (str): The URL of the episode.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            - chapters (List): Chapters as returned by yt_dlp (empty list if none).
            - extracted_description (str): HTML-wrapped description of the episode.
            - extracted_id (str): Unique episode ID.
            - extracted_title (str): Episode title.
            - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available.

    Raises:
        RuntimeError: If yt_dlp fails or returns no metadata. Raising keeps
            the declared return type honest; callers read attributes of the
            result and cannot use an error dict.
    """
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
            info_dict = ydl.extract_info(episode_url, download=False)
    except Exception as e:
        raise RuntimeError(f"Failed to extract info: {e}") from e

    if not info_dict:
        raise RuntimeError(f"Failed to extract info: no metadata returned for {episode_url}")

    # `or` also covers keys that are present but set to None.
    return EpisodeData(
        chapters=info_dict.get("chapters") or [],
        extracted_description=add_html_tags_to_description(info_dict.get("description") or ""),
        extracted_id=info_dict.get("id") or "",
        extracted_title=info_dict.get("title") or "",
        extracted_timestamp=info_dict.get("timestamp"),
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual check: print the metadata extracted for one sample episode.
    # NOTE(review): the sample URL ends with a space - confirm it is intended.
    sample_url = "https://www.bbc.co.uk/sounds/play/m002jtcqyt "
    print(return_episode_data(sample_url))
|
||||
Loading…
x
Reference in New Issue
Block a user