Compare commits: main...feature/bb (1 commit)

| Author | SHA1 | Date |
|---|---|---|
| | 53f57878a1 | |
@@ -1,17 +0,0 @@
name: Build image with python3,get-iplayer
on:
  schedule:
    - cron: '0 0 * * 4'
  workflow_dispatch:
env:
  DOCKER_IMAGE_NAME: bbcr1
jobs:
  build_docker_images:
    name: Build Docker Image
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build
        working-directory: ./src/petetong
        run: docker build . --tag $DOCKER_IMAGE_NAME:latest
171  .gitignore (vendored)
@@ -1,171 +0,0 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
*.mp3
*.m4a
14  README.md
@@ -1,13 +1,3 @@
-# Media Download Scripts
-
-A collection of Python scripts for automatically downloading and processing podcast episodes from various sources.
-
-### [Perun YouTube Downloader](src/perun/)
-Downloads the latest video from the Perun YouTube channel, converts to MP3 with metadata and sponsor segment removal, and uploads to a podcast server.
-
-### [BBC Radio 1 Pete Tong Downloader](src/petetong/)
-Downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts to MP3 with metadata, and sends notifications.
-
-## Setup
-
-Each project has its own README with detailed installation and configuration instructions. Navigate to the respective directories for more information.
+For now BBC sounds links are broken with `yt-dlp` so I am switching to `get_iplayer` and keeping this as a reference
+
+See: https://github.com/yt-dlp/yt-dlp/issues/14569
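For context on the switch described above: the new `src/bbcr1` code uses `get_iplayer` only to discover the latest episode, while the audio download itself still goes through `yt-dlp`. A rough sketch of that discovery step, using the Pete Tong programme URL from `config.py`:

```bash
# List the PIDs under the programme page; the new helper takes the last listed
# episode and builds a BBC Sounds URL from its PID.
get_iplayer --pid-recursive-list "https://www.bbc.co.uk/programmes/b006ww0v"
```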
BIN  src/bbcr1/__pycache__/helper.cpython-312.pyc (new file)
Binary file not shown.
26  src/bbcr1/config.py (new file)
@@ -0,0 +1,26 @@
settings = {
    "Pete Tong": {
        "artist": "Pete Tong",
        "base_url": "https://www.bbc.co.uk/programmes/b006ww0v",
        "cut_intro": True,
        "modify_timestamp": 7200,
        "calculate_amount_of_fridays": True
    },
    "Radio 1s Classic Essential Mix": {
        "artist": "Radio 1s Classic Essential Mix",
        "use_different_release_date": True,
        "base_url": "https://www.bbc.co.uk/programmes/b00f3pc4",
        "cut_intro": True,
        "remove_amount_of_characters_from_title": -5
    },
    "Defected on Radio 1 Dance": {
        "artist": "Defected on Radio 1 Dance",
        "base_url": "https://www.bbc.co.uk/programmes/m00287n1",
        "remove_amount_of_characters_from_title": -10
    },
    "Radio 1s Essential Mix": {
        "artist": "Radio 1s Essential Mix",
        "base_url": "https://www.bbc.co.uk/programmes/b006wkfp",
        "cut_intro": True
    }
}
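Each top-level key in `settings` is a show name; `get_episode.py` (added below) looks the show up by that name and treats the remaining keys as optional behaviour flags. A minimal sketch of that lookup, assuming the module layout above:

```python
from config import settings

# Hypothetical lookup mirroring what get_episode.py does with sys.argv[1]:
show = settings["Pete Tong"]
print(show["base_url"])              # programme page used to find the latest episode
print(show.get("cut_intro", False))  # optional flag; absent keys simply fall back to defaults
```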
162  src/bbcr1/get_episode.py (new file)
@@ -0,0 +1,162 @@
import yt_dlp
import subprocess
import tempfile
import sys
from datetime import datetime, timezone
from config import settings
from os import rename, remove
from helper import modify_chapters_for_ffmpeg, get_friday_number, return_url_of_latest_episode
import logging
from ytdlp_helper import return_episode_data

logger = logging.getLogger(__name__)


def _apply_configurations(configuration_settings: dict, episode_data):
    """
    Apply configuration settings to episode data.

    Returns:
        tuple: (episode_data, filename_timestamp, track)
    """
    if "remove_amount_of_characters_from_title" in configuration_settings:
        amount_to_remove = configuration_settings["remove_amount_of_characters_from_title"]
        episode_data.extracted_title = episode_data.extracted_title[:amount_to_remove]

    if "modify_timestamp" in configuration_settings:
        episode_data.extracted_timestamp -= configuration_settings["modify_timestamp"]

    if "use_different_release_date" in configuration_settings:
        if len(sys.argv) > 2:
            filename_timestamp = sys.argv[2]
        else:
            logger.warning("use_different_release_date set but missing sys.argv[2]. Falling back to default.")
            filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d")
    else:
        filename_timestamp = datetime.fromtimestamp(episode_data.extracted_timestamp, tz=timezone.utc).strftime("%Y-%m-%d")

    if "calculate_amount_of_fridays" in configuration_settings:
        track = get_friday_number(episode_data.extracted_timestamp)
    else:
        track = filename_timestamp

    return episode_data, filename_timestamp, track


def _prepare_ffmpeg_chapters(episode_data, configuration_settings):
    """
    Prepare chapters for FFmpeg if cutting the intro is requested.
    """
    if not episode_data.chapters or len(episode_data.chapters) < 2:
        logger.warning("Cutting intro requested but no chapters found.")
        return None

    return modify_chapters_for_ffmpeg(
        episode_data.chapters[1:], episode_data.chapters[0]["end_time"]
    )


def _download_audio(episode_url: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Download episode audio using yt_dlp with metadata.
    """
    ytdl_options = {
        "quiet": True,
        "noprogress": True,
        "format": "bestaudio/best",
        "extract_audio": True,
        "audio_format": "mp3",
        "outtmpl": f"{filename_timestamp}.%(ext)s",
        "addmetadata": True,
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
            },
            {
                "key": "FFmpegMetadata",
            }
        ],
        "postprocessor_args": [
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}"
        ],
        "merge_output_format": "mp3"
    }

    with yt_dlp.YoutubeDL(ytdl_options) as episode:
        episode.download(episode_url)


def _cut_intro_with_ffmpeg(ffmpeg_chapters: str, episode_data, filename_timestamp: str, track: str, artist: str):
    """
    Cut the intro from the episode using FFmpeg and apply metadata.
    """
    logger.info("Fixing chapters and metadata with FFmpeg")

    temp_metadata_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, mode="w", suffix=".txt") as temp_file:
            temp_file.write(ffmpeg_chapters)
            temp_metadata_path = temp_file.name

        ffmpeg_command = [
            "ffmpeg", "-ss", str(episode_data.chapters[0]["end_time"]),
            "-hide_banner", "-loglevel", "error",
            "-i", f"{filename_timestamp}.mp3",
            "-i", temp_metadata_path,
            "-map_metadata", "1",
            "-metadata", f"title={episode_data.extracted_title}",
            "-metadata", f"artist={artist}",
            "-metadata", f"track={track}",
            "-metadata", f"date={filename_timestamp}",
            "-metadata", f"comment={episode_data.extracted_description}",
            "-codec", "copy",
            f"{filename_timestamp}-{episode_data.extracted_id}.mp3"
        ]

        subprocess.run(ffmpeg_command, check=True)
        remove(f"{filename_timestamp}.mp3")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error running FFmpeg: {e}")
    finally:
        if temp_metadata_path:
            try:
                remove(temp_metadata_path)
            except Exception as ex:
                logger.warning(f"Could not remove temp metadata file: {ex}")


def download_episode(configuration_settings: dict, episode_url: str):
    logger.info("Extracting metadata")
    episode_data = return_episode_data(episode_url)

    episode_data, filename_timestamp, track = _apply_configurations(configuration_settings, episode_data)

    artist = configuration_settings.get("artist", sys.argv[1] if len(sys.argv) > 1 else "Unknown Artist")

    ffmpeg_chapters = None
    if configuration_settings.get("cut_intro"):
        ffmpeg_chapters = _prepare_ffmpeg_chapters(episode_data, configuration_settings)

    logger.info("Downloading episode")
    _download_audio(episode_url, episode_data, filename_timestamp, track, artist)

    if ffmpeg_chapters:
        _cut_intro_with_ffmpeg(ffmpeg_chapters, episode_data, filename_timestamp, track, artist)
    else:
        rename(f"{filename_timestamp}.mp3", f"{filename_timestamp}-{episode_data.extracted_id}.mp3")

    logger.info("Finished")


if __name__ == "__main__":
    show_name = sys.argv[1]
    logger.info(f"Processing {show_name}")
    episode_url = return_url_of_latest_episode(settings[show_name]["base_url"])
    download_episode(settings[show_name], episode_url)
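As a usage sketch (assuming `get_iplayer`, `ffmpeg`, and the Python dependencies are installed), the script is driven purely by the show names defined in `config.py`; a second argument is only read when `use_different_release_date` is set for that show:

```bash
# Download the latest episode of a configured show
python get_episode.py "Pete Tong"

# For shows with use_different_release_date, pass the date to use in the filename
python get_episode.py "Radio 1s Classic Essential Mix" 2025-10-17
```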
52  src/bbcr1/helper.py (new file)
@@ -0,0 +1,52 @@
from datetime import datetime, timezone, timedelta
from typing import List, Dict

import subprocess


def time_to_milliseconds(time, length_to_cut) -> int:
    return int(time * 1000 - length_to_cut * 1000)


def add_html_tags_to_description(input_text) -> str:
    return "<p>" + input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>") + "</p>"


def get_friday_number(extracted_timestamp) -> int:
    dt = datetime.fromtimestamp(extracted_timestamp)
    start_of_year = datetime(dt.year, 1, 1)
    days_until_first_friday = (4 - start_of_year.weekday()) % 7
    first_friday = start_of_year + timedelta(days=days_until_first_friday)
    fridays_passed = (dt - first_friday).days // 7 + 1
    return fridays_passed


def return_url_of_latest_episode(base_url: str) -> str:
    result = subprocess.run(["get_iplayer", "--pid-recursive-list", base_url], capture_output=True, text=True)
    latest_episode_id = result.stdout.split("\n")[-3].split(",")[-1][1:]
    return f"https://www.bbc.co.uk/sounds/play/{latest_episode_id}"


def modify_chapters_for_ffmpeg(chapters: List[Dict], length_to_cut: float) -> str:
    """
    Converts chapter times to ffmpeg-compatible metadata format, adjusting by length_to_cut.

    Args:
        chapters (list): List of chapter dicts with "start_time", "end_time", and "title".
        length_to_cut (int/float): Amount of time to cut from the start, in seconds.

    Returns:
        str: Chapters formatted as ffmpeg metadata.
    """
    for entry in chapters:
        if "start_time" in entry:
            entry["start_time"] = time_to_milliseconds(entry["start_time"], length_to_cut)
        if "end_time" in entry:
            entry["end_time"] = time_to_milliseconds(entry["end_time"], length_to_cut)

    chapter_format = ";FFMETADATA1\n"
    for entry in chapters:
        chapter_format += "[CHAPTER]\n"
        chapter_format += "TIMEBASE=1/1000\n"
        chapter_format += f"START={entry['start_time']}\n"
        chapter_format += f"END={entry['end_time']}\n"
        chapter_format += f"title={entry['title']}\n\n"

    return chapter_format
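To make the FFMETADATA format concrete, `modify_chapters_for_ffmpeg` would turn a single chapter running from 130 s to 430 s, with a 10 s intro cut off, into roughly the following (illustrative values only):

```
;FFMETADATA1
[CHAPTER]
TIMEBASE=1/1000
START=120000
END=420000
title=Example Track
```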
47  src/bbcr1/ytdlp_helper.py (new file)
@@ -0,0 +1,47 @@
import yt_dlp
from helper import add_html_tags_to_description
from typing import List, Optional
from dataclasses import dataclass


@dataclass
class EpisodeData:
    chapters: List
    extracted_description: str
    extracted_id: str
    extracted_title: str
    extracted_timestamp: Optional[int]


def return_episode_data(episode_url: str) -> EpisodeData:
    """
    Quietly extracts meta information about a given radio show.

    Args:
        episode_url (str): The URL of the episode.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            - chapters (List): Chapters in JSON format.
            - extracted_description (str): HTML-wrapped description of the episode.
            - extracted_id (str): Unique episode ID.
            - extracted_title (str): Episode title.
            - extracted_timestamp (Optional[int]): Airing timestamp (epoch seconds), if available.
    """
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as ydl:
            info_dict = ydl.extract_info(episode_url, download=False)
    except Exception as e:
        return {"error": f"Failed to extract info: {e}"}

    return EpisodeData(
        chapters=info_dict.get("chapters", []),
        extracted_description=add_html_tags_to_description(info_dict.get("description", "")),
        extracted_id=info_dict.get("id", ""),
        extracted_title=info_dict.get("title", ""),
        extracted_timestamp=info_dict.get("timestamp"),
    )


if __name__ == "__main__":
    print(return_episode_data("https://www.bbc.co.uk/sounds/play/m002jtcqyt"))
@@ -1,75 +1,18 @@
-# Perun YouTube Podcast Downloader
+# Perun
 
-A Python script that automatically downloads the latest video from the Perun YouTube channel, converts it to MP3 with metadata, removes sponsor segments, and uploads it to a podcast server.
-
-## Features
-
-- **Automatic Detection**: Checks for new episodes by comparing with Audiobookshelf library
-- **Audio Conversion**: Downloads and converts YouTube videos to MP3 format
-- **Sponsor Removal**: Uses SponsorBlock API to remove sponsored segments
-- **Metadata Injection**: Adds title, artist, track number, date, and description to MP3 files
-- **SFTP Upload**: Automatically uploads to remote podcast server
-- **Push Notifications**: Sends notification when new episode is available
-
-## Prerequisites
-
-- Python 3.8+
-- yt-dlp
-- ffmpeg (for audio conversion)
-- SSH key-based authentication configured
-- Audiobookshelf server with API access
-
-## Installation
-
-```bash
-pip install -r requirements.txt
-```
-Install ffmpeg:
-```bash
-# Ubuntu/Debian
-sudo apt install ffmpeg
-
-# macOS
-brew install ffmpeg
-```
-## Usage
-
-Run the script manually:
-```bash
-python get_episode.py
-```
-
-Or schedule with cron and use the provided `grabEpisode.sh` (Monday at 7 AM):
-```bash
-0 7 * * 1 /path/to/script/grabEpisode.sh
-```
-
-Youtube blocks a lot of server IPs so running this locally is just easier.
-
-## Configuration
-
-Create a `.env` file with the following variables:
-
-```env
-# YouTube channel to monitor
-YOUTUBE_CHANNEL_URL=https://www.youtube.com/@PerunAU/videos
-
-# Audiobookshelf API
-PODCAST_API_URL=https://your-audiobookshelf.com/api/items/{item-id}
-PODCAST_AUTHORIZATION_TOKEN=your_token_here
-
-# SFTP upload destination
-REMOTE_HOSTNAME=your_ssh_host_alias
-REMOTE_PATH=/path/to/podcast/folder
-
-# Backend notification service
-BACKEND_API_URL=http://backend:8101/internal/receive-notifications
-BACKEND_API_KEY=your_api_key
-```
-
-## Output
-
-MP3 files are named: `perun-YYYY-MM-DD.mp3`
-
-Example: `perun-2025-10-19.mp3`
+Youtube blocks a lot of server IPs so running this locally is just easier, expects the following environment variables in a .env file:
+
+REMOTE_HOSTNAME
+REMOTE_PATH
+BACKEND_API_URL
+BACKEND_API_KEY
+YOUTUBE_CHANNEL_URL
+PODCAST_AUTHORIZATION_TOKEN
+PODCAST_API_URL
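Since the trimmed-down README only names the variables, a hedged sketch of what such a `.env` file might look like (the values below are the placeholders from the old README, not anything the new version mandates):

```env
YOUTUBE_CHANNEL_URL=https://www.youtube.com/@PerunAU/videos
PODCAST_API_URL=https://your-audiobookshelf.com/api/items/{item-id}
PODCAST_AUTHORIZATION_TOKEN=your_token_here
REMOTE_HOSTNAME=your_ssh_host_alias
REMOTE_PATH=/path/to/podcast/folder
BACKEND_API_URL=http://backend:8101/internal/receive-notifications
BACKEND_API_KEY=your_api_key
```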
@@ -1,190 +1,66 @@
 import requests
 import yt_dlp
 import os
-import time
 from dotenv import load_dotenv
+from helper import log_message
 from ssh_helper import upload_via_sftp, send_notification_via_ssh
-from youtube_handler import return_download_options, check_for_sponsorblock_segments
-from simple_logger_handler import setup_logger
-from rss_feed_handler import grab_latest_chapter_information, EpisodeData
-
-logger = setup_logger(__name__)
+from youtube_handler import get_url_for_latest_video, get_youtube_data, return_download_options
 
 load_dotenv()
 PODCAST_AUTHORIZATION_TOKEN = os.getenv("PODCAST_AUTHORIZATION_TOKEN")
 PODCAST_API_URL = os.getenv("PODCAST_API_URL")
 
 
-def get_audiobookshelf_data() -> tuple[str | None, str | None]:
-    """
-    Fetches the latest episode data from the Audiobookshelf API.
-
-    Returns:
-        tuple[int | None, str | None]:
-            - The track number as a string (or None if data could not be fetched due to retryable errors).
-            - The YouTube episode id as a string (or None if data could not be fetched due to retryable errors).
-
-    Raises:
-        requests.exceptions.HTTPError:
-            If a non-retryable HTTP error occurs (e.g., 401 Unauthorized, 403 Forbidden, 404 Not Found).
-
-    Notes:
-        - Connection errors, timeouts, and server-side HTTP errors (500, 502, 503, 504) are caught and logged.
-          In these cases, the function returns (None, None) so the caller can retry later.
-    """
-    headers = {"Authorization": f"Bearer {PODCAST_AUTHORIZATION_TOKEN}"}
-    logger.debug("[Audiobookshelf] Fetching Audiobookshelf data")
-    try:
-        response = requests.get(PODCAST_API_URL, headers=headers)
-        response.raise_for_status()
-        result = response.json()
-
-        audiobookshelf_track = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTrack"]
-        audiobookshelf_ytid = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagDescription"]
-
-        logger.debug(f"[Audiobookshelf] Fetched Audiobookshelf data: track={audiobookshelf_track}, ytid={audiobookshelf_ytid}")
-        return (audiobookshelf_track, audiobookshelf_ytid)
-
-    except requests.exceptions.ConnectionError as e:
-        logger.warning(f"[Audiobookshelf] Connection error, will retry: {e}")
-        return (None, None)
-    except requests.exceptions.Timeout as e:
-        logger.warning(f"[Audiobookshelf] Request timed out, will retry: {e}")
-        return (None, None)
-    except requests.exceptions.HTTPError as e:
-        status = e.response.status_code
-        if status in {500, 502, 503, 504}:
-            logger.warning(f"[Audiobookshelf] Server error {status}, will retry: {e}")
-            return (None, None)
-        else:
-            logger.error(f"[Audiobookshelf] HTTP error {status}, not retrying: {e}")
-            raise
-
-
-def check_until_new_episode_gets_released() -> tuple[EpisodeData | None, str | None]:
-    """
-    Polls YouTube every hour for a new episode and compares it to the available episode on Audiobookshelf.
-    Stops after 72 hours.
-
-    Returns:
-        tuple[EpisodeData | None, str | None]:
-            - EpisodeData with information about the date, description, link, title and YouTube ID
-            - Track number from Audiobookshelf
-        Returns (None, None) if no new episode found within timeout
-    """
-    CHECK_INTERVAL_HOURS = 1
-    MAX_HOURS = 72
-    for attempt in range(1, MAX_HOURS + 1):
-        logger.debug(f"[EpisodeCheck] Waiting for a new episode to be released, attempt: {attempt}/{MAX_HOURS}")
-        audiobookshelf_track, audiobookshelf_ytid = get_audiobookshelf_data()
-
-        if audiobookshelf_track is None or audiobookshelf_ytid is None:
-            logger.warning("[EpisodeCheck] Unable to fetch Audiobookshelf data, retrying in 1 hour.")
-            time.sleep(CHECK_INTERVAL_HOURS * 3600)
-            continue
-
-        try:
-            episode_data = grab_latest_chapter_information("UCC3ehuUksTyQ7bbjGntmx3Q")
-        except Exception as e:
-            logger.warning(f"[EpisodeCheck] Failed to fetch latest video data: {e}, retrying in 1 hour.")
-            time.sleep(CHECK_INTERVAL_HOURS * 3600)
-            continue
-
-        if episode_data is None:
-            logger.warning("[EpisodeCheck] Unable to fetch latest video data, retrying in 1 hour.")
-            time.sleep(CHECK_INTERVAL_HOURS * 3600)
-            continue
-
-        if audiobookshelf_ytid != episode_data.episode_ytid:
-            logger.info(f"[EpisodeCheck] Latest YouTube episode: {episode_data.episode_title}")
-            return episode_data, audiobookshelf_track
-
-        logger.info("[EpisodeCheck] No new episode found, retrying in 1 hour.")
-        time.sleep(CHECK_INTERVAL_HOURS * 3600)
-
-    logger.warning("[EpisodeCheck] No new episode found after maximum attempts.")
-    return None, None
-
-
-def wait_for_sponsorblock_segments_to_be_added(episode_link) -> bool:
-    """
-    Polls SponsorBlock for segments on the current video until found or until max attempts.
-
-    Args:
-        episode_link: YouTube video URL to check for SponsorBlock segments
-
-    Returns:
-        True if segments found, False otherwise
-    """
-    CHECK_INTERVAL_HOURS = 1
-    MAX_HOURS = 24
-    for attempt in range(1, MAX_HOURS + 1):
-        logger.debug(f"[SponsorBlock] Waiting for SponsorBlock to be added, attempt: {attempt}/{MAX_HOURS} ")
-        segments = check_for_sponsorblock_segments(episode_link)
-
-        if segments:
-            logger.debug("[SponsorBlock] Segments found, exiting loop.")
-            return True
-
-        logger.debug("[SponsorBlock] No SponsorBlock segments found yet, retrying in 1 hour.")
-        time.sleep(CHECK_INTERVAL_HOURS * 3600)
-
-    logger.warning("[SponsorBlock] Segments not found after maximum attempts.")
-    return False
-
-
-def download_episode() -> None:
-    """
-    Main workflow: Check for new episode, download it, upload via SFTP, and send notification.
-    """
-    logger.info("[App] Starting Perun")
-
-    try:
-        episode_data, audiobookshelf_track = check_until_new_episode_gets_released()
-
-        if episode_data is None or audiobookshelf_track is None:
-            logger.error("[App] Failed to find new episode within timeout period")
-            return
-
-        logger.info("[App] New episode found")
-    except Exception as e:
-        logger.error(f"[App] Failed to fetch new episode info: {e}", exc_info=True)
-        return
-
-    try:
-        if "sponsored" in episode_data.episode_description.lower():
-            logger.debug("[App] Sponsored segments found in description, waiting for SponsorBlock")
-            wait_for_sponsorblock_segments_to_be_added(episode_data.episode_link)
-        else:
-            logger.debug("[App] No sponsored segments found in description")
-    except Exception as e:
-        logger.warning(f"[App] Failed during SponsorBlock wait: {e}", exc_info=True)
-
-    try:
-        episode_data.episode_number = str(int(audiobookshelf_track) + 1).zfill(4)
-    except (ValueError, TypeError) as e:
-        logger.warning(f"[App] Failed incrementing audiobookshelf track: {e}", exc_info=True)
-        return
-
-    try:
-        options = return_download_options(episode_data)
-    except Exception as e:
-        logger.error(f"[App] Failed to generate download options: {e}", exc_info=True)
-        return
-
-    logger.info("[App] Downloading new episode")
-    try:
-        with yt_dlp.YoutubeDL(options) as episode:
-            episode.download(episode_data.episode_link)
-        logger.debug("[App] Download completed successfully")
-    except Exception as e:
-        logger.error(f"[App] Failed to download episode: {e}", exc_info=True)
-        return
-
-    logger.info("[App] Uploading episode via SFTP")
-    upload_via_sftp(f"perun-{episode_data.episode_date}.mp3")
-
-    logger.info("[App] Sending release notification")
-    send_notification_via_ssh(f"Perun episode {episode_data.episode_number} has been released", episode_data.episode_title)
-    logger.info("[App] Workflow complete")
+def get_audiobookshelf_data()->tuple[int | None, str | None]:
+    headers = {"Authorization": f"Bearer {PODCAST_AUTHORIZATION_TOKEN}"}
+    try:
+        response = requests.get(PODCAST_API_URL, headers=headers)
+        response.raise_for_status()
+        result = response.json()
+
+        audiobookshelf_track = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTrack"]
+        audiobookshelf_title = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTitle"]
+        return audiobookshelf_track, audiobookshelf_title
+
+    except requests.RequestException as e:
+        log_message(f"Failed to fetch data: {e}")
+        return None
+
+
+def download_episode():
+    log_message("Starting Perun")
+
+    audiobookshelf_track, audiobookshelf_title = get_audiobookshelf_data()
+    if audiobookshelf_track is None or audiobookshelf_title is None:
+        log_message("Unable to fetch Audiobookshelf data. Exiting.")
+        return
+
+    episode_url = get_url_for_latest_video()
+    episode_info = get_youtube_data(episode_url)
+    log_message(f"Latest episode: {episode_info['title']}")
+
+    if audiobookshelf_title != episode_info["title"]:
+        log_message("New Episode found")
+
+        track = str(int(audiobookshelf_track) + 1).zfill(4)
+        options = return_download_options(episode_info,track)
+
+        log_message("Downloading episode")
+        try:
+            with yt_dlp.YoutubeDL(options) as episode:
+                episode.download(episode_url)
+        except Exception as e:
+            log_message(f"Failed to download episode: {e}")
+            return
+
+        log_message("Uploading episode")
+        upload_via_sftp(f"perun-{episode_info['date']}.mp3")
+        log_message("Finished uploading, sending notification")
+        send_notification_via_ssh(f"Perun episode {track} has been released",episode_info["title"])
+        log_message("Finished")
+    else:
+        log_message("No new episode found, exiting...")
 
 
 if __name__ == "__main__":
     download_episode()
1  src/perun/grabEpisode.sh (Executable file → Normal file)
@@ -9,6 +9,7 @@ fi
 
 source .venv/bin/activate
 
+pip install --upgrade pip
 pip install --upgrade yt-dlp[default]
 pip install -r requirements.txt
 
@@ -1,9 +1,15 @@
 import re
+import datetime
 
 def return_string_as_html(input_text):
     string_without_ads=""
     for line in input_text.splitlines():
         line = re.sub(r'(https?://[^\s]+)', r'<a href="\1">\1</a>', line)
-        if "Sponsored" not in line:
+        if not "Sponsored" in line:
             string_without_ads+=line+"\n"
     return("<p>"+string_without_ads.replace("\n\n", "</p>\n<p>").replace("\n", "<br>")+"</p>")
+
+def log_message(message):
+    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    print(f"[{timestamp}] {message}")
+    return(f"[{timestamp}] {message}\n")
@@ -1,4 +1,3 @@
---extra-index-url https://git.gansejunge.com/api/packages/notifier/pypi/simple/
 bcrypt==5.0.0
 Brotli==1.1.0
 certifi==2025.10.5
@@ -6,7 +5,6 @@ cffi==2.0.0
 charset-normalizer==3.4.3
 cryptography==46.0.2
 dotenv==0.9.9
-feedparser==6.0.12
 idna==3.10
 invoke==2.2.0
 mutagen==1.47.0
@@ -16,8 +14,5 @@ pycryptodomex==3.23.0
 PyNaCl==1.6.0
 python-dotenv==1.1.1
 requests==2.32.5
-sgmllib3k==1.0.0
-simple-logger-handler==0.1.0
-sponsorblock.py==0.2.3
 urllib3==2.5.0
 websockets==15.0.1
@@ -1,85 +0,0 @@
import feedparser
from simple_logger_handler import setup_logger
import time
from urllib.error import URLError
from typing import Optional
from dataclasses import dataclass
from datetime import datetime


@dataclass
class EpisodeData:
    episode_date: str
    episode_description: str
    episode_link: str
    episode_number: str
    episode_title: str
    episode_ytid: str


logger = setup_logger(__name__)


def grab_latest_chapter_information(id: str, max_retries: int = 3) -> Optional[EpisodeData]:
    """
    Fetches the latest episode's information from a YouTube RSS feed, with retries on network-related errors.

    Parameters:
        id: YouTube channel ID as a string.
        max_retries: Number of retry attempts if fetching the feed fails due to network issues.

    Returns:
        EpisodeData: A dataclass containing episode metadata:
            episode_date: Date when it was published in ISO format (2025-11-30).
            episode_description: Episode description.
            episode_link: YouTube link.
            episode_number: Episode number.
            episode_title: Episode title.
            episode_ytid: Episode YouTube ID.
        Returns None if the feed has no entries or all retries are exhausted.

    Raises:
        ValueError: If the feed has no entries.
        Other network-related exceptions: If fetching fails after retries.
    """
    rss_feed_url = f"https://www.youtube.com/feeds/videos.xml?channel_id={id}"
    attempt = 1

    while attempt <= max_retries:
        logger.debug(f"[Feed] Parsing feed URL: {rss_feed_url} (attempt {attempt}/{max_retries})")
        try:
            feed = feedparser.parse(rss_feed_url)

            if not feed.entries:
                logger.warning(f"[Feed] No entries found for feed {id}")
                return None

            latest_chapter_data = feed["entries"][0]
            episode_link = latest_chapter_data["link"]
            episode_title = latest_chapter_data["title"]
            episode_description = latest_chapter_data["summary"]
            episode_date = latest_chapter_data["published"]
            episode_date = datetime.fromisoformat(episode_date).date().isoformat()
            episode_ytid = latest_chapter_data["yt_videoid"]

            logger.info(f"[Feed] Latest episode '{episode_title}': {episode_link}")
            logger.debug(f"[Feed] Latest episode '{episode_title}' (YouTubeId {episode_ytid}): {episode_link} -> {episode_description}")
            return EpisodeData(
                episode_date=episode_date,
                episode_description=episode_description,
                episode_link=episode_link,
                episode_number="",
                episode_title=episode_title,
                episode_ytid=episode_ytid
            )

        except (URLError, OSError) as e:
            logger.warning(f"[Feed] Network error on attempt {attempt} for feed {id}: {e}")
            if attempt == max_retries:
                logger.error(f"[Feed] All {max_retries} attempts failed for feed {id}")
                return None
            backoff = 2 ** (attempt - 1)
            logger.debug(f"[Feed] Retrying in {backoff} seconds...")
            time.sleep(backoff)
            attempt += 1


if __name__ == "__main__":
    print(grab_latest_chapter_information("UCC3ehuUksTyQ7bbjGntmx3Q"))
@@ -2,42 +2,18 @@ import paramiko
 import os
 from dotenv import load_dotenv
 from json import dumps
-from simple_logger_handler import setup_logger
-import time
-import shlex
-
-logger = setup_logger(__name__)
 load_dotenv()
 REMOTE_HOSTNAME = os.getenv("REMOTE_HOSTNAME")
 REMOTE_PATH = os.getenv("REMOTE_PATH")
 BACKEND_API_URL = os.getenv("BACKEND_API_URL")
 BACKEND_API_KEY= os.getenv("BACKEND_API_KEY")
 
-def load_ssh_config(host_alias:str) -> tuple[str, int, str, str]:
-    """
-    Load SSH connection details from ~/.ssh/config for the given alias.
-
-    Args:
-        host_alias: The SSH host alias to look up
-
-    Returns:
-        Tuple of (hostname, port, username, keyfile)
-
-    Raises:
-        FileNotFoundError: If SSH config file doesn't exist
-        ValueError: If SSH configuration is incomplete
-    """
-    logger.debug(f"[SSH] Loading SSH configuration for host alias '{host_alias}'")
+def load_ssh_config(host_alias):
     ssh_config = paramiko.SSHConfig()
     config_path = os.path.expanduser("~/.ssh/config")
-    try:
-        with open(config_path) as f:
-            ssh_config.parse(f)
-    except FileNotFoundError:
-        logger.error(f"[SSH] SSH config file not found at {config_path}")
-        raise
-
+    with open(config_path) as f:
+        ssh_config.parse(f)
     host_config = ssh_config.lookup(host_alias)
     hostname = host_config.get("hostname")
     port = int(host_config.get("port", 22))
@@ -45,136 +21,57 @@ def load_ssh_config(host_alias:str) -> tuple[str, int, str, str]:
     keyfile = host_config.get("identityfile", [None])[0]
 
     if not all([hostname, username, keyfile]):
-        logger.error(f"[SSH] Incomplete SSH configuration for alias '{host_alias}'")
-        raise ValueError(f"[SSH] Missing SSH configuration for {host_alias}.")
-
-    logger.debug(f"[SSH] SSH config loaded: host={hostname}, port={port}, user={username}, key={keyfile}")
+        raise ValueError(f"Missing SSH configuration for {host_alias}.")
     return hostname, port, username, keyfile
 
 
-def create_ssh_client(hostname: str, port: int, username: str, keyfile: str)-> paramiko.SSHClient:
-    """
-    Create and return a connected Paramiko SSHClient instance.
-
-    Args:
-        hostname: Remote hostname
-        port: SSH port
-        username: SSH username
-        keyfile: Path to SSH private key file
-
-    Returns:
-        Connected SSHClient instance (caller must close it)
-
-    Raises:
-        Exception: If SSH connection fails
-    """
-    logger.debug("[SSH] Creating SSH client")
-    try:
-        ssh = paramiko.SSHClient()
-        ssh.load_host_keys(os.path.expanduser("~/.ssh/known_hosts"))
-        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-
-        pkey = paramiko.RSAKey.from_private_key_file(keyfile)
-        ssh.connect(hostname=hostname, username=username, port=port, pkey=pkey)
-        logger.debug("[SSH] SSH connection established successfully")
-        return ssh
-    except Exception as e:
-        logger.error(f"[SSH] SSH connection failed: {e}", exc_info=True)
-        raise
-
-
-def upload_via_sftp(filename) -> None:
-    """
-    Upload a file to the remote host via SFTP using SSH credentials.
-
-    Args:
-        filename: Local file path to upload
-
-    Raises:
-        Exception: If upload fails
-    """
-    logger.info(f"[SFTP] Preparing to upload file '{filename}' via SFTP")
-    try:
-        hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
-        logger.debug(f"[SFTP] Connecting to {hostname}:{port} for file upload")
-
-        transport = paramiko.Transport((hostname, port))
-        pkey = paramiko.RSAKey.from_private_key_file(keyfile)
-        transport.connect(username=username, pkey=pkey)
-        sftp = paramiko.SFTPClient.from_transport(transport)
-
-        remote_file = os.path.join(REMOTE_PATH, os.path.basename(filename))
-        logger.info(f"[SFTP] Uploading to remote path: {remote_file}")
-        sftp.put(filename, remote_file)
-
-        sftp.close()
-        transport.close()
-        logger.info(f"[SFTP] File '{filename}' uploaded successfully")
-    except Exception as e:
-        logger.error(f"[SFTP] SFTP upload failed for '{filename}': {e}", exc_info=True)
-        raise
-
-
-def send_notification_via_ssh(notification_title, notification_info) -> None:
-    """
-    Send a JSON-formatted notification payload via SSH to the backend.
-
-    Args:
-        notification_title: Title of the notification
-        notification_info: Body/content of the notification
-
-    Raises:
-        Exception: If notification sending fails
-    """
-    logger.info(f"[Notification] Sending SSH notification: {notification_title}")
-    ssh = None
-    try:
-        hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
-        ssh = create_ssh_client(hostname, port, username, keyfile)
-
-        data = {
-            "receipent_user_id": 1,
-            "message": {
-                "title": notification_title,
-                "body": notification_info,
-                "category": "podcasts",
-                "timestamp": int(time.time())
-            }
-        }
-        json_payload = dumps(data)
-        logger.debug(f"[Notification] Notification payload: {json_payload}")
-
-        escaped_payload = shlex.quote(json_payload)
-        escaped_url = shlex.quote(BACKEND_API_URL)
-
-        notification_cmd = (
-            f"API_KEY=$(head -n1) && "
-            f"curl -s -X POST {escaped_url} "
-            f"-H 'Content-Type: application/json' "
-            f"-H \"X-API-Key-Internal: $API_KEY\" "
-            f"-d {escaped_payload}"
-        )
-
-        stdin, stdout, stderr = ssh.exec_command(notification_cmd)
-        stdin.write(f"{BACKEND_API_KEY}\n")
-        stdin.flush()
-        stdin.channel.shutdown_write()
-
-        exit_status = stdout.channel.recv_exit_status()
-        response_output = stdout.read().decode()
-
-        if exit_status == 0:
-            logger.info("[Notification] Notification sent successfully")
-            logger.debug(f"[Notification] Response: {response_output}")
-        else:
-            error_output = stderr.read().decode()
-            logger.warning(f"[Notification] Notification command exited with {exit_status}")
-            logger.warning(f"[Notification] Error: {error_output}")
-            logger.warning(f"[Notification] Response: {response_output}")
-    except Exception as e:
-        logger.error(f"[Notification] Failed to send SSH notification: {e}", exc_info=True)
-        raise
-    finally:
-        if ssh:
-            ssh.close()
-            logger.debug("[Notification] SSH connection closed")
+def create_ssh_client(hostname, port, username, keyfile):
+    ssh = paramiko.SSHClient()
+    ssh.load_host_keys(os.path.expanduser("~/.ssh/known_hosts"))
+    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
+    pkey = paramiko.RSAKey.from_private_key_file(keyfile)
+    ssh.connect(hostname=hostname, username=username, port=port, pkey=pkey)
+    return ssh
+
+
+def upload_via_sftp(filename):
+    hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
+
+    transport = paramiko.Transport((hostname, port))
+    pkey = paramiko.RSAKey.from_private_key_file(keyfile)
+    transport.connect(username=username, pkey=pkey)
+    sftp = paramiko.SFTPClient.from_transport(transport)
+
+    remote_file = os.path.join(REMOTE_PATH, os.path.basename(filename))
+    sftp.put(filename, remote_file)
+
+    sftp.close()
+    transport.close()
+
+
+def send_notification_via_ssh(notification_title, notification_info):
+    hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
+
+    with create_ssh_client(hostname, port, username, keyfile) as ssh:
+        data = {
+            "receipent_user_id": 1,
+            "message": {
+                "title": notification_title,
+                "info": notification_info,
+                "category": "mixtapes"
+            }
+        }
+        json_payload = dumps(data)
+
+        # Command reads API key and JSON from stdin
+        notification_cmd = (
+            f"curl -s -X POST '{BACKEND_API_URL}' "
+            f"-H 'Content-Type: application/json' "
+            f"-H 'X-API-Key-Internal: $(head -n1)' "
+            f"-d @-"
+        )
+
+        stdin, stdout, stderr = ssh.exec_command(notification_cmd)
+        stdin.write(f"{BACKEND_API_KEY}\n{json_payload}")
+        stdin.flush()
+        stdin.channel.shutdown_write()
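One design note on the notification path: in both versions the backend API key is written to the remote command's standard input rather than put on the command line, so it never shows up in the remote process list. Roughly, the command the main-branch variant assembles behaves like this on the remote host (URL and payload values below are placeholders, not taken from the code):

```bash
# First stdin line is the API key; it is captured before curl runs so the secret
# is not embedded in the command itself.
API_KEY=$(head -n1) && curl -s -X POST 'https://backend.example/internal/receive-notifications' \
  -H 'Content-Type: application/json' \
  -H "X-API-Key-Internal: $API_KEY" \
  -d '{"receipent_user_id": 1, "message": {"title": "Perun episode 0042 has been released", "body": "...", "category": "podcasts", "timestamp": 1760000000}}'
```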
@@ -4,39 +4,46 @@ import contextlib
 from dotenv import load_dotenv
 import os
 from helper import return_string_as_html
-from simple_logger_handler import setup_logger
-import json
-import sponsorblock as sb
-
-logger = setup_logger(__name__)
 load_dotenv()
 
 YOUTUBE_CHANNEL_URL = os.getenv("YOUTUBE_CHANNEL_URL")
 
-def check_for_sponsorblock_segments(youtube_video:str) -> bool:
-    client = sb.Client()
-    try:
-        segments = client.get_skip_segments(youtube_video)
-    except sb.errors.NotFoundException:
-        logger.debug(f"[SponsorBlock] No SponsorBlock information for video:{youtube_video}")
-        return False
-
-    if segments:
-        logger.info(f"[SponsorBlock] SponsorBlock segments found for video: {youtube_video}")
-        return True
-
-def return_download_options(episode_data)->dict:
-    download_options = {
+def get_url_for_latest_video():
+    options = {
+        "extract_flat": True,
+        "playlist_items": "1",
+        "quiet": True,
+        "forcejson": True,
+        "simulate": True,
+    }
+    with open(os.devnull, "w") as devnull:
+        with contextlib.redirect_stdout(devnull):
+            with yt_dlp.YoutubeDL(options) as video:
+                info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download = False)
+    if "entries" in info_dict and len(info_dict["entries"]) > 0:
+        return info_dict["entries"][0]["url"]
+
+def get_youtube_data(url):
+    with yt_dlp.YoutubeDL({"quiet":True,"noprogress":True}) as video:
+        info_dict = video.extract_info(url, download = False)
+    return {"date":datetime.datetime.fromtimestamp(info_dict["timestamp"], datetime.timezone.utc).strftime("%Y-%m-%d"),"title":info_dict["title"],
+            "description":return_string_as_html(info_dict["description"]),"upload_date":info_dict["upload_date"]}
+
+
+def return_download_options(information:dict,track:str)->dict:
+    return {
         "quiet": True,
         "noprogress": True,
         "format": "bestaudio/best",
         "extract_audio": True,
         "audio_format": "mp3",
-        "outtmpl": f"perun-{episode_data.episode_date}.%(ext)s",
+        "outtmpl": f"perun-{information['date']}.%(ext)s",
         "addmetadata": True,
         "postprocessors":[
             {"api": "https://sponsor.ajay.app",
-            "categories":["sponsor"],
+            "categories":{"sponsor"},
             "key": "SponsorBlock",
             "when": "after_filter"
             },
@@ -45,7 +52,7 @@ def return_download_options(episode_data)->dict:
             "key": "ModifyChapters",
             "remove_chapters_patterns": [],
             "remove_ranges": [],
-            "remove_sponsor_segments": ["sponsor"],
+            "remove_sponsor_segments": {"sponsor"},
             "sponsorblock_chapter_title": "[SponsorBlock]: %(category_names)l"
             },
             {
@@ -56,17 +63,12 @@ def return_download_options(episode_data)->dict:
             "key": "FFmpegMetadata",
             }],
         "postprocessor_args": [
-            "-metadata", f"title={episode_data.episode_title}",
-            "-metadata", "artist=Perun",
-            "-metadata", f"track={episode_data.episode_number}",
-            "-metadata", f"date={episode_data.episode_date}",
-            "-metadata", f"comment={return_string_as_html(episode_data.episode_description)}",
-            "-metadata", f"description={episode_data.episode_ytid}",
+            "-metadata", f"title={information['title']}",
+            "-metadata", f"artist=Perun",
+            "-metadata", f"track={track}",
+            "-metadata", f"date={information['date']}",
+            "-metadata", f"comment={information['description']}",
+            "-metadata", f"description={information['description']}",
         ],
         "merge_output_format": "mp3"
     }
-    logger.debug(f"[YouTube] Created download options:\n {json.dumps(download_options, indent=4)}")
-    return download_options
-
-if __name__ == "__main__":
-    print(check_for_sponsorblock_segments("https://www.youtube.com/watch?v=M0t8UYZ9rrQ"))
@@ -1,21 +0,0 @@
FROM ubuntu:24.04

RUN apt update && \
    apt install -y ca-certificates curl ffmpeg keychain python3 software-properties-common && \
    add-apt-repository ppa:m-grant-prg/utils && \
    apt update && apt install -y get-iplayer && \
    rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*

ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN sh /uv-installer.sh && mv /root/.local/bin/uv /usr/local/bin/uv && rm /uv-installer.sh

RUN userdel ubuntu && groupadd -r florian -g 1000 && \
    useradd -u 1000 -r -g florian -m -d /home/florian -s /bin/bash florian && \
    mkdir /app && chown -R florian:florian /app

USER florian
WORKDIR /home/florian
COPY requirements.txt ./
RUN uv venv && uv pip install -r requirements.txt

WORKDIR /app
@@ -1,75 +0,0 @@
# Pete Tong BBC Radio Episode Downloader

A Python script that automatically downloads the latest Pete Tong radio show from BBC iPlayer Radio, converts it to MP3 with metadata, and sends a push notification when complete.

## Features

- **Automatic Detection**: Finds the latest Pete Tong episode from BBC iPlayer
- **Audio Download**: Uses `get_iplayer` to download BBC Radio episodes
- **MP3 Conversion**: Converts to MP3 format with ffmpeg
- **Metadata Injection**: Adds title, artist, track number (week of year), date, and description
- **Push Notifications**: Sends notification to backend service when new episode is ready

## Prerequisites

- Python 3.8+
- `get_iplayer` (BBC iPlayer downloader)
- `ffmpeg` and `ffprobe` (audio processing)
- Backend notification service

## Installation

### Install Python Dependencies

```bash
pip install requests python-dotenv
```

### Install System Dependencies

**Ubuntu/Debian:**
```bash
sudo apt install get-iplayer ffmpeg
```

**macOS:**
```bash
brew install get-iplayer ffmpeg
```

## Configuration

Create a `.env` file with the following variables:

```env
# Backend notification service
BACKEND_API_URL=http://localhost:30101/internal/receive-notifications
BACKEND_API_KEY=your_api_key_here
```

## Usage

Run the script manually:
```bash
python download_episode.py
```
Or schedule with cron and use the provided `grabEpisode.sh` (Saturday mornings at 9 AM):
```bash
0 9 * * 6 /path/to/script/grabEpisode.sh
```

## Output

MP3 files are named: `YYYY-MM-DD-{episode_id}.mp3`

Example: `2025-10-17-m00258br.mp3`

## Metadata Structure

| Field | Value | Example |
|-------|-------|---------|
| Title | Featured artist | "Solomun" |
| Artist | Pete Tong | "Pete Tong" |
| Track | Friday number | 42 (42nd Friday of year) |
| Date | ISO date | "2025-10-17" |
| Comment | Episode description | HTML formatted text |
@ -1,141 +0,0 @@
from datetime import datetime, timedelta
import os
import subprocess
from dataclasses import dataclass
import json
from logger_handler import setup_logger
from send_notification import send_notification

logger = setup_logger("PeteTongDownloader")


@dataclass
class EpisodeData:
    description: str
    title: str
    timestamp: str
    track: int
    id: str

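As a rough illustration of how this dataclass ends up populated, here is a hand-written instance using the example values from the README; the description string is a made-up placeholder:

```python
# Illustrative only: field values mirror the README's example episode,
# the description text is invented.
example_episode = EpisodeData(
    description="<p>Pete Tong with a guest mix from Solomun.</p>",
    title="Solomun",
    timestamp="2025-10-17",
    track=42,
    id="m00258br",
)
```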
def add_html_tags_to_description(input_text) -> str:
    if not input_text:
        return ""
    return "<p>" + input_text.replace("\n\n", "</p>\n<p>").replace("\n", "<br>") + "</p>"

def get_friday_number(iso_timestamp: str) -> int:
    """
    Return which Friday of the year (1-based) the given ISO timestamp falls on.
    """
    try:
        dt = datetime.fromisoformat(iso_timestamp)
        start_of_year = datetime(dt.year, 1, 1, tzinfo=dt.tzinfo)
        days_until_first_friday = (4 - start_of_year.weekday()) % 7
        first_friday = start_of_year + timedelta(days=days_until_first_friday)
        fridays_passed = (dt - first_friday).days // 7 + 1
        return fridays_passed
    except Exception as e:
        logger.error(f"Failed to calculate Friday number from {iso_timestamp}: {e}")
        return 0

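Worked through for the README's example date: 1 January 2025 is a Wednesday, so the first Friday is 3 January; 17 October 2025 is 287 days later, and 287 // 7 + 1 = 42, which matches the track number shown in the README. A minimal check:

```python
# Sanity checks for get_friday_number using the README's example date.
assert get_friday_number("2025-10-17T19:00:00") == 42   # 42nd Friday of 2025
assert get_friday_number("2025-01-03T19:00:00") == 1    # first Friday of 2025
assert get_friday_number("not-a-timestamp") == 0        # parse failures fall back to 0
```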
def find_downloaded_file_name_via_id(directory: str, latest_episode_id: str) -> str | None:
    for filename in os.listdir(directory):
        if latest_episode_id in filename:
            return filename
    logger.warning(f"No file found containing episode ID {latest_episode_id} in {directory}")
    return None

def extract_metadata_from_downloaded_episode(file_name: str, episode_id: str) -> EpisodeData:
    if not file_name or not os.path.exists(file_name):
        logger.error(f"File not found: {file_name}")
        raise FileNotFoundError(f"File not found: {file_name}")

    try:
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", file_name],
            capture_output=True, text=True, check=True
        )
        ffprobe_data = json.loads(result.stdout)
        metadata = ffprobe_data.get("format", {}).get("tags", {})

        iso_timestamp = metadata.get("date", "1970-01-01T00:00:00")
        return EpisodeData(
            description=add_html_tags_to_description(metadata.get("lyrics", "")),
            title=metadata.get("title", "Unknown Title"),
            timestamp=iso_timestamp.split("T")[0],
            track=get_friday_number(iso_timestamp),
            id=episode_id
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"ffprobe failed for {file_name}: {e.stderr}")
        raise
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse ffprobe output for {file_name}: {e}")
        raise

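For reference, the function expects ffprobe's `-print_format json -show_format` output to look roughly like the structure below; the filename and tag values are placeholders rather than a capture from a real BBC download (the episode description is read from the `lyrics` tag):

```python
# Approximate shape of the ffprobe JSON this function parses (placeholder values).
ffprobe_data = {
    "format": {
        "filename": "Pete_Tong_m00258br.m4a",
        "tags": {
            "title": "Solomun",
            "date": "2025-10-17T19:00:00",
            "lyrics": "Episode description as plain text",
        },
    }
}
metadata = ffprobe_data.get("format", {}).get("tags", {})
```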
def get_id_of_the_latest_episode(base_url: str) -> str:
    try:
        result = subprocess.run(
            ["get_iplayer", "--pid-recursive-list", base_url],
            capture_output=True, text=True, check=True
        )
        lines = result.stdout.strip().split("\n")
        if len(lines) < 3:
            raise ValueError("get_iplayer output too short to find latest episode ID")
        latest_episode_id = lines[-2].split(",")[-1].strip()
        logger.info(f"Latest episode ID: {latest_episode_id}")
        return latest_episode_id
    except subprocess.CalledProcessError as e:
        logger.error(f"get_iplayer failed: {e.stderr}")
        raise

def download_episode_via_episode_id(episode_id: str) -> str:
    script_dir = os.path.dirname(os.path.abspath(__file__))
    try:
        logger.info(f"Downloading episode {episode_id}")
        subprocess.run(
            ["get_iplayer", f"--pid={episode_id}", "--type=radio"],
            cwd=script_dir, check=True
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Download failed for {episode_id}: {e}")
        raise
    return script_dir

def convert_episode_to_mp3(episode_data: EpisodeData, file_name: str):
    output_file = f"{episode_data.timestamp}-{episode_data.id}.mp3"
    ffmpeg_command = [
        "ffmpeg", "-i", file_name,
        "-metadata", f"title={episode_data.title}",
        "-metadata", "artist=Pete Tong",
        "-metadata", f"track={episode_data.track}",
        "-metadata", f"date={episode_data.timestamp}",
        "-metadata", f"comment={episode_data.description}",
        output_file
    ]

    try:
        logger.info(f"Converting {file_name} to {output_file}")
        subprocess.run(ffmpeg_command, check=True)
        os.remove(file_name)
    except subprocess.CalledProcessError as e:
        logger.error(f"ffmpeg conversion failed: {e}")
        raise

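With the README's example metadata, `ffmpeg_command` expands to something along these lines; the input filename and description are placeholders:

```python
# Illustrative expansion of ffmpeg_command for the README's example episode.
example_command = [
    "ffmpeg", "-i", "Pete_Tong_m00258br.m4a",
    "-metadata", "title=Solomun",
    "-metadata", "artist=Pete Tong",
    "-metadata", "track=42",
    "-metadata", "date=2025-10-17",
    "-metadata", "comment=<p>Episode description</p>",
    "2025-10-17-m00258br.mp3",
]
```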
def download_latest_pete_tong_episode():
    try:
        base_url = "https://www.bbc.co.uk/programmes/b006ww0v"
        episode_id = get_id_of_the_latest_episode(base_url)
        script_dir = download_episode_via_episode_id(episode_id)
        file_name = find_downloaded_file_name_via_id(script_dir, episode_id)
        episode_data = extract_metadata_from_downloaded_episode(file_name, episode_id)
        convert_episode_to_mp3(episode_data, file_name)
        logger.info("Episode download and conversion completed successfully")
        send_notification(episode_data.title)
        logger.info("Notification sent")
    except Exception as e:
        logger.error(f"Failed to download latest Pete Tong episode: {e}", exc_info=True)


if __name__ == "__main__":
    download_latest_pete_tong_episode()
@ -1,4 +0,0 @@
#!/bin/bash -e

docker run --network host --rm -v /home/florian/github/service-podcasts/src/petetong:/app bbcr1:latest /home/florian/.venv/bin/python /app/download_episode.py
mv /home/florian/github/service-podcasts/src/petetong/*.mp3 "/var/lib/audiobookshelf/music/Pete Tong/"
@ -1,19 +0,0 @@
import logging
import os

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
if LOG_LEVEL not in {"ERROR", "DEBUG", "INFO", "WARNING", "CRITICAL"}:
    LOG_LEVEL = "INFO"

def setup_logger(name: str) -> logging.Logger:
    logger = logging.getLogger(name)
    if not logger.handlers:
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        handler.setFormatter(formatter)
        logger.addHandler(handler)
        logger.setLevel(getattr(logging, LOG_LEVEL))
        logger.debug(f"Logger {name} initialized with level {LOG_LEVEL}")
    return logger
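Because `LOG_LEVEL` is read once at import time, it has to be set before `logger_handler` is imported. A small usage sketch with an illustrative logger name:

```python
import os

os.environ.setdefault("LOG_LEVEL", "DEBUG")  # must happen before the import below

from logger_handler import setup_logger

log = setup_logger("example")
log.debug("verbose output enabled")
```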
@ -1,2 +0,0 @@
python-dotenv
requests
@ -1,60 +0,0 @@
import requests
from requests.exceptions import RequestException, Timeout, ConnectionError
import os
import time
from logger_handler import setup_logger
from dotenv import load_dotenv

load_dotenv()
backend_api_url = os.getenv("BACKEND_API_URL", "http://localhost:30101/internal/receive-notifications")
api_key = os.getenv("BACKEND_API_KEY")
logger = setup_logger(__name__)


def send_notification(body: str, max_retries: int = 5, timeout: int = 5):
    """
    Sends a notification to the internal backend service when a new Pete Tong episode is out.

    Parameters:
        body: Featured artist
    """

    headers = {
        "X-API-Key-Internal": api_key,
        "Content-Type": "application/json"
    }

    title = "New Pete Tong episode is available"
    data = {
        "receipent_user_id": 1,
        "message": {
            "title": title,
            "body": f"Featured artist: {body}",
            "category": "mixtapes",
            "timestamp": int(time.time())
        }
    }

    logger.debug(f"[Notify] Preparing to send notification: title='{title}', body={body}")
    with requests.Session() as session:
        for attempt in range(1, max_retries + 1):
            try:
                logger.debug(f"[Notify] Sending request to backend (attempt {attempt}/{max_retries})")
                response = session.post(backend_api_url, headers=headers, json=data, timeout=timeout)
                response.raise_for_status()
                logger.info(f"[Notify] Notification sent successfully for '{title}' (body {body})")
                return

            except (Timeout, ConnectionError) as e:
                logger.warning(f"[Notify] Attempt {attempt}/{max_retries} failed: {type(e).__name__}")
                if attempt == max_retries:
                    logger.error(f"[Notify] All retry attempts failed for '{title}'")
                else:
                    sleep_time = 2 ** (attempt - 1)
                    logger.debug(f"[Notify] Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)

            except RequestException as e:
                logger.error(f"[Notify] Unexpected request failure: {e}")
                return
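With the defaults, failed attempts are retried with exponential backoff, so the waits between the five attempts are 1, 2, 4 and 8 seconds. A minimal call-site sketch, using the README's example artist as the body:

```python
# Hypothetical call site; "Solomun" stands in for the artist parsed from the episode metadata.
from send_notification import send_notification

send_notification("Solomun", max_retries=3, timeout=5)
```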