Updated Perun script with logging and added a readme

This commit is contained in:
Florian 2025-10-19 17:56:59 +02:00
parent 2984c96de9
commit 7da6b09981
6 changed files with 243 additions and 80 deletions

View File

@ -1,18 +1,75 @@
# Perun
# Perun YouTube Podcast Downloader
A Python script that automatically downloads the latest video from the Perun YouTube channel, converts it to MP3 with metadata, removes sponsor segments, and uploads it to a podcast server.
## Features
- **Automatic Detection**: Checks for new episodes by comparing the latest YouTube video title against the most recent episode in the Audiobookshelf library
- **Audio Conversion**: Downloads and converts YouTube videos to MP3 format
- **Sponsor Removal**: Uses SponsorBlock API to remove sponsored segments
- **Metadata Injection**: Adds title, artist, track number, date, and description to MP3 files
- **SFTP Upload**: Automatically uploads to remote podcast server
- **Push Notifications**: Sends notification when new episode is available
## Prerequisites
- Python 3.8+
- yt-dlp
- ffmpeg (for audio conversion)
- SSH key-based authentication configured
- Audiobookshelf server with API access
YouTube blocks many server IP addresses, so running this locally is usually easier. The script expects the following environment variables in a `.env` file:
## Installation
REMOTE_HOSTNAME
```bash
pip install -r requirements.txt
```
Install ffmpeg:
```bash
# Ubuntu/Debian
sudo apt install ffmpeg
REMOTE_PATH
# macOS
brew install ffmpeg
```
## Usage
BACKEND_API_URL
Run the script manually:
```bash
python get_episode.py
```
BACKEND_API_KEY
Or schedule it with cron using the provided `grabEpisode.sh` (this example runs every Monday at 7 AM):
```bash
0 7 * * 1 /path/to/script/grabEpisode.sh
```
YOUTUBE_CHANNEL_URL
YouTube blocks many server IP addresses, so running the script locally rather than on a server is usually easier.
PODCAST_AUTHORIZATION_TOKEN
## Configuration
PODCAST_API_URL
Create a `.env` file with the following variables:
```env
# YouTube channel to monitor
YOUTUBE_CHANNEL_URL=https://www.youtube.com/@PerunAU/videos
# Audiobookshelf API
PODCAST_API_URL=https://your-audiobookshelf.com/api/items/{item-id}
PODCAST_AUTHORIZATION_TOKEN=your_token_here
# SFTP upload destination
REMOTE_HOSTNAME=your_ssh_host_alias
REMOTE_PATH=/path/to/podcast/folder
# Backend notification service
BACKEND_API_URL=http://backend:8101/internal/receive-notifications
BACKEND_API_KEY=your_api_key
```
## Output
MP3 files are named: `perun-YYYY-MM-DD.mp3`
Example: `perun-2025-10-19.mp3`

View File

@ -2,10 +2,11 @@ import requests
import yt_dlp
import os
from dotenv import load_dotenv
from helper import log_message
from ssh_helper import upload_via_sftp, send_notification_via_ssh
from youtube_handler import get_url_for_latest_video, get_youtube_data, return_download_options
from logger_handler import setup_logger
logger = setup_logger(__name__)
load_dotenv()
PODCAST_AUTHORIZATION_TOKEN = os.getenv("PODCAST_AUTHORIZATION_TOKEN")
@ -19,48 +20,53 @@ def get_audiobookshelf_data()->tuple[int | None, str | None]:
response = requests.get(PODCAST_API_URL, headers=headers)
response.raise_for_status()
result = response.json()
audiobookshelf_track = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTrack"]
audiobookshelf_title = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTitle"]
logger.debug(f"Fetched Audiobookshelf data: track={audiobookshelf_track}, title={audiobookshelf_title}")
return audiobookshelf_track, audiobookshelf_title
except requests.RequestException as e:
log_message(f"Failed to fetch data: {e}")
logger.warning(f"Failed to fetch Audiobookshelf data: {e}")
return None
def download_episode():
    """Download, upload and announce the latest Perun episode if it is new.

    Workflow:
      1. Read the track number and title of the last episode from
         Audiobookshelf.
      2. Look up the newest video on the YouTube channel and its metadata.
      3. If the titles differ, download the video with yt-dlp, upload the
         resulting ``perun-<date>.mp3`` via SFTP and send a notification.

    Returns:
        None. Every lookup failure is logged and ends the run early.

    Raises:
        Whatever ``upload_via_sftp`` or ``send_notification_via_ssh`` raise;
        those errors are not handled here.
    """
    logger.info("Starting Perun")

    # get_audiobookshelf_data() returns a bare None (not a tuple) when the
    # request fails, so unpacking its result directly would raise TypeError.
    library_state = get_audiobookshelf_data()
    if library_state is None or None in library_state:
        logger.warning("Unable to fetch Audiobookshelf data. Exiting.")
        return
    audiobookshelf_track, audiobookshelf_title = library_state

    # get_url_for_latest_video() returns None on failure or an empty feed.
    episode_url = get_url_for_latest_video()
    if not episode_url:
        logger.warning("Unable to determine the latest YouTube video. Exiting.")
        return

    # get_youtube_data() returns an empty dict when the lookup fails.
    episode_info = get_youtube_data(episode_url)
    if not episode_info:
        logger.warning("Unable to fetch YouTube metadata. Exiting.")
        return

    logger.info(f"Latest YouTube episode: {episode_info['title']}")

    if audiobookshelf_title == episode_info["title"]:
        logger.debug("No new episode found, exiting.")
        return

    logger.info("New episode found")
    # The next track number is the previous one plus one, zero-padded to 4.
    track = str(int(audiobookshelf_track) + 1).zfill(4)
    options = return_download_options(episode_info, track)

    logger.info("Downloading new episode")
    try:
        with yt_dlp.YoutubeDL(options) as episode:
            # download() takes a list of URLs, not a single string.
            episode.download([episode_url])
        logger.debug("Download completed successfully")
    except Exception as e:
        logger.error(f"Failed to download episode: {e}", exc_info=True)
        return

    logger.info("Uploading episode via SFTP")
    upload_via_sftp(f"perun-{episode_info['date']}.mp3")

    logger.info("Sending release notification")
    send_notification_via_ssh(
        f"Perun episode {track} has been released",
        episode_info["title"],
    )
    logger.info("Workflow complete")
if __name__ == "__main__":
download_episode()

View File

@ -1,5 +1,4 @@
import re
import datetime
def return_string_as_html(input_text):
string_without_ads=""
@ -8,8 +7,3 @@ def return_string_as_html(input_text):
if not "Sponsored" in line:
string_without_ads+=line+"\n"
return("<p>"+string_without_ads.replace("\n\n", "</p>\n<p>").replace("\n", "<br>")+"</p>")
def log_message(message):
    """Print *message* with a local-time prefix and return the printed line.

    The line has the form ``[YYYY-MM-DD HH:MM:SS] message``. The returned
    string is the same line with a trailing newline appended.
    """
    now = datetime.datetime.now()
    line = f"[{now:%Y-%m-%d %H:%M:%S}] {message}"
    print(line)
    return line + "\n"

View File

@ -0,0 +1,19 @@
import logging
import os
# Level names accepted from the LOG_LEVEL environment variable.
_VALID_LOG_LEVELS = ("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG")

# The requested level is upper-cased; an unset or unrecognised value
# falls back to INFO.
_requested_level = os.getenv("LOG_LEVEL", "INFO").upper()
LOG_LEVEL = _requested_level if _requested_level in _VALID_LOG_LEVELS else "INFO"
def setup_logger(name: str, level: "str | None" = None) -> logging.Logger:
    """Return the logger called *name*, configured for stream output.

    A ``StreamHandler`` with the project's log format is attached the first
    time a given logger is set up; later calls reuse it, so handlers are
    never duplicated. The level is (re)applied on every call.

    Args:
        name: Logger name, normally the caller's ``__name__``.
        level: Optional level name (case-insensitive) overriding the
            module-wide ``LOG_LEVEL`` for this logger. An unrecognised
            name falls back to ``INFO``.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(name)

    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
        )
        logger.addHandler(handler)

    # LOG_LEVEL is only read when no explicit override is given.
    level_name = (LOG_LEVEL if level is None else level).upper()
    if level_name not in {"ERROR", "DEBUG", "INFO", "WARNING", "CRITICAL"}:
        level_name = "INFO"

    logger.setLevel(getattr(logging, level_name))
    # Lazy %-style arguments: the message is only formatted if DEBUG is on.
    logger.debug("Logger %s initialized with level %s", name, level_name)
    return logger

View File

@ -2,7 +2,10 @@ import paramiko
import os
from dotenv import load_dotenv
from json import dumps
from logger_handler import setup_logger
import time
logger = setup_logger(__name__)
load_dotenv()
REMOTE_HOSTNAME = os.getenv("REMOTE_HOSTNAME")
REMOTE_PATH = os.getenv("REMOTE_PATH")
@ -10,10 +13,20 @@ BACKEND_API_URL = os.getenv("BACKEND_API_URL")
BACKEND_API_KEY= os.getenv("BACKEND_API_KEY")
def load_ssh_config(host_alias):
"""
Load SSH connection details from ~/.ssh/config for the given alias.
"""
logger.debug(f"Loading SSH configuration for host alias '{host_alias}'")
ssh_config = paramiko.SSHConfig()
config_path = os.path.expanduser("~/.ssh/config")
try:
with open(config_path) as f:
ssh_config.parse(f)
except FileNotFoundError:
logger.error(f"SSH config file not found at {config_path}")
raise
host_config = ssh_config.lookup(host_alias)
hostname = host_config.get("hostname")
port = int(host_config.get("port", 22))
@ -21,22 +34,39 @@ def load_ssh_config(host_alias):
keyfile = host_config.get("identityfile", [None])[0]
if not all([hostname, username, keyfile]):
logger.error(f"Incomplete SSH configuration for alias '{host_alias}'")
raise ValueError(f"Missing SSH configuration for {host_alias}.")
logger.debug(f"SSH config loaded: host={hostname}, port={port}, user={username}, key={keyfile}")
return hostname, port, username, keyfile
def create_ssh_client(hostname, port, username, keyfile):
    """Create and return a connected Paramiko ``SSHClient``.

    Args:
        hostname: Remote host to connect to.
        port: TCP port of the SSH server.
        username: Login name on the remote host.
        keyfile: Path to the private key file used for authentication.

    Returns:
        A connected ``paramiko.SSHClient``; the caller must close it
        (for example by using it as a context manager).

    Raises:
        Any exception raised while loading host keys or connecting. The
        error is logged and the client is closed before re-raising.
    """
    ssh = paramiko.SSHClient()
    try:
        ssh.load_host_keys(os.path.expanduser("~/.ssh/known_hosts"))
        # NOTE(review): AutoAddPolicy trusts unknown host keys on first
        # contact; consider RejectPolicy if every host is pre-registered.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        # key_filename lets Paramiko detect the key type itself, so
        # non-RSA identity files (e.g. Ed25519) work as well as RSA.
        ssh.connect(
            hostname=hostname,
            username=username,
            port=port,
            key_filename=keyfile,
        )
    except Exception as e:
        logger.error(f"SSH connection failed: {e}", exc_info=True)
        # Release the half-open client instead of leaking its socket.
        ssh.close()
        raise

    logger.debug("SSH connection established successfully")
    return ssh
def upload_via_sftp(filename):
"""
Upload a file to the remote host via SFTP using SSH credentials.
"""
logger.info(f"Preparing to upload file '{filename}' via SFTP")
try:
hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
logger.debug(f"Connecting to {hostname}:{port} for file upload")
transport = paramiko.Transport((hostname, port))
pkey = paramiko.RSAKey.from_private_key_file(keyfile)
@ -44,34 +74,55 @@ def upload_via_sftp(filename):
sftp = paramiko.SFTPClient.from_transport(transport)
remote_file = os.path.join(REMOTE_PATH, os.path.basename(filename))
logger.info(f"Uploading to remote path: {remote_file}")
sftp.put(filename, remote_file)
sftp.close()
transport.close()
logger.info(f"File '{filename}' uploaded successfully")
except Exception as e:
logger.error(f"SFTP upload failed for '{filename}': {e}", exc_info=True)
raise
def send_notification_via_ssh(notification_title, notification_info):
    """Send a JSON notification to the backend by running curl over SSH.

    The SSH settings for ``REMOTE_HOSTNAME`` are loaded, a client is opened,
    and a ``curl`` POST to ``BACKEND_API_URL`` is executed on the remote
    host. The API key and the JSON payload are passed through the command's
    stdin: the first line is the key, the remainder is the request body.

    Args:
        notification_title: Title shown in the notification.
        notification_info: Text sent as the notification body.

    Raises:
        ValueError: If ``BACKEND_API_URL`` or ``BACKEND_API_KEY`` is unset.
        Exception: Any SSH/configuration error, re-raised after logging.
        A non-zero exit status of the remote command is only logged as a
        warning.
    """
    # Local import keeps this block self-contained.
    import shlex

    logger.info(f"Sending SSH notification: {notification_title}")
    try:
        if not BACKEND_API_URL or not BACKEND_API_KEY:
            raise ValueError("BACKEND_API_URL and BACKEND_API_KEY must be set.")

        hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME)
        with create_ssh_client(hostname, port, username, keyfile) as ssh:
            data = {
                # Key spelling kept as-is; presumably the backend expects
                # exactly this name - verify before renaming.
                "receipent_user_id": 1,
                "message": {
                    "title": notification_title,
                    "body": notification_info,
                    "category": "mixtapes",
                    "timestamp": int(time.time()),
                },
            }
            json_payload = dumps(data)
            logger.debug(f"Notification payload: {json_payload}")

            # The shell builtin `read` consumes only the first stdin line
            # (the API key) and leaves the JSON for curl's `-d @-`.
            # The key header uses double quotes so $API_KEY is expanded;
            # inside single quotes the shell would send the text literally.
            # --fail makes curl exit non-zero on HTTP errors, so the exit
            # status check below reflects the backend's response.
            notification_cmd = (
                "IFS= read -r API_KEY; "
                f"curl -sS --fail -X POST {shlex.quote(BACKEND_API_URL)} "
                "-H 'Content-Type: application/json' "
                '-H "X-API-Key-Internal: $API_KEY" '
                "-d @-"
            )

            stdin, stdout, stderr = ssh.exec_command(notification_cmd)
            stdin.write(f"{BACKEND_API_KEY}\n{json_payload}")
            stdin.flush()
            # Signal EOF so curl stops waiting for more request data.
            stdin.channel.shutdown_write()

            exit_status = stdout.channel.recv_exit_status()
            if exit_status == 0:
                logger.info("Notification sent successfully")
            else:
                error_output = stderr.read().decode()
                logger.warning(f"Notification command exited with {exit_status}: {error_output}")
    except Exception as e:
        logger.error(f"Failed to send SSH notification: {e}", exc_info=True)
        raise

View File

@ -4,13 +4,19 @@ import contextlib
from dotenv import load_dotenv
import os
from helper import return_string_as_html
from logger_handler import setup_logger
import json
logger = setup_logger(__name__)
load_dotenv()
YOUTUBE_CHANNEL_URL = os.getenv("YOUTUBE_CHANNEL_URL")
def get_url_for_latest_video():
"""
Fetch the URL of the latest video from a YouTube channel.
"""
logger.info("Fetching latest video URL from YouTube channel")
options = {
"extract_flat": True,
"playlist_items": "1",
@ -18,22 +24,50 @@ def get_url_for_latest_video():
"forcejson": True,
"simulate": True,
}
with open(os.devnull, "w") as devnull:
with contextlib.redirect_stdout(devnull):
with yt_dlp.YoutubeDL(options) as video:
info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download = False)
if "entries" in info_dict and len(info_dict["entries"]) > 0:
return info_dict["entries"][0]["url"]
def get_youtube_data(url):
with yt_dlp.YoutubeDL({"quiet":True,"noprogress":True}) as video:
info_dict = video.extract_info(url, download = False)
return {"date":datetime.datetime.fromtimestamp(info_dict["timestamp"], datetime.timezone.utc).strftime("%Y-%m-%d"),"title":info_dict["title"],
"description":return_string_as_html(info_dict["description"]),"upload_date":info_dict["upload_date"]}
try:
with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
with yt_dlp.YoutubeDL(options) as video:
info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download=False)
except Exception as e:
logger.error(f"Failed to fetch latest video info: {e}", exc_info=True)
return None
if "entries" in info_dict and len(info_dict["entries"]) > 0:
latest_url = info_dict["entries"][0]["url"]
logger.debug(f"Latest video URL found: {latest_url}")
return latest_url
else:
logger.warning("No entries found in channel feed")
return None
def get_youtube_data(url: str) -> dict:
    """Fetch metadata for a given YouTube video URL.

    Args:
        url: URL of the video to inspect.

    Returns:
        A dict with the keys ``date`` (``YYYY-MM-DD``), ``title``,
        ``description`` (converted by ``return_string_as_html``) and
        ``upload_date`` (as reported by yt-dlp), or an empty dict when the
        metadata cannot be fetched or is unusable.
    """
    logger.info(f"Fetching YouTube metadata for video: {url}")
    try:
        with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as video:
            info_dict = video.extract_info(url, download=False)
    except Exception as e:
        logger.error(f"Failed to fetch YouTube video info for {url}: {e}", exc_info=True)
        return {}

    # Without a title the caller cannot compare episodes; treat as failure.
    if not info_dict or not info_dict.get("title"):
        logger.error(f"No usable metadata returned for {url}")
        return {}

    timestamp = info_dict.get("timestamp")
    upload_date = info_dict.get("upload_date")
    if timestamp is not None:
        published = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc)
    elif upload_date:
        # Fall back to upload_date; assumes yt-dlp's YYYYMMDD format.
        published = datetime.datetime.strptime(upload_date, "%Y%m%d")
    else:
        logger.error(f"No publication date available for {url}")
        return {}

    video_data = {
        "date": published.strftime("%Y-%m-%d"),
        "title": info_dict["title"],
        # A missing description is treated as empty text.
        "description": return_string_as_html(info_dict.get("description") or ""),
        "upload_date": upload_date,
    }

    logger.debug(f"Fetched video data: {json.dumps(video_data, indent=4)}")
    return video_data
def return_download_options(information:dict,track:str)->dict:
return {
download_options = {
"quiet": True,
"noprogress": True,
"format": "bestaudio/best",
@ -72,3 +106,5 @@ def return_download_options(information:dict,track:str)->dict:
],
"merge_output_format": "mp3"
}
logger.debug(f"Created download options:\n {json.dumps(download_options, indent=4)}")
return download_options