From 3bba8f23310a3302180d848f5634b43838035daf Mon Sep 17 00:00:00 2001 From: Florian Date: Mon, 3 Nov 2025 10:51:03 +0100 Subject: [PATCH] Wip2 --- src/perun/get_episode.py | 133 ++++++++++++++++++++++++++--------- src/perun/requirements.txt | 3 +- src/perun/ssh_helper.py | 41 +++++------ src/perun/youtube_handler.py | 30 ++++---- 4 files changed, 136 insertions(+), 71 deletions(-) diff --git a/src/perun/get_episode.py b/src/perun/get_episode.py index 1a45743..7095958 100644 --- a/src/perun/get_episode.py +++ b/src/perun/get_episode.py @@ -15,8 +15,25 @@ PODCAST_API_URL = os.getenv("PODCAST_API_URL") def get_audiobookshelf_data()->tuple[int | None, str | None]: - headers = {"Authorization": f"Bearer {PODCAST_AUTHORIZATION_TOKEN}"} + """ + Fetches the latest episode data from the Audiobookshelf API. + Returns: + tuple[int | None, str | None]: + - The track number as an integer (or None if data could not be fetched due to retryable errors). + - The episode title as a string (or None if data could not be fetched due to retryable errors). + + Raises: + requests.exceptions.HTTPError: + If a non-retryable HTTP error occurs (e.g., 401 Unauthorized, 403 Forbidden, 404 Not Found). + + Notes: + - Connection errors, timeouts, and server-side HTTP errors (500, 502, 503, 504) are caught and logged. + In these cases, the function returns (None, None) so the caller can retry later. + """ + + headers = {"Authorization": f"Bearer {PODCAST_AUTHORIZATION_TOKEN}"} + logger.debug(f"[Audiobookshelf] Fetching Audiobookshelf data") try: response = requests.get(PODCAST_API_URL, headers=headers) response.raise_for_status() @@ -25,70 +42,120 @@ def get_audiobookshelf_data()->tuple[int | None, str | None]: audiobookshelf_track = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTrack"] audiobookshelf_title = result["media"]["episodes"][-1]["audioFile"]["metaTags"]["tagTitle"] - logger.debug(f"Fetched Audiobookshelf data: track={audiobookshelf_track}, title={audiobookshelf_title}") + logger.debug(f"[Audiobookshelf] Fetched Audiobookshelf data: track={audiobookshelf_track}, title={audiobookshelf_title}") return audiobookshelf_track, audiobookshelf_title - except requests.RequestException as e: - logger.warning(f"Failed to fetch Audiobookshelf data: {e}") + except requests.exceptions.ConnectionError as e: + logger.warning(f"[Audiobookshelf] Connection error, will retry: {e}") return None + except requests.exceptions.Timeout as e: + logger.warning(f"[Audiobookshelf] Request timed out, will retry: {e}") + return None + except requests.exceptions.HTTPError as e: + status = e.response.status_code + if status in {500, 502, 503, 504}: + logger.warning(f"[Audiobookshelf] Server error {status}, will retry: {e}") + return None + else: + logger.error(f"[Audiobookshelf] HTTP error {status}, not retrying: {e}") + raise def check_until_new_episode_gets_released(): - CHECK_INTERVAL = 3600 # seconds - MAX_HOURS = 24 - for _ in range(int(MAX_HOURS * 3600 / CHECK_INTERVAL)): + """ + Polls YouTube every hour for a new episode and compares it to the available episode on Audiobookshelf. + Stops after 72 hours. + """ + CHECK_INTERVAL_HOURS = 1 + MAX_HOURS = 72 + for attempt in range(1, MAX_HOURS + 1): + logger.debug(f"[EpisodeCheck] Waiting for a new episode to be released, attempt: {attempt}/{MAX_HOURS}") audiobookshelf_track, audiobookshelf_title = get_audiobookshelf_data() + if audiobookshelf_track is None or audiobookshelf_title is None: - logger.warning("Unable to fetch Audiobookshelf data. Exiting.") + logger.warning("[EpisodeCheck] Unable to fetch Audiobookshelf data, retrying in 1 hour.") + time.sleep(CHECK_INTERVAL_HOURS * 3600) + continue episode_url = get_url_for_latest_video() episode_info = get_youtube_data(episode_url) if audiobookshelf_title != episode_info["title"]: - logger.info(f"Latest YouTube episode: {episode_info['title']}") + logger.info(f"[EpisodeCheck] Latest YouTube episode: {episode_info['title']}") return audiobookshelf_track,episode_info,episode_url - else: - logger.debug("No new episode found, going to sleep.") - time.sleep(CHECK_INTERVAL) + + logger.debug("[EpisodeCheck] No new episode found, retrying in 1 hour.") + time.sleep(CHECK_INTERVAL_HOURS * 3600) + + logger.warning("[EpisodeCheck] No new episode found after maximum attempts.") + return None, None, None def wait_for_sponsorblock_segments_to_be_added(): - CHECK_INTERVAL = 3600 # seconds + """ + Polls SponsorBlock for segments on the current video until found or until max attempts. + """ + CHECK_INTERVAL_HOURS = 1 MAX_HOURS = 24 - for _ in range(int(MAX_HOURS * 3600 / CHECK_INTERVAL)): - segments= check_for_sponsorblock_segments() + for attempt in range(1, MAX_HOURS + 1): + logger.debug(f"[SponsorBlock] Waiting for SponsorBlock to be added, attempt: {attempt}/{MAX_HOURS} ") + segments = check_for_sponsorblock_segments() + if segments: - break - else: - logger.debug("Code eepy") - time.sleep(CHECK_INTERVAL) + logger.debug("[SponsorBlock] Segments found, existing loop.") + return True + + logger.debug("[SponsorBlock] No SponsorBlock segments found yet, retrying in 1 hour.") + time.sleep(CHECK_INTERVAL_HOURS * 3600) + + logger.warning("[SponsorBlock] Segments not found after maximum attempts.") + return None def download_episode(): - logger.info("Starting Perun") + logger.info("[App] Starting Perun") - audiobookshelf_track,episode_info,episode_url = check_until_new_episode_gets_released() - logger.info("New episode found") + try: + audiobookshelf_track,episode_info,episode_url = check_until_new_episode_gets_released() + logger.info("[App] New episode found") + except Exception as e: + logger.error(f"[App] Failed to fetch new episode info: {e}", exc_info=True) + return - episode_description = episode_info["description"] - if "sponsor" in episode_description.lower(): - wait_for_sponsorblock_segments_to_be_added() + try: + episode_description = episode_info.get("description", "") + if "sponsored" in episode_description.lower(): + logger.debug(f"[App] Sponsored segments found in description, waiting for SponsorBlock") + wait_for_sponsorblock_segments_to_be_added() + else: + logger.debug(f"[App] No sponsored segments found in description") + except Exception as e: + logger.warning(f"[App] Failed during SponsorBlock wait: {e}", exc_info=True) - track = str(int(audiobookshelf_track) + 1).zfill(4) - options = return_download_options(episode_info,track) + try: + track = str(int(audiobookshelf_track) + 1).zfill(4) + except Exception as e: + logger.error(f"[App] Invalid Audiobookshelf track number: {audiobookshelf_track}, error: {e}") + return + + try: + options = return_download_options(episode_info,track) + except Exception as e: + logger.error(f"[App] Failed to generate download options: {e}", exc_info=True) + return - logger.info("Downloading new episode") + logger.info("[App] Downloading new episode") try: with yt_dlp.YoutubeDL(options) as episode: episode.download(episode_url) - logger.debug("Download completed successfully") + logger.debug("[App] Download completed successfully") except Exception as e: - logger.error(f"Failed to download episode: {e}", exc_info=True) + logger.error(f"[App] Failed to download episode: {e}", exc_info=True) return - logger.info("Uploading episode via SFTP") + logger.info("[App] Uploading episode via SFTP") upload_via_sftp(f"perun-{episode_info['date']}.mp3") - logger.info("Sending release notification") + logger.info("[App] Sending release notification") send_notification_via_ssh(f"Perun episode {track} has been released",episode_info["title"]) - logger.info("Workflow complete") + logger.info("[App] Workflow complete") if __name__ == "__main__": diff --git a/src/perun/requirements.txt b/src/perun/requirements.txt index cc0de2e..785f6b3 100644 --- a/src/perun/requirements.txt +++ b/src/perun/requirements.txt @@ -14,5 +14,6 @@ pycryptodomex==3.23.0 PyNaCl==1.6.0 python-dotenv==1.1.1 requests==2.32.5 +sponsorblock.py==0.2.3 urllib3==2.5.0 -websockets==15.0.1 +websockets==15.0.1 \ No newline at end of file diff --git a/src/perun/ssh_helper.py b/src/perun/ssh_helper.py index e872202..886e089 100644 --- a/src/perun/ssh_helper.py +++ b/src/perun/ssh_helper.py @@ -16,7 +16,7 @@ def load_ssh_config(host_alias): """ Load SSH connection details from ~/.ssh/config for the given alias. """ - logger.debug(f"Loading SSH configuration for host alias '{host_alias}'") + logger.debug(f"[SSH] Loading SSH configuration for host alias '{host_alias}'") ssh_config = paramiko.SSHConfig() config_path = os.path.expanduser("~/.ssh/config") @@ -24,7 +24,7 @@ def load_ssh_config(host_alias): with open(config_path) as f: ssh_config.parse(f) except FileNotFoundError: - logger.error(f"SSH config file not found at {config_path}") + logger.error(f"[SSH] SSH config file not found at {config_path}") raise host_config = ssh_config.lookup(host_alias) @@ -34,10 +34,10 @@ def load_ssh_config(host_alias): keyfile = host_config.get("identityfile", [None])[0] if not all([hostname, username, keyfile]): - logger.error(f"Incomplete SSH configuration for alias '{host_alias}'") - raise ValueError(f"Missing SSH configuration for {host_alias}.") + logger.error(f"[SSH] Incomplete SSH configuration for alias '{host_alias}'") + raise ValueError(f"[SSH] Missing SSH configuration for {host_alias}.") - logger.debug(f"SSH config loaded: host={hostname}, port={port}, user={username}, key={keyfile}") + logger.debug(f"[SSH] SSH config loaded: host={hostname}, port={port}, user={username}, key={keyfile}") return hostname, port, username, keyfile @@ -45,6 +45,7 @@ def create_ssh_client(hostname, port, username, keyfile): """ Create and return a connected Paramiko SSHClient instance. """ + logger.debug("[SSH] Creating SSH client") try: ssh = paramiko.SSHClient() ssh.load_host_keys(os.path.expanduser("~/.ssh/known_hosts")) @@ -52,10 +53,10 @@ def create_ssh_client(hostname, port, username, keyfile): pkey = paramiko.RSAKey.from_private_key_file(keyfile) ssh.connect(hostname=hostname, username=username, port=port, pkey=pkey) - logger.debug("SSH connection established successfully") + logger.debug("[SSH] SSH connection established successfully") return ssh except Exception as e: - logger.error(f"SSH connection failed: {e}", exc_info=True) + logger.error(f"[SSH] SSH connection failed: {e}", exc_info=True) raise @@ -63,10 +64,10 @@ def upload_via_sftp(filename): """ Upload a file to the remote host via SFTP using SSH credentials. """ - logger.info(f"Preparing to upload file '{filename}' via SFTP") + logger.info(f"[SFTP] Preparing to upload file '{filename}' via SFTP") try: hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME) - logger.debug(f"Connecting to {hostname}:{port} for file upload") + logger.debug(f"[SFTP] Connecting to {hostname}:{port} for file upload") transport = paramiko.Transport((hostname, port)) pkey = paramiko.RSAKey.from_private_key_file(keyfile) @@ -74,14 +75,14 @@ def upload_via_sftp(filename): sftp = paramiko.SFTPClient.from_transport(transport) remote_file = os.path.join(REMOTE_PATH, os.path.basename(filename)) - logger.info(f"Uploading to remote path: {remote_file}") + logger.info(f"[SFTP] Uploading to remote path: {remote_file}") sftp.put(filename, remote_file) sftp.close() transport.close() - logger.info(f"File '{filename}' uploaded successfully") + logger.info(f"[SFTP] File '{filename}' uploaded successfully") except Exception as e: - logger.error(f"SFTP upload failed for '{filename}': {e}", exc_info=True) + logger.error(f"[SFTP] SFTP upload failed for '{filename}': {e}", exc_info=True) raise @@ -89,7 +90,7 @@ def send_notification_via_ssh(notification_title, notification_info): """ Send a JSON-formatted notification payload via SSH to the backend. """ - logger.info(f"Sending SSH notification: {notification_title}") + logger.info(f"[Notification] Sending SSH notification: {notification_title}") try: hostname, port, username, keyfile = load_ssh_config(REMOTE_HOSTNAME) with create_ssh_client(hostname, port, username, keyfile) as ssh: @@ -103,7 +104,7 @@ def send_notification_via_ssh(notification_title, notification_info): } } json_payload = dumps(data) - logger.debug(f"Notification payload: {json_payload}") + logger.debug(f"[Notification] Notification payload: {json_payload}") notification_cmd = ( f"API_KEY=$(head -n1) && " @@ -123,13 +124,13 @@ def send_notification_via_ssh(notification_title, notification_info): exit_status = stdout.channel.recv_exit_status() if exit_status == 0: - logger.info("Notification sent successfully") - logger.debug(f"Response: {response_output}") + logger.info("[Notification] Notification sent successfully") + logger.debug(f"[Notification] Response: {response_output}") else: error_output = stderr.read().decode() - logger.warning(f"Notification command exited with {exit_status}") - logger.warning(f"Error: {error_output}") - logger.warning(f"Response: {response_output}") + logger.warning(f"[Notification] Notification command exited with {exit_status}") + logger.warning(f"[Notification] Error: {error_output}") + logger.warning(f"[Notification] Response: {response_output}") except Exception as e: - logger.error(f"Failed to send SSH notification: {e}", exc_info=True) + logger.error(f"[Notification] Failed to send SSH notification: {e}", exc_info=True) raise \ No newline at end of file diff --git a/src/perun/youtube_handler.py b/src/perun/youtube_handler.py index 95b37d6..28532b0 100644 --- a/src/perun/youtube_handler.py +++ b/src/perun/youtube_handler.py @@ -18,7 +18,7 @@ def get_url_for_latest_video(): """ Fetch the URL of the latest video from a YouTube channel. """ - logger.info("Fetching latest video URL from YouTube channel") + logger.info("[YouTube] Fetching latest video URL from YouTube channel") options = { "extract_flat": True, "playlist_items": "1", @@ -32,15 +32,15 @@ def get_url_for_latest_video(): with yt_dlp.YoutubeDL(options) as video: info_dict = video.extract_info(YOUTUBE_CHANNEL_URL, download=False) except Exception as e: - logger.error(f"Failed to fetch latest video info: {e}", exc_info=True) + logger.error(f"[YouTube] Failed to fetch latest video info: {e}", exc_info=True) return None if "entries" in info_dict and len(info_dict["entries"]) > 0: latest_url = info_dict["entries"][0]["url"] - logger.debug(f"Latest video URL found: {latest_url}") + logger.debug(f"[YouTube] Latest video URL found: {latest_url}") return latest_url else: - logger.warning("No entries found in channel feed") + logger.warning("[YouTube] No entries found in channel feed") return None def get_youtube_data(url: str) -> dict: @@ -52,7 +52,7 @@ def get_youtube_data(url: str) -> dict: with yt_dlp.YoutubeDL({"quiet": True, "noprogress": True}) as video: info_dict = video.extract_info(url, download=False) except Exception as e: - logger.error(f"Failed to fetch YouTube video info for {url}: {e}", exc_info=True) + logger.error(f"[YouTube] Failed to fetch YouTube video info for {url}: {e}", exc_info=True) return {} video_data = { @@ -60,11 +60,10 @@ def get_youtube_data(url: str) -> dict: info_dict["timestamp"], datetime.timezone.utc ).strftime("%Y-%m-%d"), "title": info_dict["title"], - "description": return_string_as_html(info_dict["description"]), - "upload_date": info_dict["upload_date"] + "description": info_dict.get("description", "") } - logger.debug(f"Fetched video data: {json.dumps(video_data, indent=4)}") + logger.debug(f"[YouTube] Fetched video data: {json.dumps(video_data, indent=4)}") return video_data def check_for_sponsorblock_segments(youtube_video:str) -> bool: @@ -72,15 +71,12 @@ def check_for_sponsorblock_segments(youtube_video:str) -> bool: try: segments = client.get_skip_segments(youtube_video) except sb.errors.NotFoundException: - logger.debug(f"No SponsorBlock information for video:{youtube_video}") + logger.debug(f"[SponsorBlock] No SponsorBlock information for video:{youtube_video}") return False if segments: - logger.debug(f"SponsorBlock segments found for video: {youtube_video}") + logger.debug(f"[SponsorBlock] SponsorBlock segments found for video: {youtube_video}") return True - else: - logger.debug(f"SponsorBlock returned empty segments for video: {youtube_video}") - return False def return_download_options(information:dict,track:str)->dict: download_options = { @@ -117,13 +113,13 @@ def return_download_options(information:dict,track:str)->dict: "-metadata", f"artist=Perun", "-metadata", f"track={track}", "-metadata", f"date={information['date']}", - "-metadata", f"comment={information['description']}", - "-metadata", f"description={information['description']}", + "-metadata", f"comment={return_string_as_html(information['description'])}", + "-metadata", f"description={return_string_as_html(information['description'])}", ], "merge_output_format": "mp3" } - logger.debug(f"Created download options:\n {json.dumps(download_options, indent=4)}") + logger.debug(f"[YouTube] Created download options:\n {json.dumps(download_options, indent=4)}") return download_options if __name__ == "__main__": - print(check_for_sponsorblock_segments("https://www.youtube.com/watch?v=M0t8UYZ9rrQ")) \ No newline at end of file + print(check_for_sponsorblock_segments("https://www.youtube.com/watch?v=M0t8UYZ9rrQ"))