From 75d50a2a55368b2e34fdef0ecb9dbba223ce0b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?= Date: Tue, 20 Feb 2024 11:57:05 +0100 Subject: [PATCH] Add DownloadPageAPI to the autoupdater to download from HTML web pages --- .../autoupdate_app_sources.py | 12 +++++++++++- autoupdate_app_sources/requirements.txt | 2 ++ autoupdate_app_sources/rest_api.py | 17 +++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/autoupdate_app_sources/autoupdate_app_sources.py b/autoupdate_app_sources/autoupdate_app_sources.py index 358aa55..0da58bd 100755 --- a/autoupdate_app_sources/autoupdate_app_sources.py +++ b/autoupdate_app_sources/autoupdate_app_sources.py @@ -24,6 +24,7 @@ from rest_api import ( GithubAPI, GitlabAPI, GiteaForgejoAPI, + DownloadPageAPI, RefType, ) # noqa: E402,E501 pylint: disable=import-error,wrong-import-position import appslib.logging_sender # noqa: E402 pylint: disable=import-error,wrong-import-position @@ -49,6 +50,7 @@ STRATEGIES = [ "latest_forgejo_release", "latest_forgejo_tag", "latest_forgejo_commit", + "latest_webpage_link", ] @@ -466,7 +468,7 @@ class AppAutoUpdater: allow_prereleases = autoupdate.get("allow_prereleases", False) _, remote_type, revision_type = strategy.split("_") - api: Union[GithubAPI, GitlabAPI, GiteaForgejoAPI] + api: Union[GithubAPI, GitlabAPI, GiteaForgejoAPI, DownloadPageAPI] if remote_type == "github": assert upstream and upstream.startswith( "https://github.com/" @@ -575,6 +577,14 @@ class AppAutoUpdater: latest_commit["sha"], self.get_old_ref(infos), RefType.commits ), ) + + if remote_type == "webpage" and revision_type == "link": + api = DownloadPageAPI(upstream) + links = api.get_web_page_links() + latest_version_orig, latest_version = self.relevant_versions(list(links.keys()), self.app_id, version_re) + latest_url = links[latest_version_orig] + return latest_version, latest_url, "" + return None @staticmethod diff --git a/autoupdate_app_sources/requirements.txt 
class DownloadPageAPI:
    """Collect the hyperlinks exposed by a plain HTML download page.

    The page at ``upstream`` is fetched, parsed, and every usable ``<a>``
    element is turned into a ``link text -> absolute URL`` entry.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` would block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self, upstream: str) -> None:
        """Store the URL of the web page whose links will be listed."""
        self.web_page = upstream

    @staticmethod
    def _collect_links(
        base_url: str, anchors: list[tuple[Optional[str], Optional[str]]]
    ) -> dict[str, str]:
        """Build the ``text -> absolute URL`` mapping from raw anchor data.

        Args:
            base_url: URL of the page, used to resolve relative hrefs.
            anchors: ``(text, href)`` pairs, one per ``<a>`` element; either
                member may be ``None`` or empty.

        Returns:
            A dict keyed by link text. Pairs lacking a text or an href are
            skipped. When several anchors share the same text, the last one
            wins.
        """
        links: dict[str, str] = {}
        for text, href in anchors:
            # An anchor without text cannot be matched against a version
            # pattern, and one without href would resolve to the page itself.
            if not text or not href:
                continue
            links[text] = urljoin(base_url, href)
        return links

    def get_web_page_links(self) -> dict[str, str]:
        """Download the page and return its links keyed by their text.

        Returns:
            Mapping of each anchor's text to its href, made absolute
            relative to the page URL.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        r = requests.get(self.web_page, timeout=self.REQUEST_TIMEOUT)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, features="lxml")

        # ``.string`` is None for anchors that do not wrap a single text
        # node; those are filtered out by _collect_links.
        return self._collect_links(
            self.web_page,
            [(link.string, link.get("href")) for link in soup.find_all("a")],
        )