From b18ed48de27f1d2784ab3ddcdd00b79790511654 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Pi=C3=A9dallu?=
Date: Wed, 7 Feb 2024 23:28:54 +0100
Subject: [PATCH] Rework list_builder.py

FAAAAAASTEEEEEEER
---
 app_caches.py         |   2 +-
 appslib/xmpplogger.py |  33 +++++
 list_builder.py       | 281 ++++++++++++++----------------------------
 3 files changed, 128 insertions(+), 188 deletions(-)
 create mode 100644 appslib/xmpplogger.py

diff --git a/app_caches.py b/app_caches.py
index f3e35fd..180c4df 100755
--- a/app_caches.py
+++ b/app_caches.py
@@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
 
 def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
     with Pool(processes=parallel) as pool:
         tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
-        for _ in tqdm.tqdm(tasks, total=len(apps.keys())):
+        for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
             pass
 
diff --git a/appslib/xmpplogger.py b/appslib/xmpplogger.py
new file mode 100644
index 0000000..69c4814
--- /dev/null
+++ b/appslib/xmpplogger.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import subprocess
+from shutil import which
+import logging
+import logging.handlers
+
+
+class XmppLogHandler(logging.Handler):
+    def __init__(self):
+        logging.Handler.__init__(self)
+        self.is_logging = False
+
+    def emit(self, record):
+        if which("sendxmpppy") is None:
+            return
+
+        msg = f"[Applist builder error] {record.getMessage()}"
+        subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
+
+    @classmethod
+    def add(cls, level=logging.ERROR):
+        if not logging.getLogger().handlers:
+            logging.basicConfig()
+
+        # Create a handler that only reacts to records at `level` or above
+        handler = cls()
+        handler.setLevel(level)
+        # Register it on the root logger
+        logging.getLogger().addHandler(handler)
+
+
+XmppLogHandler.add(logging.ERROR)
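
XmppLogHandler.add() runs at import time, so merely importing appslib.xmpplogger wires
every log record at ERROR level or above to the XMPP bridge, and emit() degrades to a
no-op when the sendxmpppy binary is not in PATH. A minimal sketch of the resulting
behavior (the messages below are illustrative only, not part of the patch):

    import logging

    import appslib.xmpplogger  # installs the XMPP handler as an import side effect

    # Printed locally by basicConfig AND forwarded to XMPP if sendxmpppy is available
    logging.error("Error while updating %s: %s", "some_app", "some failure")
    # Below the ERROR threshold: handled locally, never forwarded
    logging.warning("cache is a bit stale")
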
diff --git a/list_builder.py b/list_builder.py
index 1994f01..1bb947a 100755
--- a/list_builder.py
+++ b/list_builder.py
@@ -3,27 +3,34 @@
 import copy
 import json
 import os
-import re
 import subprocess
-import sys
-import time
-from collections import OrderedDict
+import multiprocessing
 from pathlib import Path
-from shutil import which
-from typing import Any, Generator, TextIO
+import time
+import shutil
+from collections import OrderedDict
 
+import tqdm
+import logging
 import toml
 from git import Repo
 
+from app_caches import apps_cache_update_all, app_cache_folder  # pylint: disable=import-error
+
 from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
     convert_v1_manifest_to_v2_for_catalog  # pylint: disable=import-error
 
+from appslib.utils import (REPO_APPS_ROOT,  # pylint: disable=import-error
+                           get_catalog, git_repo_age)
+
+# Importing this module installs the error-to-XMPP log handler
+import appslib.xmpplogger  # pylint: disable=import-error
+
 now = time.time()
 
-REPO_APPS_PATH = Path(__file__).parent.parent
 
 # Load categories and reformat the structure to have a list with an "id" key
-categories = toml.load((REPO_APPS_PATH / "categories.toml").open("r", encoding="utf-8"))
+categories = toml.load((REPO_APPS_ROOT / "categories.toml").open("r", encoding="utf-8"))
 for category_id, infos in categories.items():
     infos["id"] = category_id
     for subtag_id, subtag_infos in infos.get("subtags", {}).items():
@@ -33,13 +40,13 @@ for category_id, infos in categories.items():
 categories = list(categories.values())
 
 # (Same for antifeatures)
-antifeatures = toml.load((REPO_APPS_PATH / "antifeatures.toml").open("r", encoding="utf-8"))
+antifeatures = toml.load((REPO_APPS_ROOT / "antifeatures.toml").open("r", encoding="utf-8"))
 for antifeature_id, infos in antifeatures.items():
     infos["id"] = antifeature_id
 antifeatures = list(antifeatures.values())
 
 # Load the app catalog and filter out the non-working ones
-catalog = toml.load((REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8"))
+catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
 catalog = {
     app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
 }
@@ -47,165 +54,55 @@ catalog = {
 my_env = os.environ.copy()
 my_env["GIT_TERMINAL_PROMPT"] = "0"
 
-(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
-(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
-
-
-def error(msg: str) -> None:
-    msg = "[Applist builder error] " + msg
-    if which("sendxmpppy") is not None:
-        subprocess.call(["sendxmpppy", msg], stdout=open(os.devnull, "wb"))
-    print(msg + "\n")
-
-
-# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
-def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
-                ) -> Generator[Any, None, None]:
-    count = len(it)
-
-    def show(j, name=""):
-        name += " "
-        x = int(size * j / count)
-        file.write(
-            "%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
-        )
-        file.flush()
-
-    show(0)
-    for i, item in enumerate(it):
-        yield item
-        show(i + 1, item[0])
-    file.write("\n")
-    file.flush()
-
-
-###################################
-# App git clones cache management #
-###################################
-
-
-def app_cache_folder(app: str) -> Path:
-    return REPO_APPS_PATH / ".apps_cache" / app
-
-
-def refresh_all_caches() -> None:
-    for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
-        app = app.lower()
-        if not app_cache_folder(app).exists():
-            try:
-                init_cache(app, infos)
-            except Exception as e:
-                error("Failed to init cache for %s" % app)
-        else:
-            try:
-                refresh_cache(app, infos)
-            except Exception as e:
-                error("Failed to not refresh cache for %s: %s" % (app, e))
-                raise e
-
-
-def init_cache(app: str, infos: dict[str, str]) -> None:
-    git_depths = {
-        "notworking": 5,
-        "inprogress": 20,
-        "default": 40,
-    }
-
-    Repo.clone_from(
-        infos["url"],
-        to_path=app_cache_folder(app),
-        depth=git_depths.get(infos["state"], git_depths["default"]),
-        single_branch=True, branch=infos.get("branch", "master"),
-    )
-
-
-def git_repo_age(path: Path) -> bool | int:
-    fetch_head = path / ".git" / "FETCH_HEAD"
-    if fetch_head.exists():
-        return int(time.time() - fetch_head.stat().st_mtime)
-    return False
-
-
-def refresh_cache(app: str, infos: dict[str, str]) -> None:
-    app_path = app_cache_folder(app)
-
-    # Don't refresh if already refreshed during last hour
-    age = git_repo_age(app_path)
-    if age is not False and age < 3600:
-        return
-
-    try:
-        repo = Repo(app_path)
-
-        repo.remote("origin").set_url(infos["url"])
-
-        branch = infos.get("branch", "master")
-        if repo.active_branch != branch:
-            all_branches = [str(b) for b in repo.branches]
-            if branch in all_branches:
-                repo.git.checkout(branch, "--force")
-            else:
-                repo.git.remote("set-branches", "--add", "origin", branch)
-                repo.remote("origin").fetch(f"{branch}:{branch}")
-
-        repo.remote("origin").fetch(refspec=branch, force=True)
-        repo.git.reset("--hard", f"origin/{branch}")
-    except:
-        # Sometimes there are tmp issue such that the refresh cache ..
-        # we don't trigger an error unless the cache hasnt been updated since more than 24 hours
-        age = git_repo_age(app_path)
-        if age is not False and age < 24 * 3600:
-            pass
-        else:
-            raise
+(REPO_APPS_ROOT / "builds").mkdir(exist_ok=True)
 
 
 ################################
 # Actual list build management #
 ################################
 
+def __build_app_dict(data):
+    name, info = data
+    try:
+        return name, build_app_dict(name, info)
+    except Exception as err:
+        logging.error("Error while updating %s: %s", name, err)
 
-def build_catalog():
+def build_base_catalog():
     result_dict = {}
 
-    for app, infos in progressbar(sorted(catalog.items()), "Processing: ", 40):
+    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
+        tasks = pool.imap(__build_app_dict, catalog.items())
 
-        app = app.lower()
+        for result in tqdm.tqdm(tasks, total=len(catalog.keys()), ascii=" ·#"):
+            if result is None:
+                continue  # the failure was already logged by __build_app_dict
+            name, info = result
+            result_dict[name] = info
 
-        try:
-            app_dict = build_app_dict(app, infos)
-        except Exception as e:
-            error("Processing %s failed: %s" % (app, str(e)))
-            continue
+    return result_dict
 
-        result_dict[app_dict["id"]] = app_dict
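
build_base_catalog() fans one build_app_dict() call per app out to a pool of
CPU-count workers. A toy sketch of the same Pool.imap + tqdm pattern (the worker
and its inputs are placeholders, not part of the patch); note that the worker must
live at module level so multiprocessing can pickle it, and that imap, unlike
imap_unordered, yields results in input order:

    import multiprocessing

    import tqdm

    def _square(n: int) -> int:  # module-level, hence picklable
        return n * n

    if __name__ == "__main__":
        with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
            tasks = pool.imap(_square, range(1000))
            # ascii=" ·#" renders the same dotted progress bar as the patch
            for _ in tqdm.tqdm(tasks, total=1000, ascii=" ·#"):
                pass
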
 
-    #############################
-    # Current catalog API v2 #
-    #############################
+def write_catalog_v2(base_catalog, target_dir: Path) -> None:
+    result_dict_with_manifest_v1 = copy.deepcopy(base_catalog)
+    result_dict_with_manifest_v1 = {
+        name: infos
+        for name, infos in result_dict_with_manifest_v1.items()
+        if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
+    }
+    full_catalog = {
+        "apps": result_dict_with_manifest_v1,
+        "categories": categories,
+        "antifeatures": antifeatures,
+    }
 
-    result_dict_with_manifest_v1 = copy.deepcopy(result_dict)
-    result_dict_with_manifest_v1 = {name: infos for name, infos in result_dict_with_manifest_v1.items() if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2}
+    target_file = target_dir / "apps.json"
+    target_file.parent.mkdir(parents=True, exist_ok=True)
+    target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
 
-    os.system("mkdir -p ./builds/default/v2/")
-    with open("builds/default/v2/apps.json", "w") as f:
-        f.write(
-            json.dumps(
-                {
-                    "apps": result_dict_with_manifest_v1,
-                    "categories": categories,
-                    "antifeatures": antifeatures,
-                },
-                sort_keys=True,
-            )
-        )
 
-    #############################################
-    # Catalog catalog API v3 (with manifest v2) #
-    #############################################
-
-    result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
+def write_catalog_v3(base_catalog, target_dir: Path) -> None:
+    result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
     for app in result_dict_with_manifest_v2.values():
         packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
         if packaging_format < 2:
@@ -218,34 +115,31 @@ def build_catalog():
         if "manifest" in app and "resources" in app["manifest"]:
             del app["manifest"]["resources"]
 
+    logos_dir = target_dir / "logos"
+    logos_dir.mkdir(parents=True, exist_ok=True)
     for appid, app in result_dict_with_manifest_v2.items():
         appid = appid.lower()
-        if (REPO_APPS_PATH / "logos" / f"{appid}.png").exists():
-            logo_hash = subprocess.check_output(["sha256sum", f"logos/{appid}.png"]).strip().decode("utf-8").split()[0]
-            os.system(f"cp logos/{appid}.png builds/default/v3/logos/{logo_hash}.png")
+        logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
+        if logo_source.exists():
+            logo_hash = subprocess.check_output(["sha256sum", logo_source]).strip().decode("utf-8").split()[0]
+            shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
+            # FIXME: implement something to clean up old logo stuff in the builds/.../logos/ folder somehow
         else:
            logo_hash = None
        app["logo_hash"] = logo_hash
 
-    os.system("mkdir -p ./builds/default/v3/")
-    with open("builds/default/v3/apps.json", "w") as f:
-        f.write(
-            json.dumps(
-                {
-                    "apps": result_dict_with_manifest_v2,
-                    "categories": categories,
-                    "antifeatures": antifeatures,
-                },
-                sort_keys=True,
-            )
-        )
+    full_catalog = {
+        "apps": result_dict_with_manifest_v2,
+        "categories": categories,
+        "antifeatures": antifeatures,
+    }
 
-    ##############################
-    # Version for catalog in doc #
-    ##############################
-    os.system("mkdir -p ./builds/default/doc_catalog")
+    target_file = target_dir / "apps.json"
+    target_file.parent.mkdir(parents=True, exist_ok=True)
+    target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
+
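
The v3 logos are content-addressed: each copy is named after the sha256 of its
content, so the URL changes exactly when the logo changes and clients can cache it
aggressively. The patch shells out to sha256sum; a standard-library equivalent with
hashlib would look like this (a sketch, not what the patch ships):

    import hashlib
    import shutil
    from pathlib import Path

    def publish_logo(logo_source: Path, logos_dir: Path) -> str:
        # Hash the file content, then copy it under its hash-derived name
        logo_hash = hashlib.sha256(logo_source.read_bytes()).hexdigest()
        shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
        return logo_hash
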
+def write_catalog_doc(base_catalog, target_dir: Path) -> None:
     def infos_for_doc_catalog(infos):
         level = infos.get("level")
         if not isinstance(level, int):
@@ -267,31 +161,40 @@ def build_catalog():
 
     result_dict_doc = {
         k: infos_for_doc_catalog(v)
-        for k, v in result_dict.items()
+        for k, v in base_catalog.items()
         if v["state"] == "working"
     }
 
-    with open("builds/default/doc_catalog/apps.json", "w") as f:
-        f.write(
-            json.dumps(
-                {"apps": result_dict_doc, "categories": categories}, sort_keys=True
-            )
-        )
+    full_catalog = {
+        "apps": result_dict_doc,
+        "categories": categories
+    }
+
+    target_file = target_dir / "apps.json"
+    target_file.parent.mkdir(parents=True, exist_ok=True)
+    target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
 
 
 def build_app_dict(app, infos):
-    # Make sure we have some cache
     this_app_cache = app_cache_folder(app)
-    assert this_app_cache.exists(), "No cache yet for %s" % app
+    assert this_app_cache.exists(), f"No cache yet for {app}"
 
     repo = Repo(this_app_cache)
 
-    commit_timestamps_for_this_app_in_catalog = \
-        repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix",
-                     "--format=%cd", "--", "apps.json", "apps.toml")
+    commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
+        "-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
+        "--format=%cd", "--", "apps.json").split("\n")
+    if commits_in_apps_json[0]:
+        first_commit = commits_in_apps_json[0]
+    else:
+        commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
+            "-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
+            "--format=%cd", "--", "apps.json", "apps.toml").split("\n")
+        first_commit = commits_in_apps_toml[0]
 
     # Assume the first entry we get (= the oldest) is the time the app was added
-    infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
+    infos["added_in_catalog"] = int(first_commit)
 
     infos["branch"] = infos.get("branch", "master")
     infos["revision"] = infos.get("revision", "HEAD")
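
The added_in_catalog date now comes from git's pickaxe search: `git log -S<string>`
keeps only the commits in which the number of occurrences of the string changed, so
with --reverse the first timestamp printed belongs to the commit that introduced the
app into apps.json (quoted name) or apps.toml ([section] header). A standalone sketch
of the same lookup via GitPython (the repository path and app name are placeholders):

    from git import Repo

    repo = Repo("/path/to/apps")  # placeholder: the catalog repository
    timestamps = repo.git.log(
        "-S", '"someapp"',  # the string whose insertion we are looking for
        "--first-parent", "--reverse", "--date=unix", "--format=%cd",
        "--", "apps.json", "apps.toml",
    ).split("\n")
    # Oldest matching commit first, thanks to --reverse
    added_in_catalog = int(timestamps[0]) if timestamps[0] else None
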
@@ -338,7 +241,7 @@ def build_app_dict(app, infos):
         "manifest": manifest,
         "state": infos["state"],
         "level": infos.get("level", "?"),
-        "maintained": not 'package-not-maintained' in infos.get('antifeatures', []),
+        "maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
         "high_quality": infos.get("high_quality", False),
         "featured": infos.get("featured", False),
         "category": infos.get("category", None),
@@ -351,5 +254,9 @@ def build_app_dict(app, infos):
 
 
 if __name__ == "__main__":
-    refresh_all_caches()
-    build_catalog()
+    apps_cache_update_all(get_catalog(), parallel=50)
+
+    catalog = build_base_catalog()
+    write_catalog_v2(catalog, REPO_APPS_ROOT / "builds" / "default" / "v2")
+    write_catalog_v3(catalog, REPO_APPS_ROOT / "builds" / "default" / "v3")
+    write_catalog_doc(catalog, REPO_APPS_ROOT / "builds" / "default" / "doc_catalog")
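
After a run, each API flavor ends up as builds/default/<flavor>/apps.json. A quick
hypothetical smoke test of the generated files:

    import json
    from pathlib import Path

    for flavor in ("v2", "v3", "doc_catalog"):
        catalog_file = Path("builds/default") / flavor / "apps.json"
        data = json.loads(catalog_file.read_text(encoding="utf-8"))
        print(flavor, "->", len(data["apps"]), "apps")
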