1
0
Fork 0

Rework list_builder.py

FAAAAAASTEEEEEEER
This commit is contained in:
Félix Piédallu 2024-02-07 23:28:54 +01:00
parent 265cf0b186
commit b18ed48de2
3 changed files with 128 additions and 188 deletions

View file

@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
with Pool(processes=parallel) as pool:
tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
for _ in tqdm.tqdm(tasks, total=len(apps.keys())):
for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
pass

33
appslib/xmpplogger.py Normal file
View file

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import subprocess
from shutil import which
import logging
import logging.handlers
class XmppLogHandler(logging.Handler):
    """Logging handler that forwards records over XMPP via ``sendxmpppy``.

    Each record is delivered by running ``sendxmpppy <message>``. When the
    command cannot be found on ``PATH`` the record is dropped silently.
    """

    def __init__(self):
        super().__init__()
        self.is_logging = False

    def _render(self, record):
        """Return the text sent for ``record``: fixed prefix + formatted message."""
        # getMessage() merges record.args into record.msg, so lazy %-style
        # calls such as logging.error("failed: %s", err) are sent fully
        # interpolated rather than as the raw format string.
        return f"[Applist builder error] {record.getMessage()}"

    def emit(self, record):
        """Send ``record`` through ``sendxmpppy``; no-op if it is not installed."""
        if which("sendxmpppy") is None:
            return
        try:
            subprocess.call(
                ["sendxmpppy", self._render(record)],
                stdout=subprocess.DEVNULL,
            )
        except Exception:
            # Standard logging convention: a failing handler must not break
            # the code that merely tried to log something.
            self.handleError(record)

    @classmethod
    def add(cls, level=logging.ERROR):
        """Attach a new instance of this handler to the root logger.

        ``logging.basicConfig()`` is called first when the root logger has no
        handler yet.

        Args:
            level: minimum level set on the new handler.
        """
        root = logging.getLogger()
        if not root.handlers:
            logging.basicConfig()

        handler = cls()
        handler.setLevel(level)
        # addHandler() takes the logging lock, unlike appending to the
        # handlers list directly.
        root.addHandler(handler)
# Import-time side effect: attach the handler to the root logger at ERROR
# level, so importing this module is all a script needs to do.
XmppLogHandler.add(logging.ERROR)

View file

@ -3,27 +3,34 @@
import copy
import json
import os
import re
import subprocess
import sys
import time
from collections import OrderedDict
import multiprocessing
from pathlib import Path
from shutil import which
from typing import Any, Generator, TextIO
import time
import shutil
from collections import OrderedDict
import tqdm
import logging
import toml
from git import Repo
from app_caches import apps_cache_update_all, app_cache_folder # pylint: disable=import-error
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
get_catalog, git_repo_age)
# Automatically enables error-to-xmpp
import appslib.xmpplogger # pylint: disable=import-error
now = time.time()
REPO_APPS_PATH = Path(__file__).parent.parent
# Load categories and reformat the structure to have a list with an "id" key
categories = toml.load((REPO_APPS_PATH / "categories.toml").open("r", encoding="utf-8"))
categories = toml.load((REPO_APPS_ROOT / "categories.toml").open("r", encoding="utf-8"))
for category_id, infos in categories.items():
infos["id"] = category_id
for subtag_id, subtag_infos in infos.get("subtags", {}).items():
@ -33,13 +40,13 @@ for category_id, infos in categories.items():
categories = list(categories.values())
# (Same for antifeatures)
antifeatures = toml.load((REPO_APPS_PATH / "antifeatures.toml").open("r", encoding="utf-8"))
antifeatures = toml.load((REPO_APPS_ROOT / "antifeatures.toml").open("r", encoding="utf-8"))
for antifeature_id, infos in antifeatures.items():
infos["id"] = antifeature_id
antifeatures = list(antifeatures.values())
# Load the app catalog and filter out the non-working ones
catalog = toml.load((REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8"))
catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
catalog = {
app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
}
@ -47,165 +54,55 @@ catalog = {
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"
(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
def error(msg: str) -> None:
    """Report a build error on stdout and, when possible, over XMPP.

    The message is prefixed with ``[Applist builder error]``. It is passed to
    the ``sendxmpppy`` command when that command is found on ``PATH``, and it
    is always printed, followed by an empty line.

    Args:
        msg: description of the error, without the prefix.
    """
    msg = "[Applist builder error] " + msg
    if which("sendxmpppy") is not None:
        # subprocess.DEVNULL rather than open(os.devnull, "wb"): the opened
        # file object was never closed and leaked on every call.
        subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
    print(msg + "\n")
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
                ) -> Generator[Any, None, None]:
    """Yield the items of ``it`` while drawing a text progress bar on ``file``.

    The bar is redrawn in place (line ends with ``\\r``) before the first item
    and after each item has been consumed; a newline is written at the end.

    Args:
        it: sized sequence to iterate over. Each item must be indexable and
            its first element must be a string: it is shown next to the bar.
        prefix: text written before the bar.
        size: width of the bar in characters.
        file: stream the bar is written to.

    Yields:
        The items of ``it``, unchanged and in order.
    """
    count = len(it)

    def show(j, name=""):
        name += " "
        # With an empty input there is nothing left to do: draw the bar as
        # complete instead of dividing by zero.
        x = int(size * j / count) if count else size
        file.write(
            "%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
        )
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1, item[0])
    file.write("\n")
    file.flush()
###################################
# App git clones cache management #
###################################
def app_cache_folder(app: str) -> Path:
    """Return the path of the ``.apps_cache`` sub-directory dedicated to ``app``."""
    cache_root = REPO_APPS_PATH / ".apps_cache"
    return cache_root / app
def refresh_all_caches() -> None:
    """Clone or refresh the git cache of every app listed in ``catalog``.

    Apps are processed in sorted order with a progress bar. An app without a
    cache folder is cloned with ``init_cache``; a failure there is reported
    through ``error`` and the loop goes on. An app that already has a cache
    folder is updated with ``refresh_cache``; a failure there is reported and
    then re-raised, which stops the whole run.

    Raises:
        Exception: whatever ``refresh_cache`` raised for an existing cache.
    """
    for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
        app = app.lower()
        if not app_cache_folder(app).exists():
            try:
                init_cache(app, infos)
            except Exception as e:
                # Report the cause too, otherwise the failure is undiagnosable.
                error(f"Failed to init cache for {app}: {e}")
        else:
            try:
                refresh_cache(app, infos)
            except Exception as e:
                error(f"Failed to refresh cache for {app}: {e}")
                # Bare raise keeps the original traceback intact.
                raise
def init_cache(app: str, infos: dict[str, str]) -> None:
    """Create the cache of ``app`` with a shallow, single-branch git clone.

    The clone depth depends on the app state: 5 commits for ``notworking``,
    20 for ``inprogress`` and 40 for anything else, including a missing state.

    Args:
        app: app identifier, used to locate the cache folder.
        infos: catalog entry of the app. ``url`` is required; ``state`` and
            ``branch`` (default ``"master"``) are optional.
    """
    git_depths = {
        "notworking": 5,
        "inprogress": 20,
        "default": 40,
    }
    # .get(): an entry without a "state" key falls back to the default depth
    # instead of raising KeyError.
    depth = git_depths.get(infos.get("state"), git_depths["default"])

    Repo.clone_from(
        infos["url"],
        to_path=app_cache_folder(app),
        depth=depth,
        single_branch=True,
        branch=infos.get("branch", "master"),
    )
def git_repo_age(path: Path) -> bool | int:
fetch_head = path / ".git" / "FETCH_HEAD"
if fetch_head.exists():
return int(time.time() - fetch_head.stat().st_mtime)
return False
def refresh_cache(app: str, infos: dict[str, str]) -> None:
    """Bring the cached clone of ``app`` up to date with its upstream branch.

    Nothing is done when ``git_repo_age`` reports an age under one hour.
    Otherwise the ``origin`` URL is set to ``infos["url"]``, the wanted branch
    (``infos["branch"]``, default ``"master"``) is checked out if it is not
    the active one, then fetched, and the clone is hard-reset to
    ``origin/<branch>``.

    Raises:
        Exception: whatever the git operations raised, but only when the
            clone has no recorded age or that age is 24 hours or more.
            Failures on a more recent clone are ignored.
    """
    app_path = app_cache_folder(app)

    # Don't refresh if already refreshed during the last hour
    age = git_repo_age(app_path)
    if age is not False and age < 3600:
        return

    try:
        repo = Repo(app_path)

        repo.remote("origin").set_url(infos["url"])

        branch = infos.get("branch", "master")
        # Compare names: a branch object never equals a plain string, so the
        # previous test was always true and forced a checkout on every run.
        if str(repo.active_branch) != branch:
            all_branches = [str(b) for b in repo.branches]
            if branch in all_branches:
                repo.git.checkout(branch, "--force")
            else:
                repo.git.remote("set-branches", "--add", "origin", branch)
                repo.remote("origin").fetch(f"{branch}:{branch}")

        repo.remote("origin").fetch(refspec=branch, force=True)
        repo.git.reset("--hard", f"origin/{branch}")
    except Exception:
        # "except Exception" rather than a bare except, so Ctrl-C and
        # SystemExit are never swallowed.
        # Refreshing sometimes fails for temporary reasons: only report an
        # error when the cache has not been updated for more than 24 hours.
        age = git_repo_age(app_path)
        if age is False or age >= 24 * 3600:
            raise
(REPO_APPS_ROOT / "builds").mkdir(exist_ok=True)
################################
# Actual list build management #
################################
def __build_app_dict(data):
    """Build the catalog entry for one ``(name, info)`` pair.

    Takes a single tuple argument so it can be mapped over ``dict.items()``.

    Returns:
        ``(name, build_app_dict(name, info))`` on success, or ``None`` after
        logging an error when ``build_app_dict`` raised.
    """
    app_name, app_info = data
    try:
        built = build_app_dict(app_name, app_info)
    except Exception as err:
        logging.error("Error while updating %s: %s", app_name, err)
        return None
    return app_name, built
def build_catalog():
def build_base_catalog():
result_dict = {}
for app, infos in progressbar(sorted(catalog.items()), "Processing: ", 40):
with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
tasks = pool.imap(__build_app_dict, catalog.items())
app = app.lower()
for result in tqdm.tqdm(tasks, total=len(catalog.keys()), ascii=" ·#"):
assert result is not None
name, info = result
result_dict[name] = info
try:
app_dict = build_app_dict(app, infos)
except Exception as e:
error("Processing %s failed: %s" % (app, str(e)))
continue
return result_dict
result_dict[app_dict["id"]] = app_dict
#############################
# Current catalog API v2 #
#############################
def write_catalog_v2(base_catalog, target_dir: Path) -> None:
result_dict_with_manifest_v1 = copy.deepcopy(base_catalog)
result_dict_with_manifest_v1 = {
name: infos
for name, infos in result_dict_with_manifest_v1.items()
if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
}
full_catalog = {
"apps": result_dict_with_manifest_v1,
"categories": categories,
"antifeatures": antifeatures,
}
result_dict_with_manifest_v1 = copy.deepcopy(result_dict)
result_dict_with_manifest_v1 = {name: infos for name, infos in result_dict_with_manifest_v1.items() if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2}
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
os.system("mkdir -p ./builds/default/v2/")
with open("builds/default/v2/apps.json", "w") as f:
f.write(
json.dumps(
{
"apps": result_dict_with_manifest_v1,
"categories": categories,
"antifeatures": antifeatures,
},
sort_keys=True,
)
)
#############################################
# Current catalog API v3 (with manifest v2) #
#############################################
result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
def write_catalog_v3(base_catalog, target_dir: Path) -> None:
result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
for app in result_dict_with_manifest_v2.values():
packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
if packaging_format < 2:
@ -218,34 +115,31 @@ def build_catalog():
if "manifest" in app and "resources" in app["manifest"]:
del app["manifest"]["resources"]
logos_dir = target_dir / "logos"
logos_dir.mkdir(parents=True, exist_ok=True)
for appid, app in result_dict_with_manifest_v2.items():
appid = appid.lower()
if (REPO_APPS_PATH / "logos" / f"{appid}.png").exists():
logo_hash = subprocess.check_output(["sha256sum", f"logos/{appid}.png"]).strip().decode("utf-8").split()[0]
os.system(f"cp logos/{appid}.png builds/default/v3/logos/{logo_hash}.png")
logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
if logo_source.exists():
logo_hash = subprocess.check_output(["sha256sum", logo_source]).strip().decode("utf-8").split()[0]
shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
# FIXME: implement something to cleanup old logo stuf in the builds/.../logos/ folder somehow
else:
logo_hash = None
app["logo_hash"] = logo_hash
os.system("mkdir -p ./builds/default/v3/")
with open("builds/default/v3/apps.json", "w") as f:
f.write(
json.dumps(
{
"apps": result_dict_with_manifest_v2,
"categories": categories,
"antifeatures": antifeatures,
},
sort_keys=True,
)
)
full_catalog = {
"apps": result_dict_with_manifest_v2,
"categories": categories,
"antifeatures": antifeatures,
}
##############################
# Version for catalog in doc #
##############################
os.system("mkdir -p ./builds/default/doc_catalog")
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
def infos_for_doc_catalog(infos):
level = infos.get("level")
if not isinstance(level, int):
@ -267,31 +161,40 @@ def build_catalog():
result_dict_doc = {
k: infos_for_doc_catalog(v)
for k, v in result_dict.items()
for k, v in base_catalog.items()
if v["state"] == "working"
}
with open("builds/default/doc_catalog/apps.json", "w") as f:
f.write(
json.dumps(
{"apps": result_dict_doc, "categories": categories}, sort_keys=True
)
)
full_catalog = {
"apps": result_dict_doc,
"categories": categories
}
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
def build_app_dict(app, infos):
# Make sure we have some cache
this_app_cache = app_cache_folder(app)
assert this_app_cache.exists(), "No cache yet for %s" % app
assert this_app_cache.exists(), f"No cache yet for {app}"
repo = Repo(this_app_cache)
commit_timestamps_for_this_app_in_catalog = \
repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml")
commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
"-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json").split("\n")
if len(commits_in_apps_json) > 1:
first_commit = commits_in_apps_json[0]
else:
commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
"-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml").split("\n")
first_commit = commits_in_apps_toml[0]
# Assume the first entry we get (= the oldest) is the time the app was added
infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
infos["added_in_catalog"] = int(first_commit)
# int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
infos["branch"] = infos.get("branch", "master")
infos["revision"] = infos.get("revision", "HEAD")
@ -338,7 +241,7 @@ def build_app_dict(app, infos):
"manifest": manifest,
"state": infos["state"],
"level": infos.get("level", "?"),
"maintained": not 'package-not-maintained' in infos.get('antifeatures', []),
"maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
"high_quality": infos.get("high_quality", False),
"featured": infos.get("featured", False),
"category": infos.get("category", None),
@ -351,5 +254,9 @@ def build_app_dict(app, infos):
if __name__ == "__main__":
refresh_all_caches()
build_catalog()
apps_cache_update_all(get_catalog(), parallel=50)
catalog = build_base_catalog()
write_catalog_v2(catalog, REPO_APPS_ROOT / "builds" / "default" / "v2")
write_catalog_v3(catalog, REPO_APPS_ROOT / "builds" / "default" / "v3")
write_catalog_doc(catalog, REPO_APPS_ROOT / "builds" / "default" / "doc_catalog")