Rework list_builder.py
FAAAAAASTEEEEEEER
This commit is contained in:
parent
265cf0b186
commit
b18ed48de2
3 changed files with 128 additions and 188 deletions
|
@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
|
|||
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
|
||||
with Pool(processes=parallel) as pool:
|
||||
tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
|
||||
for _ in tqdm.tqdm(tasks, total=len(apps.keys())):
|
||||
for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
|
||||
pass
|
||||
|
||||
|
||||
|
|
33
appslib/xmpplogger.py
Normal file
33
appslib/xmpplogger.py
Normal file
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import subprocess
|
||||
from shutil import which
|
||||
import logging
|
||||
import logging.handlers
|
||||
|
||||
|
||||
class XmppLogHandler(logging.Handler):
    """Logging handler that forwards records over XMPP via ``sendxmpppy``.

    Each record is delivered by running the external ``sendxmpppy`` command
    with the message as its single argument. When that command cannot be
    found on ``PATH`` the record is dropped silently.
    """

    def __init__(self):
        super().__init__()
        # Kept for backward compatibility; nothing in this class reads it.
        self.is_logging = False

    def _xmpp_message(self, record):
        """Return the text sent for *record*, with its %-style args merged in."""
        # ``record.msg`` is only the raw format string; ``getMessage()``
        # applies ``record.args`` so "%s" placeholders are actually filled.
        return f"[Applist builder error] {record.getMessage()}"

    def emit(self, record):
        """Send *record* through ``sendxmpppy`` if that command is available."""
        if which("sendxmpppy") is None:
            return

        try:
            subprocess.call(
                ["sendxmpppy", self._xmpp_message(record)],
                stdout=subprocess.DEVNULL,
            )
        except Exception:
            # A logging handler must not raise into the code that logged;
            # defer to the standard logging error hook instead.
            self.handleError(record)

    @classmethod
    def add(cls, level=logging.ERROR):
        """Attach a new handler of this class to the root logger.

        If the root logger has no handler yet, ``logging.basicConfig()`` is
        called first so the default handler is installed alongside this one.

        Args:
            level: minimum level a record needs to be forwarded.
        """
        root_logger = logging.getLogger()
        if not root_logger.handlers:
            logging.basicConfig()

        handler = cls()
        handler.setLevel(level)
        # addHandler() takes the logging lock, unlike appending to .handlers.
        root_logger.addHandler(handler)
|
||||
|
||||
|
||||
# Install the handler on the root logger as soon as this module is imported,
# so records at ERROR level and above are forwarded without further setup.
XmppLogHandler.add(logging.ERROR)
|
281
list_builder.py
281
list_builder.py
|
@ -3,27 +3,34 @@
|
|||
import copy
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
import multiprocessing
|
||||
from pathlib import Path
|
||||
from shutil import which
|
||||
from typing import Any, Generator, TextIO
|
||||
import time
|
||||
import shutil
|
||||
from collections import OrderedDict
|
||||
|
||||
import tqdm
|
||||
import logging
|
||||
import toml
|
||||
from git import Repo
|
||||
|
||||
from app_caches import apps_cache_update_all, app_cache_folder # pylint: disable=import-error
|
||||
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
|
||||
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
|
||||
|
||||
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
|
||||
get_catalog, git_repo_age)
|
||||
|
||||
# Automatically enables error-to-xmpp
|
||||
import appslib.xmpplogger # pylint: disable=import-error
|
||||
|
||||
|
||||
now = time.time()
|
||||
|
||||
REPO_APPS_PATH = Path(__file__).parent.parent
|
||||
|
||||
# Load categories and reformat the structure to have a list with an "id" key
|
||||
categories = toml.load((REPO_APPS_PATH / "categories.toml").open("r", encoding="utf-8"))
|
||||
categories = toml.load((REPO_APPS_ROOT / "categories.toml").open("r", encoding="utf-8"))
|
||||
for category_id, infos in categories.items():
|
||||
infos["id"] = category_id
|
||||
for subtag_id, subtag_infos in infos.get("subtags", {}).items():
|
||||
|
@ -33,13 +40,13 @@ for category_id, infos in categories.items():
|
|||
categories = list(categories.values())
|
||||
|
||||
# (Same for antifeatures)
|
||||
antifeatures = toml.load((REPO_APPS_PATH / "antifeatures.toml").open("r", encoding="utf-8"))
|
||||
antifeatures = toml.load((REPO_APPS_ROOT / "antifeatures.toml").open("r", encoding="utf-8"))
|
||||
for antifeature_id, infos in antifeatures.items():
|
||||
infos["id"] = antifeature_id
|
||||
antifeatures = list(antifeatures.values())
|
||||
|
||||
# Load the app catalog and filter out the non-working ones
|
||||
catalog = toml.load((REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8"))
|
||||
catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
|
||||
catalog = {
|
||||
app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
|
||||
}
|
||||
|
@ -47,165 +54,55 @@ catalog = {
|
|||
my_env = os.environ.copy()
|
||||
my_env["GIT_TERMINAL_PROMPT"] = "0"
|
||||
|
||||
(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
|
||||
(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
|
||||
|
||||
|
||||
def error(msg: str) -> None:
|
||||
msg = "[Applist builder error] " + msg
|
||||
if which("sendxmpppy") is not None:
|
||||
subprocess.call(["sendxmpppy", msg], stdout=open(os.devnull, "wb"))
|
||||
print(msg + "\n")
|
||||
|
||||
|
||||
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
|
||||
def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
|
||||
) -> Generator[Any, None, None]:
|
||||
count = len(it)
|
||||
|
||||
def show(j, name=""):
|
||||
name += " "
|
||||
x = int(size * j / count)
|
||||
file.write(
|
||||
"%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
|
||||
)
|
||||
file.flush()
|
||||
|
||||
show(0)
|
||||
for i, item in enumerate(it):
|
||||
yield item
|
||||
show(i + 1, item[0])
|
||||
file.write("\n")
|
||||
file.flush()
|
||||
|
||||
|
||||
###################################
|
||||
# App git clones cache management #
|
||||
###################################
|
||||
|
||||
|
||||
def app_cache_folder(app: str) -> Path:
|
||||
return REPO_APPS_PATH / ".apps_cache" / app
|
||||
|
||||
|
||||
def refresh_all_caches() -> None:
|
||||
for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
|
||||
app = app.lower()
|
||||
if not app_cache_folder(app).exists():
|
||||
try:
|
||||
init_cache(app, infos)
|
||||
except Exception as e:
|
||||
error("Failed to init cache for %s" % app)
|
||||
else:
|
||||
try:
|
||||
refresh_cache(app, infos)
|
||||
except Exception as e:
|
||||
error("Failed to not refresh cache for %s: %s" % (app, e))
|
||||
raise e
|
||||
|
||||
|
||||
def init_cache(app: str, infos: dict[str, str]) -> None:
|
||||
git_depths = {
|
||||
"notworking": 5,
|
||||
"inprogress": 20,
|
||||
"default": 40,
|
||||
}
|
||||
|
||||
Repo.clone_from(
|
||||
infos["url"],
|
||||
to_path=app_cache_folder(app),
|
||||
depth=git_depths.get(infos["state"], git_depths["default"]),
|
||||
single_branch=True, branch=infos.get("branch", "master"),
|
||||
)
|
||||
|
||||
|
||||
def git_repo_age(path: Path) -> bool | int:
|
||||
fetch_head = path / ".git" / "FETCH_HEAD"
|
||||
if fetch_head.exists():
|
||||
return int(time.time() - fetch_head.stat().st_mtime)
|
||||
return False
|
||||
|
||||
|
||||
def refresh_cache(app: str, infos: dict[str, str]) -> None:
|
||||
app_path = app_cache_folder(app)
|
||||
|
||||
# Don't refresh if already refreshed during last hour
|
||||
age = git_repo_age(app_path)
|
||||
if age is not False and age < 3600:
|
||||
return
|
||||
|
||||
try:
|
||||
repo = Repo(app_path)
|
||||
|
||||
repo.remote("origin").set_url(infos["url"])
|
||||
|
||||
branch = infos.get("branch", "master")
|
||||
if repo.active_branch != branch:
|
||||
all_branches = [str(b) for b in repo.branches]
|
||||
if branch in all_branches:
|
||||
repo.git.checkout(branch, "--force")
|
||||
else:
|
||||
repo.git.remote("set-branches", "--add", "origin", branch)
|
||||
repo.remote("origin").fetch(f"{branch}:{branch}")
|
||||
|
||||
repo.remote("origin").fetch(refspec=branch, force=True)
|
||||
repo.git.reset("--hard", f"origin/{branch}")
|
||||
except:
|
||||
# Sometimes there are tmp issue such that the refresh cache ..
|
||||
# we don't trigger an error unless the cache hasnt been updated since more than 24 hours
|
||||
age = git_repo_age(app_path)
|
||||
if age is not False and age < 24 * 3600:
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
(REPO_APPS_ROOT / "builds").mkdir(exist_ok=True)
|
||||
|
||||
|
||||
################################
|
||||
# Actual list build management #
|
||||
################################
|
||||
|
||||
def __build_app_dict(data):
    """Build the catalog entry for one ``(app name, app infos)`` pair.

    Takes a single tuple argument so it can be mapped directly over
    ``catalog.items()`` by a worker pool.

    Returns:
        ``(name, app_dict)`` on success, or ``None`` when building the entry
        raised; the failure is logged at ERROR level in that case.
    """
    app_name, app_infos = data
    try:
        built = build_app_dict(app_name, app_infos)
    except Exception as exc:
        logging.error("Error while updating %s: %s", app_name, exc)
        return None
    return app_name, built
|
||||
|
||||
def build_catalog():
|
||||
|
||||
def build_base_catalog():
|
||||
result_dict = {}
|
||||
|
||||
for app, infos in progressbar(sorted(catalog.items()), "Processing: ", 40):
|
||||
with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
|
||||
tasks = pool.imap(__build_app_dict, catalog.items())
|
||||
|
||||
app = app.lower()
|
||||
for result in tqdm.tqdm(tasks, total=len(catalog.keys()), ascii=" ·#"):
|
||||
assert result is not None
|
||||
name, info = result
|
||||
result_dict[name] = info
|
||||
|
||||
try:
|
||||
app_dict = build_app_dict(app, infos)
|
||||
except Exception as e:
|
||||
error("Processing %s failed: %s" % (app, str(e)))
|
||||
continue
|
||||
return result_dict
|
||||
|
||||
result_dict[app_dict["id"]] = app_dict
|
||||
|
||||
#############################
|
||||
# Current catalog API v2 #
|
||||
#############################
|
||||
def write_catalog_v2(base_catalog, target_dir: Path) -> None:
|
||||
result_dict_with_manifest_v1 = copy.deepcopy(base_catalog)
|
||||
result_dict_with_manifest_v1 = {
|
||||
name: infos
|
||||
for name, infos in result_dict_with_manifest_v1.items()
|
||||
if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
|
||||
}
|
||||
full_catalog = {
|
||||
"apps": result_dict_with_manifest_v1,
|
||||
"categories": categories,
|
||||
"antifeatures": antifeatures,
|
||||
}
|
||||
|
||||
result_dict_with_manifest_v1 = copy.deepcopy(result_dict)
|
||||
result_dict_with_manifest_v1 = {name: infos for name, infos in result_dict_with_manifest_v1.items() if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2}
|
||||
target_file = target_dir / "apps.json"
|
||||
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
|
||||
|
||||
os.system("mkdir -p ./builds/default/v2/")
|
||||
with open("builds/default/v2/apps.json", "w") as f:
|
||||
f.write(
|
||||
json.dumps(
|
||||
{
|
||||
"apps": result_dict_with_manifest_v1,
|
||||
"categories": categories,
|
||||
"antifeatures": antifeatures,
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
|
||||
#############################################
|
||||
# Catalog catalog API v3 (with manifest v2) #
|
||||
#############################################
|
||||
|
||||
result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
|
||||
def write_catalog_v3(base_catalog, target_dir: Path) -> None:
|
||||
result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
|
||||
for app in result_dict_with_manifest_v2.values():
|
||||
packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
|
||||
if packaging_format < 2:
|
||||
|
@ -218,34 +115,31 @@ def build_catalog():
|
|||
if "manifest" in app and "resources" in app["manifest"]:
|
||||
del app["manifest"]["resources"]
|
||||
|
||||
logos_dir = target_dir / "logos"
|
||||
logos_dir.mkdir(parents=True, exist_ok=True)
|
||||
for appid, app in result_dict_with_manifest_v2.items():
|
||||
appid = appid.lower()
|
||||
if (REPO_APPS_PATH / "logos" / f"{appid}.png").exists():
|
||||
logo_hash = subprocess.check_output(["sha256sum", f"logos/{appid}.png"]).strip().decode("utf-8").split()[0]
|
||||
os.system(f"cp logos/{appid}.png builds/default/v3/logos/{logo_hash}.png")
|
||||
logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
|
||||
if logo_source.exists():
|
||||
logo_hash = subprocess.check_output(["sha256sum", logo_source]).strip().decode("utf-8").split()[0]
|
||||
shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
|
||||
# FIXME: implement something to cleanup old logo stuf in the builds/.../logos/ folder somehow
|
||||
else:
|
||||
logo_hash = None
|
||||
app["logo_hash"] = logo_hash
|
||||
|
||||
os.system("mkdir -p ./builds/default/v3/")
|
||||
with open("builds/default/v3/apps.json", "w") as f:
|
||||
f.write(
|
||||
json.dumps(
|
||||
{
|
||||
"apps": result_dict_with_manifest_v2,
|
||||
"categories": categories,
|
||||
"antifeatures": antifeatures,
|
||||
},
|
||||
sort_keys=True,
|
||||
)
|
||||
)
|
||||
full_catalog = {
|
||||
"apps": result_dict_with_manifest_v2,
|
||||
"categories": categories,
|
||||
"antifeatures": antifeatures,
|
||||
}
|
||||
|
||||
##############################
|
||||
# Version for catalog in doc #
|
||||
##############################
|
||||
os.system("mkdir -p ./builds/default/doc_catalog")
|
||||
target_file = target_dir / "apps.json"
|
||||
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
|
||||
|
||||
|
||||
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
|
||||
def infos_for_doc_catalog(infos):
|
||||
level = infos.get("level")
|
||||
if not isinstance(level, int):
|
||||
|
@ -267,31 +161,40 @@ def build_catalog():
|
|||
|
||||
result_dict_doc = {
|
||||
k: infos_for_doc_catalog(v)
|
||||
for k, v in result_dict.items()
|
||||
for k, v in base_catalog.items()
|
||||
if v["state"] == "working"
|
||||
}
|
||||
with open("builds/default/doc_catalog/apps.json", "w") as f:
|
||||
f.write(
|
||||
json.dumps(
|
||||
{"apps": result_dict_doc, "categories": categories}, sort_keys=True
|
||||
)
|
||||
)
|
||||
full_catalog = {
|
||||
"apps": result_dict_doc,
|
||||
"categories": categories
|
||||
}
|
||||
|
||||
target_file = target_dir / "apps.json"
|
||||
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
|
||||
|
||||
|
||||
def build_app_dict(app, infos):
|
||||
|
||||
# Make sure we have some cache
|
||||
this_app_cache = app_cache_folder(app)
|
||||
assert this_app_cache.exists(), "No cache yet for %s" % app
|
||||
assert this_app_cache.exists(), f"No cache yet for {app}"
|
||||
|
||||
repo = Repo(this_app_cache)
|
||||
|
||||
commit_timestamps_for_this_app_in_catalog = \
|
||||
repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix",
|
||||
"--format=%cd", "--", "apps.json", "apps.toml")
|
||||
commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
|
||||
"-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
|
||||
"--format=%cd", "--", "apps.json").split("\n")
|
||||
if len(commits_in_apps_json) > 1:
|
||||
first_commit = commits_in_apps_json[0]
|
||||
else:
|
||||
commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
|
||||
"-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
|
||||
"--format=%cd", "--", "apps.json", "apps.toml").split("\n")
|
||||
first_commit = commits_in_apps_toml[0]
|
||||
|
||||
# Assume the first entry we get (= the oldest) is the time the app was added
|
||||
infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
|
||||
infos["added_in_catalog"] = int(first_commit)
|
||||
# int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
|
||||
|
||||
infos["branch"] = infos.get("branch", "master")
|
||||
infos["revision"] = infos.get("revision", "HEAD")
|
||||
|
@ -338,7 +241,7 @@ def build_app_dict(app, infos):
|
|||
"manifest": manifest,
|
||||
"state": infos["state"],
|
||||
"level": infos.get("level", "?"),
|
||||
"maintained": not 'package-not-maintained' in infos.get('antifeatures', []),
|
||||
"maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
|
||||
"high_quality": infos.get("high_quality", False),
|
||||
"featured": infos.get("featured", False),
|
||||
"category": infos.get("category", None),
|
||||
|
@ -351,5 +254,9 @@ def build_app_dict(app, infos):
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
refresh_all_caches()
|
||||
build_catalog()
|
||||
apps_cache_update_all(get_catalog(), parallel=50)
|
||||
|
||||
catalog = build_base_catalog()
|
||||
write_catalog_v2(catalog, REPO_APPS_ROOT / "builds" / "default" / "v2")
|
||||
write_catalog_v3(catalog, REPO_APPS_ROOT / "builds" / "default" / "v3")
|
||||
write_catalog_doc(catalog, REPO_APPS_ROOT / "builds" / "default" / "doc_catalog")
|
||||
|
|
Loading…
Reference in a new issue