Rework list_builder.py
FAAAAAASTEEEEEEER
This commit is contained in:
parent
265cf0b186
commit
b18ed48de2
3 changed files with 128 additions and 188 deletions
|
@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
|
||||||
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
|
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
|
||||||
with Pool(processes=parallel) as pool:
|
with Pool(processes=parallel) as pool:
|
||||||
tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
|
tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
|
||||||
for _ in tqdm.tqdm(tasks, total=len(apps.keys())):
|
for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
33
appslib/xmpplogger.py
Normal file
33
appslib/xmpplogger.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from shutil import which
|
||||||
|
import logging
|
||||||
|
import logging.handlers
|
||||||
|
|
||||||
|
|
||||||
|
class XmppLogHandler(logging.Handler):
    """Logging handler that hands each record to the ``sendxmpppy`` command.

    Records are dropped silently when ``sendxmpppy`` cannot be found on
    ``PATH``, so the handler is safe to install on machines without it.
    """

    def __init__(self):
        logging.Handler.__init__(self)
        # Not read anywhere in this class; kept so existing users of the
        # attribute keep working.
        self.is_logging = False

    @staticmethod
    def _build_message(record):
        """Return the text sent for *record*.

        ``record.getMessage()`` is used rather than ``record.msg`` so that
        lazy %-style arguments (``logging.error("... %s", value)``) are
        interpolated instead of being sent as a raw format string.
        """
        return f"[Applist builder error] {record.getMessage()}"

    def emit(self, record):
        """Send *record* through ``sendxmpppy``; no-op if the tool is missing."""
        if which("sendxmpppy") is None:
            return

        try:
            msg = self._build_message(record)
            subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
        except Exception:
            # Standard logging convention: a failing handler must not raise
            # into the code that merely tried to log something.
            self.handleError(record)

    @classmethod
    def add(cls, level=logging.ERROR):
        """Install a handler of this class on the root logger.

        ``logging.basicConfig()`` is called first when the root logger has no
        handler yet. The new handler only processes records at *level* or
        above.
        """
        root_logger = logging.getLogger()
        if not root_logger.handlers:
            logging.basicConfig()

        # create handler
        handler = cls()
        handler.setLevel(level)
        # add the handler through the public API (takes the logging lock)
        root_logger.addHandler(handler)
|
||||||
|
|
||||||
|
|
||||||
|
XmppLogHandler.add(logging.ERROR)
|
281
list_builder.py
281
list_builder.py
|
@ -3,27 +3,34 @@
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import multiprocessing
|
||||||
import time
|
|
||||||
from collections import OrderedDict
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import which
|
import time
|
||||||
from typing import Any, Generator, TextIO
|
import shutil
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import tqdm
|
||||||
|
import logging
|
||||||
import toml
|
import toml
|
||||||
from git import Repo
|
from git import Repo
|
||||||
|
|
||||||
|
from app_caches import apps_cache_update_all, app_cache_folder # pylint: disable=import-error
|
||||||
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
|
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
|
||||||
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
|
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
|
||||||
|
|
||||||
|
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
|
||||||
|
get_catalog, git_repo_age)
|
||||||
|
|
||||||
|
# Automatically enables error-to-xmpp
|
||||||
|
import appslib.xmpplogger # pylint: disable=import-error
|
||||||
|
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
REPO_APPS_PATH = Path(__file__).parent.parent
|
|
||||||
|
|
||||||
# Load categories and reformat the structure to have a list with an "id" key
|
# Load categories and reformat the structure to have a list with an "id" key
|
||||||
categories = toml.load((REPO_APPS_PATH / "categories.toml").open("r", encoding="utf-8"))
|
categories = toml.load((REPO_APPS_ROOT / "categories.toml").open("r", encoding="utf-8"))
|
||||||
for category_id, infos in categories.items():
|
for category_id, infos in categories.items():
|
||||||
infos["id"] = category_id
|
infos["id"] = category_id
|
||||||
for subtag_id, subtag_infos in infos.get("subtags", {}).items():
|
for subtag_id, subtag_infos in infos.get("subtags", {}).items():
|
||||||
|
@ -33,13 +40,13 @@ for category_id, infos in categories.items():
|
||||||
categories = list(categories.values())
|
categories = list(categories.values())
|
||||||
|
|
||||||
# (Same for antifeatures)
|
# (Same for antifeatures)
|
||||||
antifeatures = toml.load((REPO_APPS_PATH / "antifeatures.toml").open("r", encoding="utf-8"))
|
antifeatures = toml.load((REPO_APPS_ROOT / "antifeatures.toml").open("r", encoding="utf-8"))
|
||||||
for antifeature_id, infos in antifeatures.items():
|
for antifeature_id, infos in antifeatures.items():
|
||||||
infos["id"] = antifeature_id
|
infos["id"] = antifeature_id
|
||||||
antifeatures = list(antifeatures.values())
|
antifeatures = list(antifeatures.values())
|
||||||
|
|
||||||
# Load the app catalog and filter out the non-working ones
|
# Load the app catalog and filter out the non-working ones
|
||||||
catalog = toml.load((REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8"))
|
catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
|
||||||
catalog = {
|
catalog = {
|
||||||
app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
|
app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
|
||||||
}
|
}
|
||||||
|
@ -47,165 +54,55 @@ catalog = {
|
||||||
my_env = os.environ.copy()
|
my_env = os.environ.copy()
|
||||||
my_env["GIT_TERMINAL_PROMPT"] = "0"
|
my_env["GIT_TERMINAL_PROMPT"] = "0"
|
||||||
|
|
||||||
(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
|
(REPO_APPS_ROOT / "builds").mkdir(exist_ok=True)
|
||||||
(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
def error(msg: str) -> None:
|
|
||||||
msg = "[Applist builder error] " + msg
|
|
||||||
if which("sendxmpppy") is not None:
|
|
||||||
subprocess.call(["sendxmpppy", msg], stdout=open(os.devnull, "wb"))
|
|
||||||
print(msg + "\n")
|
|
||||||
|
|
||||||
|
|
||||||
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
|
|
||||||
def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
|
|
||||||
) -> Generator[Any, None, None]:
|
|
||||||
count = len(it)
|
|
||||||
|
|
||||||
def show(j, name=""):
|
|
||||||
name += " "
|
|
||||||
x = int(size * j / count)
|
|
||||||
file.write(
|
|
||||||
"%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
|
|
||||||
)
|
|
||||||
file.flush()
|
|
||||||
|
|
||||||
show(0)
|
|
||||||
for i, item in enumerate(it):
|
|
||||||
yield item
|
|
||||||
show(i + 1, item[0])
|
|
||||||
file.write("\n")
|
|
||||||
file.flush()
|
|
||||||
|
|
||||||
|
|
||||||
###################################
|
|
||||||
# App git clones cache management #
|
|
||||||
###################################
|
|
||||||
|
|
||||||
|
|
||||||
def app_cache_folder(app: str) -> Path:
|
|
||||||
return REPO_APPS_PATH / ".apps_cache" / app
|
|
||||||
|
|
||||||
|
|
||||||
def refresh_all_caches() -> None:
|
|
||||||
for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
|
|
||||||
app = app.lower()
|
|
||||||
if not app_cache_folder(app).exists():
|
|
||||||
try:
|
|
||||||
init_cache(app, infos)
|
|
||||||
except Exception as e:
|
|
||||||
error("Failed to init cache for %s" % app)
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
refresh_cache(app, infos)
|
|
||||||
except Exception as e:
|
|
||||||
error("Failed to not refresh cache for %s: %s" % (app, e))
|
|
||||||
raise e
|
|
||||||
|
|
||||||
|
|
||||||
def init_cache(app: str, infos: dict[str, str]) -> None:
|
|
||||||
git_depths = {
|
|
||||||
"notworking": 5,
|
|
||||||
"inprogress": 20,
|
|
||||||
"default": 40,
|
|
||||||
}
|
|
||||||
|
|
||||||
Repo.clone_from(
|
|
||||||
infos["url"],
|
|
||||||
to_path=app_cache_folder(app),
|
|
||||||
depth=git_depths.get(infos["state"], git_depths["default"]),
|
|
||||||
single_branch=True, branch=infos.get("branch", "master"),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def git_repo_age(path: Path) -> bool | int:
|
|
||||||
fetch_head = path / ".git" / "FETCH_HEAD"
|
|
||||||
if fetch_head.exists():
|
|
||||||
return int(time.time() - fetch_head.stat().st_mtime)
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def refresh_cache(app: str, infos: dict[str, str]) -> None:
|
|
||||||
app_path = app_cache_folder(app)
|
|
||||||
|
|
||||||
# Don't refresh if already refreshed during last hour
|
|
||||||
age = git_repo_age(app_path)
|
|
||||||
if age is not False and age < 3600:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
repo = Repo(app_path)
|
|
||||||
|
|
||||||
repo.remote("origin").set_url(infos["url"])
|
|
||||||
|
|
||||||
branch = infos.get("branch", "master")
|
|
||||||
if repo.active_branch != branch:
|
|
||||||
all_branches = [str(b) for b in repo.branches]
|
|
||||||
if branch in all_branches:
|
|
||||||
repo.git.checkout(branch, "--force")
|
|
||||||
else:
|
|
||||||
repo.git.remote("set-branches", "--add", "origin", branch)
|
|
||||||
repo.remote("origin").fetch(f"{branch}:{branch}")
|
|
||||||
|
|
||||||
repo.remote("origin").fetch(refspec=branch, force=True)
|
|
||||||
repo.git.reset("--hard", f"origin/{branch}")
|
|
||||||
except:
|
|
||||||
# Sometimes there are tmp issue such that the refresh cache ..
|
|
||||||
# we don't trigger an error unless the cache hasnt been updated since more than 24 hours
|
|
||||||
age = git_repo_age(app_path)
|
|
||||||
if age is not False and age < 24 * 3600:
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
################################
|
################################
|
||||||
# Actual list build management #
|
# Actual list build management #
|
||||||
################################
|
################################
|
||||||
|
|
||||||
|
def __build_app_dict(data):
    """Pool worker: build the catalog entry for one ``(name, info)`` pair.

    Returns ``(name, app_dict)`` on success. If ``build_app_dict`` raises,
    the error is logged and ``None`` is returned so the caller can skip
    this app.
    """
    name, info = data
    try:
        return name, build_app_dict(name, info)
    except Exception as err:
        logging.error("Error while updating %s: %s", name, err)
        return None


def build_base_catalog():
    """Build the app dicts for every catalog entry using a process pool.

    Returns a dict mapping app name to its built app dict. Apps whose build
    failed (the worker returned ``None`` after logging the error) are left
    out instead of aborting the whole run.
    """
    result_dict = {}

    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        tasks = pool.imap(__build_app_dict, catalog.items())

        # Results must be consumed while the pool is still alive.
        for result in tqdm.tqdm(tasks, total=len(catalog), ascii=" ·#"):
            if result is None:
                # Already reported by the worker; keep building the others.
                continue
            name, info = result
            result_dict[name] = info

    return result_dict
|
||||||
app_dict = build_app_dict(app, infos)
|
|
||||||
except Exception as e:
|
|
||||||
error("Processing %s failed: %s" % (app, str(e)))
|
|
||||||
continue
|
|
||||||
|
|
||||||
result_dict[app_dict["id"]] = app_dict
|
|
||||||
|
|
||||||
#############################
|
def write_catalog_v2(base_catalog, target_dir: Path) -> None:
    """Write the v2 catalog to ``target_dir / "apps.json"``.

    Only apps whose manifest ``packaging_format`` is below 2 (a missing or
    empty value counts as 0) are included. The module-level ``categories``
    and ``antifeatures`` lists are written alongside them.
    """
    # base_catalog is only read here, so no copy of it is needed.
    result_dict_with_manifest_v1 = {
        name: infos
        for name, infos in base_catalog.items()
        if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
    }
    full_catalog = {
        "apps": result_dict_with_manifest_v1,
        "categories": categories,
        "antifeatures": antifeatures,
    }

    target_file = target_dir / "apps.json"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text closes the file itself; the former open(...).write(...)
    # left the handle open until garbage collection.
    target_file.write_text(json.dumps(full_catalog, sort_keys=True), encoding="utf-8")
|
||||||
|
|
||||||
os.system("mkdir -p ./builds/default/v2/")
|
|
||||||
with open("builds/default/v2/apps.json", "w") as f:
|
|
||||||
f.write(
|
|
||||||
json.dumps(
|
|
||||||
{
|
|
||||||
"apps": result_dict_with_manifest_v1,
|
|
||||||
"categories": categories,
|
|
||||||
"antifeatures": antifeatures,
|
|
||||||
},
|
|
||||||
sort_keys=True,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
#############################################
|
def write_catalog_v3(base_catalog, target_dir: Path) -> None:
|
||||||
# Catalog catalog API v3 (with manifest v2) #
|
result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
|
||||||
#############################################
|
|
||||||
|
|
||||||
result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
|
|
||||||
for app in result_dict_with_manifest_v2.values():
|
for app in result_dict_with_manifest_v2.values():
|
||||||
packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
|
packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
|
||||||
if packaging_format < 2:
|
if packaging_format < 2:
|
||||||
|
@ -218,34 +115,31 @@ def build_catalog():
|
||||||
if "manifest" in app and "resources" in app["manifest"]:
|
if "manifest" in app and "resources" in app["manifest"]:
|
||||||
del app["manifest"]["resources"]
|
del app["manifest"]["resources"]
|
||||||
|
|
||||||
|
logos_dir = target_dir / "logos"
|
||||||
|
logos_dir.mkdir(parents=True, exist_ok=True)
|
||||||
for appid, app in result_dict_with_manifest_v2.items():
|
for appid, app in result_dict_with_manifest_v2.items():
|
||||||
appid = appid.lower()
|
appid = appid.lower()
|
||||||
if (REPO_APPS_PATH / "logos" / f"{appid}.png").exists():
|
logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
|
||||||
logo_hash = subprocess.check_output(["sha256sum", f"logos/{appid}.png"]).strip().decode("utf-8").split()[0]
|
if logo_source.exists():
|
||||||
os.system(f"cp logos/{appid}.png builds/default/v3/logos/{logo_hash}.png")
|
logo_hash = subprocess.check_output(["sha256sum", logo_source]).strip().decode("utf-8").split()[0]
|
||||||
|
shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
|
||||||
# FIXME: implement something to clean up old logo stuff in the builds/.../logos/ folder somehow
|
# FIXME: implement something to clean up old logo stuff in the builds/.../logos/ folder somehow
|
||||||
else:
|
else:
|
||||||
logo_hash = None
|
logo_hash = None
|
||||||
app["logo_hash"] = logo_hash
|
app["logo_hash"] = logo_hash
|
||||||
|
|
||||||
os.system("mkdir -p ./builds/default/v3/")
|
full_catalog = {
|
||||||
with open("builds/default/v3/apps.json", "w") as f:
|
"apps": result_dict_with_manifest_v2,
|
||||||
f.write(
|
"categories": categories,
|
||||||
json.dumps(
|
"antifeatures": antifeatures,
|
||||||
{
|
}
|
||||||
"apps": result_dict_with_manifest_v2,
|
|
||||||
"categories": categories,
|
|
||||||
"antifeatures": antifeatures,
|
|
||||||
},
|
|
||||||
sort_keys=True,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
##############################
|
target_file = target_dir / "apps.json"
|
||||||
# Version for catalog in doc #
|
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
##############################
|
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
|
||||||
os.system("mkdir -p ./builds/default/doc_catalog")
|
|
||||||
|
|
||||||
|
|
||||||
|
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
|
||||||
def infos_for_doc_catalog(infos):
|
def infos_for_doc_catalog(infos):
|
||||||
level = infos.get("level")
|
level = infos.get("level")
|
||||||
if not isinstance(level, int):
|
if not isinstance(level, int):
|
||||||
|
@ -267,31 +161,40 @@ def build_catalog():
|
||||||
|
|
||||||
result_dict_doc = {
|
result_dict_doc = {
|
||||||
k: infos_for_doc_catalog(v)
|
k: infos_for_doc_catalog(v)
|
||||||
for k, v in result_dict.items()
|
for k, v in base_catalog.items()
|
||||||
if v["state"] == "working"
|
if v["state"] == "working"
|
||||||
}
|
}
|
||||||
with open("builds/default/doc_catalog/apps.json", "w") as f:
|
full_catalog = {
|
||||||
f.write(
|
"apps": result_dict_doc,
|
||||||
json.dumps(
|
"categories": categories
|
||||||
{"apps": result_dict_doc, "categories": categories}, sort_keys=True
|
}
|
||||||
)
|
|
||||||
)
|
target_file = target_dir / "apps.json"
|
||||||
|
target_file.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
|
||||||
|
|
||||||
|
|
||||||
def build_app_dict(app, infos):
|
def build_app_dict(app, infos):
|
||||||
|
|
||||||
# Make sure we have some cache
|
# Make sure we have some cache
|
||||||
this_app_cache = app_cache_folder(app)
|
this_app_cache = app_cache_folder(app)
|
||||||
assert this_app_cache.exists(), "No cache yet for %s" % app
|
assert this_app_cache.exists(), f"No cache yet for {app}"
|
||||||
|
|
||||||
repo = Repo(this_app_cache)
|
repo = Repo(this_app_cache)
|
||||||
|
|
||||||
commit_timestamps_for_this_app_in_catalog = \
|
commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
|
||||||
repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix",
|
"-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
|
||||||
"--format=%cd", "--", "apps.json", "apps.toml")
|
"--format=%cd", "--", "apps.json").split("\n")
|
||||||
|
if len(commits_in_apps_json) > 1:
|
||||||
|
first_commit = commits_in_apps_json[0]
|
||||||
|
else:
|
||||||
|
commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
|
||||||
|
"-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
|
||||||
|
"--format=%cd", "--", "apps.json", "apps.toml").split("\n")
|
||||||
|
first_commit = commits_in_apps_toml[0]
|
||||||
|
|
||||||
# Assume the first entry we get (= the oldest) is the time the app was added
|
# Assume the first entry we get (= the oldest) is the time the app was added
|
||||||
infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
|
infos["added_in_catalog"] = int(first_commit)
|
||||||
|
# int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
|
||||||
|
|
||||||
infos["branch"] = infos.get("branch", "master")
|
infos["branch"] = infos.get("branch", "master")
|
||||||
infos["revision"] = infos.get("revision", "HEAD")
|
infos["revision"] = infos.get("revision", "HEAD")
|
||||||
|
@ -338,7 +241,7 @@ def build_app_dict(app, infos):
|
||||||
"manifest": manifest,
|
"manifest": manifest,
|
||||||
"state": infos["state"],
|
"state": infos["state"],
|
||||||
"level": infos.get("level", "?"),
|
"level": infos.get("level", "?"),
|
||||||
"maintained": not 'package-not-maintained' in infos.get('antifeatures', []),
|
"maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
|
||||||
"high_quality": infos.get("high_quality", False),
|
"high_quality": infos.get("high_quality", False),
|
||||||
"featured": infos.get("featured", False),
|
"featured": infos.get("featured", False),
|
||||||
"category": infos.get("category", None),
|
"category": infos.get("category", None),
|
||||||
|
@ -351,5 +254,9 @@ def build_app_dict(app, infos):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
refresh_all_caches()
|
apps_cache_update_all(get_catalog(), parallel=50)
|
||||||
build_catalog()
|
|
||||||
|
catalog = build_base_catalog()
|
||||||
|
write_catalog_v2(catalog, REPO_APPS_ROOT / "builds" / "default" / "v2")
|
||||||
|
write_catalog_v3(catalog, REPO_APPS_ROOT / "builds" / "default" / "v3")
|
||||||
|
write_catalog_doc(catalog, REPO_APPS_ROOT / "builds" / "default" / "doc_catalog")
|
||||||
|
|
Loading…
Reference in a new issue