1
0
Fork 0

Merge pull request #1999 from Salamandar/refactor

Rework list_builder.py
This commit is contained in:
Alexandre Aubin 2024-02-08 22:38:54 +01:00 committed by GitHub
commit 9dd6086415
8 changed files with 183 additions and 340 deletions

View file

@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
    """Clone or refresh the git cache of every app in *apps* with a process pool.

    Args:
        apps: Mapping of app name to its catalog entry; every ``(name, infos)``
            item is handed to ``__app_cache_clone_or_update_mapped``.
        parallel: Number of worker processes in the pool.
    """
    with Pool(processes=parallel) as pool:
        tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
        # The results themselves are not used: iterating drives the progress bar.
        for _ in tqdm.tqdm(tasks, total=len(apps), ascii=" ·#"):
            pass

View file

@ -1,68 +0,0 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
import utils
from git import Repo
def apps_cache_path() -> Path:
    """Return the ``.apps_cache`` directory under the apps repo root, creating it if needed."""
    path = apps_repo_root() / ".apps_cache"
    # This runs on every lookup, so the directory usually exists already:
    # a plain mkdir() would raise FileExistsError from the second call on.
    path.mkdir(parents=True, exist_ok=True)
    return path
def app_cache_path(app: str) -> Path:
    """Return the cache directory of *app* inside the apps cache, creating it if needed."""
    path = apps_cache_path() / app
    # Called again for apps that are already cached, so an existing directory
    # must not be an error.
    path.mkdir(parents=True, exist_ok=True)
    return path
# def refresh_all_caches(catalog: dict[str, dict[str, str]]):
# for app, infos
# pass
def app_cache_clone(app: str, infos: dict[str, str]) -> None:
    """Shallow-clone the repository of *app* into its cache directory.

    The clone depth is picked from ``infos["state"]``; states without a
    dedicated entry use the ``"default"`` depth. Only one branch is cloned:
    ``infos["branch"]``, or ``master`` when that key is absent.
    """
    depth_by_state = {"notworking": 5, "inprogress": 20, "default": 40}

    url = infos["url"]
    target = app_cache_path(app)
    depth = depth_by_state.get(infos["state"], depth_by_state["default"])
    branch = infos.get("branch", "master")

    Repo.clone_from(url, to_path=target, depth=depth, single_branch=True, branch=branch)
def app_cache_update(app: str, infos: dict[str, str]) -> None:
    """Bring the cached clone of *app* up to date with its configured branch.

    A ``False`` age from ``utils.git_repo_age`` triggers a fresh clone instead.
    Clones whose age is under one hour are left untouched. Otherwise the
    ``origin`` URL is re-pointed at ``infos["url"]``, the wanted branch
    (``master`` by default) is checked out or fetched, and the working tree is
    hard-reset onto ``origin/<branch>``.
    """
    app_path = app_cache_path(app)
    age = utils.git_repo_age(app_path)
    if age is False:
        return app_cache_clone(app, infos)

    if age < 3600:
        logging.info(f"Skipping {app}, it's been updated recently.")
        return

    repo = Repo(app_path)
    repo.remote("origin").set_url(infos["url"])

    branch = infos.get("branch", "master")
    # active_branch is a Head object: compare its name, since a Head never
    # equals a plain string and the test would otherwise always be true.
    if repo.active_branch.name != branch:
        all_branches = [str(b) for b in repo.branches]
        if branch in all_branches:
            repo.git.checkout(branch, "--force")
        else:
            repo.git.remote("set-branches", "--add", "origin", branch)
            repo.remote("origin").fetch(f"{branch}:{branch}")

    repo.remote("origin").fetch(refspec=branch, force=True)
    repo.git.reset("--hard", f"origin/{branch}")
def cache_all_apps(catalog: dict[str, dict[str, str]]) -> None:

36
appslib/logging_sender.py Normal file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env python3
import subprocess
from shutil import which
import logging
import logging.handlers
class LogSenderHandler(logging.Handler):
    """Logging handler that forwards records through the ``sendxmpppy`` command."""

    def __init__(self):
        logging.Handler.__init__(self)
        # True while emit() is running; used to ignore records logged by emit() itself.
        self.is_logging = False

    @staticmethod
    def _build_message(record):
        """Return the text sent for *record*, with its %-style arguments applied."""
        # record.msg is only the raw format string ("... %s: %s");
        # getMessage() merges record.args into it.
        return f"[Applist builder error] {record.getMessage()}"

    def emit(self, record):
        """Send *record* via ``sendxmpppy``, or log a warning when the command is missing."""
        if self.is_logging:
            # The warning below was routed back to this handler (possible when
            # its level is WARNING or lower): drop it instead of recursing.
            return
        self.is_logging = True
        try:
            if which("sendxmpppy") is None:
                logging.warning("Could not send error via xmpp.")
                return
            subprocess.call(
                ["sendxmpppy", self._build_message(record)],
                stdout=subprocess.DEVNULL,
            )
        finally:
            self.is_logging = False

    @classmethod
    def add(cls, level=logging.ERROR):
        """Attach a new handler of this class, set to *level*, to the root logger."""
        if not logging.getLogger().handlers:
            logging.basicConfig()

        # create handler
        handler = cls()
        handler.setLevel(level)
        # add the handler through the logging API rather than the raw list
        logging.getLogger().addHandler(handler)
def enable():
    """Install a LogSenderHandler for records of level ERROR and above."""
    LogSenderHandler.add(level=logging.ERROR)

View file

@ -36,32 +36,8 @@ def git_repo_age(path: Path) -> bool | int:
return False return False
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
def progressbar(
        it: list[Any],
        prefix: str = "",
        size: int = 60,
        file: TextIO = sys.stdout) -> Generator[Any, None, None]:
    """Yield the items of *it* while drawing a one-line progress bar on *file*.

    Args:
        it: Sized sequence to iterate over. After each item is consumed,
            ``item[0]`` is printed next to the bar as the item's name.
        prefix: Text written before the bar.
        size: Width of the bar in characters.
        file: Stream the bar is written to.

    An empty *it* draws an empty ``0/0`` bar and yields nothing.
    """
    count = len(it)

    def show(j, name=""):
        name += " "
        # Nothing to divide by for an empty input: keep the bar empty.
        x = int(size * j / count) if count else 0
        file.write(
            "%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
        )
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1, item[0])
    file.write("\n")
    file.flush()
@cache @cache
def get_catalog(working_only=False): def get_catalog(working_only: bool = False) -> dict[str, dict[str, Any]]:
"""Load the app catalog and filter out the non-working ones""" """Load the app catalog and filter out the non-working ones"""
catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8")) catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
if working_only: if working_only:
@ -70,3 +46,27 @@ def get_catalog(working_only=False):
if infos.get("state") != "notworking" if infos.get("state") != "notworking"
} }
return catalog return catalog
@cache
def get_categories() -> dict[str, Any]:
    """Parse ``categories.toml`` from the apps repository root (memoised by ``@cache``)."""
    return toml.load(REPO_APPS_ROOT / "categories.toml")
@cache
def get_antifeatures() -> dict[str, Any]:
    """Parse ``antifeatures.toml`` from the apps repository root (memoised by ``@cache``)."""
    return toml.load(REPO_APPS_ROOT / "antifeatures.toml")
@cache
def get_wishlist() -> dict[str, dict[str, str]]:
    """Parse ``wishlist.toml`` from the apps repository root (memoised by ``@cache``)."""
    return toml.load(REPO_APPS_ROOT / "wishlist.toml")
@cache
def get_graveyard() -> dict[str, dict[str, str]]:
    """Parse ``graveyard.toml`` from the apps repository root (memoised by ``@cache``)."""
    graveyard_path = REPO_APPS_ROOT / "graveyard.toml"
    return toml.load(graveyard_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/python3 #!/usr/bin/env python3
import json import json
import os import os

View file

@ -1,4 +1,4 @@
#!/usr/bin/python3 #!/usr/bin/env python3
import csv import csv
import json import json

View file

@ -3,48 +3,16 @@
import json import json
import sys import sys
from difflib import SequenceMatcher from difflib import SequenceMatcher
from functools import cache
from pathlib import Path
from typing import Any, Dict, Generator, List, Tuple from typing import Any, Dict, Generator, List, Tuple
import jsonschema import jsonschema
import toml from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
get_antifeatures, get_catalog, get_categories,
APPS_ROOT = Path(__file__).parent.parent get_graveyard, get_wishlist)
@cache
def get_catalog() -> Dict[str, Dict[str, Any]]:
    """Load ``apps.toml`` from ``APPS_ROOT``; the parsed result is memoised by ``@cache``."""
    return toml.load(APPS_ROOT / "apps.toml")
@cache
def get_categories() -> Dict[str, Any]:
    """Load ``categories.toml`` from ``APPS_ROOT``; the parsed result is memoised by ``@cache``."""
    return toml.load(APPS_ROOT / "categories.toml")
@cache
def get_antifeatures() -> Dict[str, Any]:
    """Load ``antifeatures.toml`` from ``APPS_ROOT``; the parsed result is memoised by ``@cache``."""
    return toml.load(APPS_ROOT / "antifeatures.toml")
@cache
def get_wishlist() -> Dict[str, Dict[str, str]]:
    """Load ``wishlist.toml`` from ``APPS_ROOT``; the parsed result is memoised by ``@cache``."""
    return toml.load(APPS_ROOT / "wishlist.toml")
@cache
def get_graveyard() -> Dict[str, Dict[str, str]]:
    """Load ``graveyard.toml`` from ``APPS_ROOT``; the parsed result is memoised by ``@cache``."""
    graveyard_path = APPS_ROOT / "graveyard.toml"
    return toml.load(graveyard_path)
def validate_schema() -> Generator[str, None, None]: def validate_schema() -> Generator[str, None, None]:
with open(APPS_ROOT / "schemas" / "apps.toml.schema.json", encoding="utf-8") as file: with open(REPO_APPS_ROOT / "schemas" / "apps.toml.schema.json", encoding="utf-8") as file:
apps_catalog_schema = json.load(file) apps_catalog_schema = json.load(file)
validator = jsonschema.Draft202012Validator(apps_catalog_schema) validator = jsonschema.Draft202012Validator(apps_catalog_schema)
for error in validator.iter_errors(get_catalog()): for error in validator.iter_errors(get_catalog()):

View file

@ -1,251 +1,140 @@
#!/usr/bin/python3 #!/usr/bin/env python3
import copy import copy
import json import json
import os import logging
import re import multiprocessing
import shutil
import subprocess import subprocess
import sys
import time import time
from collections import OrderedDict from collections import OrderedDict
from functools import cache
from pathlib import Path from pathlib import Path
from shutil import which from typing import Any
from typing import Any, Generator, TextIO
import toml import toml
import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from git import Repo from git import Repo
import appslib.logging_sender # pylint: disable=import-error
from app_caches import app_cache_folder # pylint: disable=import-error
from app_caches import apps_cache_update_all # pylint: disable=import-error
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
get_antifeatures, get_catalog, get_categories)
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \ from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
now = time.time() now = time.time()
REPO_APPS_PATH = Path(__file__).parent.parent
@cache
def categories_list():
    """Return the categories as a list of dicts, each carrying its own ``id`` key.

    Every category's ``subtags`` mapping is likewise turned into a list of
    dicts with an ``id`` key (an empty list when the category has no subtags).
    """
    # Load categories and reformat the structure to have a list with an "id" key.
    # get_categories() returns a memoised dict shared with its other callers:
    # reshape a copy so that shared dict keeps its original structure.
    new_categories = copy.deepcopy(get_categories())
    for category_id, infos in new_categories.items():
        infos["id"] = category_id
        subtags = infos.get("subtags", {})
        for subtag_id, subtag_infos in subtags.items():
            subtag_infos["id"] = subtag_id
        infos["subtags"] = list(subtags.values())
    return list(new_categories.values())
categories = list(categories.values())
@cache
def antifeatures_list():
    """Return the antifeatures as a list of dicts, each carrying its own ``id`` key."""
    # (Same for antifeatures)
    # Work on a copy: get_antifeatures() hands out a memoised, shared dict.
    new_antifeatures = copy.deepcopy(get_antifeatures())
    for antifeature_id, infos in new_antifeatures.items():
        infos["id"] = antifeature_id
    return list(new_antifeatures.values())
# Load the app catalog and filter out the non-working ones
# (the file is opened in a `with` block so its handle is closed after parsing)
with (REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8") as catalog_file:
    catalog = toml.load(catalog_file)
catalog = {
    app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
}

# Copy of the environment with GIT_TERMINAL_PROMPT set to "0"
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"

# Make sure the cache and build directories exist
(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
def error(msg: str) -> None:
    """Print a builder error and, when ``sendxmpppy`` is installed, send it through that command.

    The message is prefixed with ``[Applist builder error]`` in both cases.
    """
    msg = "[Applist builder error] " + msg
    if which("sendxmpppy") is not None:
        # subprocess.DEVNULL discards the output without opening a file
        # handle on os.devnull that nobody closes.
        subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
    print(msg + "\n")
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
                ) -> Generator[Any, None, None]:
    """Yield the items of *it* while drawing a one-line progress bar on *file*.

    Args:
        it: Sized sequence to iterate over. After each item is consumed,
            ``item[0]`` is printed next to the bar as the item's name.
        prefix: Text written before the bar.
        size: Width of the bar in characters.
        file: Stream the bar is written to.

    An empty *it* draws an empty ``0/0`` bar and yields nothing.
    """
    count = len(it)

    def show(j, name=""):
        name += " "
        # Nothing to divide by for an empty input: keep the bar empty.
        x = int(size * j / count) if count else 0
        file.write(
            "%s[%s%s] %i/%i %s\r" % (prefix, "#" * x, "." * (size - x), j, count, name)
        )
        file.flush()

    show(0)
    for i, item in enumerate(it):
        yield item
        show(i + 1, item[0])
    file.write("\n")
    file.flush()
###################################
# App git clones cache management #
###################################
def app_cache_folder(app: str) -> Path:
    """Return the path of *app*'s entry inside the ``.apps_cache`` directory."""
    return REPO_APPS_PATH.joinpath(".apps_cache", app)
def refresh_all_caches() -> None:
    """Create or refresh the cached git clone of every app in ``catalog``.

    Apps without a cache folder are cloned; a failure there is reported and
    the loop moves on. Apps with a cache folder are refreshed; a failure
    there is reported and then re-raised.
    """
    for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
        app = app.lower()
        if not app_cache_folder(app).exists():
            try:
                init_cache(app, infos)
            except Exception as e:
                # Include the cause, as the refresh branch below does.
                error("Failed to init cache for %s: %s" % (app, e))
        else:
            try:
                refresh_cache(app, infos)
            except Exception as e:
                error("Failed to refresh cache for %s: %s" % (app, e))
                # Bare raise keeps the original traceback.
                raise
def init_cache(app: str, infos: dict[str, str]) -> None:
    """Shallow-clone the repository of *app* into its cache folder.

    The clone depth is picked from ``infos["state"]``; states without a
    dedicated entry use the ``"default"`` depth. Only one branch is cloned:
    ``infos["branch"]``, or ``master`` when that key is absent.
    """
    depth_by_state = {"notworking": 5, "inprogress": 20, "default": 40}

    url = infos["url"]
    target = app_cache_folder(app)
    depth = depth_by_state.get(infos["state"], depth_by_state["default"])
    branch = infos.get("branch", "master")

    Repo.clone_from(url, to_path=target, depth=depth, single_branch=True, branch=branch)
def git_repo_age(path: Path) -> bool | int:
fetch_head = path / ".git" / "FETCH_HEAD"
if fetch_head.exists():
return int(time.time() - fetch_head.stat().st_mtime)
return False
def refresh_cache(app: str, infos: dict[str, str]) -> None:
    """Update the cached clone of *app* to the tip of its configured branch.

    Nothing is done when the clone was fetched less than an hour ago.
    Otherwise the ``origin`` URL is re-pointed at ``infos["url"]``, the wanted
    branch (``master`` by default) is checked out or fetched, and the working
    tree is hard-reset onto ``origin/<branch>``.

    Raises:
        Exception: whatever the git operations raised, but only when the
            clone has no fetch timestamp or was last fetched 24 hours ago or
            more; more recent clones tolerate the failure silently.
    """
    app_path = app_cache_folder(app)

    # Don't refresh if already refreshed during last hour
    age = git_repo_age(app_path)
    if age is not False and age < 3600:
        return

    try:
        repo = Repo(app_path)
        repo.remote("origin").set_url(infos["url"])

        branch = infos.get("branch", "master")
        # active_branch is a Head object: compare its name, since a Head never
        # equals a plain string and the test would otherwise always be true.
        if repo.active_branch.name != branch:
            all_branches = [str(b) for b in repo.branches]
            if branch in all_branches:
                repo.git.checkout(branch, "--force")
            else:
                repo.git.remote("set-branches", "--add", "origin", branch)
                repo.remote("origin").fetch(f"{branch}:{branch}")

        repo.remote("origin").fetch(refspec=branch, force=True)
        repo.git.reset("--hard", f"origin/{branch}")
    except Exception:
        # Refreshes sometimes fail for temporary reasons: only treat it as an
        # error when the cache has not been updated for more than 24 hours.
        # (`except Exception` lets KeyboardInterrupt/SystemExit through.)
        age = git_repo_age(app_path)
        if age is False or age >= 24 * 3600:
            raise
################################ ################################
# Actual list build management # # Actual list build management #
################################ ################################
def __build_app_dict(data) -> tuple[str, dict[str, Any]] | None:
    """Build the catalog entry for one ``(name, info)`` item.

    Takes a single tuple argument so it can be mapped over ``dict.items()``.

    Returns:
        ``(name, app_dict)`` on success, or ``None`` when ``build_app_dict``
        raised; the failure is logged at ERROR level.
    """
    name, info = data
    try:
        return name, build_app_dict(name, info)
    except Exception as err:
        logging.error("Error while updating %s: %s", name, err)
        return None
def build_base_catalog():
    """Build the entry of every working app in parallel.

    Returns:
        A dict mapping app name to the dict built by ``build_app_dict``.
        Apps whose build failed (``None`` results) are left out.
    """
    result_dict = {}
    catalog = get_catalog(working_only=True)

    with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
        # Wrap the loop in tqdm's logging redirection while the bar is drawn.
        with logging_redirect_tqdm():
            tasks = pool.imap(__build_app_dict, catalog.items())

            for result in tqdm.tqdm(tasks, total=len(catalog), ascii=" ·#"):
                if result is not None:
                    name, info = result
                    result_dict[name] = info

    return result_dict
def write_catalog_v2(base_catalog, target_dir: Path) -> None:
    """Write the v2 catalog as ``<target_dir>/apps.json``.

    Only apps whose manifest ``packaging_format`` is below 2 (a missing or
    blank value counts as 0) are included, together with the category and
    antifeature lists.

    Args:
        base_catalog: Mapping of app name to app dict; it is not modified.
        target_dir: Output directory, created if missing.
    """
    result_dict_with_manifest_v1 = copy.deepcopy(base_catalog)
    result_dict_with_manifest_v1 = {
        name: infos
        for name, infos in result_dict_with_manifest_v1.items()
        if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
    }
    full_catalog = {
        "apps": result_dict_with_manifest_v1,
        "categories": categories_list(),
        "antifeatures": antifeatures_list(),
    }

    target_file = target_dir / "apps.json"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text closes the file itself; open(...).write(...) never did.
    target_file.write_text(json.dumps(full_catalog, sort_keys=True), encoding="utf-8")
result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
def write_catalog_v3(base_catalog, target_dir: Path) -> None:
    """Write the v3 catalog (manifest v2 format) as ``<target_dir>/apps.json``.

    On a deep copy of *base_catalog*, every manifest with a
    ``packaging_format`` below 2 is converted with
    ``convert_v1_manifest_to_v2_for_catalog``, and the ``install`` and
    ``resources`` manifest sections are dropped. Each app's logo, when
    ``logos/<appid>.png`` exists in the apps repo, is copied to
    ``<target_dir>/logos/<sha256>.png`` and the hash is stored as
    ``logo_hash`` (``None`` without a logo).

    Args:
        base_catalog: Mapping of app name to app dict; it is not modified.
        target_dir: Output directory, created if missing.
    """
    # Local import: hashlib is only needed here, for the logo hashes.
    import hashlib

    result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
    for app in result_dict_with_manifest_v2.values():
        packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
        if packaging_format < 2:
            app["manifest"] = convert_v1_manifest_to_v2_for_catalog(app["manifest"])

    # We also remove the app install question and resources parts which aint needed anymore
    # by webadmin etc (or at least we think ;P)
    for app in result_dict_with_manifest_v2.values():
        if "manifest" in app:
            app["manifest"].pop("install", None)
            app["manifest"].pop("resources", None)

    logos_dir = target_dir / "logos"
    logos_dir.mkdir(parents=True, exist_ok=True)
    for appid, app in result_dict_with_manifest_v2.items():
        appid = appid.lower()
        logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
        if logo_source.exists():
            # Same hex digest as `sha256sum`, without spawning a process per logo.
            logo_hash = hashlib.sha256(logo_source.read_bytes()).hexdigest()
            shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
            # FIXME: implement something to clean up old logo stuff in the builds/.../logos/ folder somehow
        else:
            logo_hash = None
        app["logo_hash"] = logo_hash

    full_catalog = {
        "apps": result_dict_with_manifest_v2,
        "categories": categories_list(),
        "antifeatures": antifeatures_list(),
    }

    target_file = target_dir / "apps.json"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text closes the file itself; open(...).write(...) never did.
    target_file.write_text(json.dumps(full_catalog, sort_keys=True), encoding="utf-8")
os.system("mkdir -p ./builds/default/doc_catalog")
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
def infos_for_doc_catalog(infos): def infos_for_doc_catalog(infos):
level = infos.get("level") level = infos.get("level")
if not isinstance(level, int): if not isinstance(level, int):
@ -267,31 +156,40 @@ def build_catalog():
result_dict_doc = { result_dict_doc = {
k: infos_for_doc_catalog(v) k: infos_for_doc_catalog(v)
for k, v in result_dict.items() for k, v in base_catalog.items()
if v["state"] == "working" if v["state"] == "working"
} }
with open("builds/default/doc_catalog/apps.json", "w") as f: full_catalog = {
f.write( "apps": result_dict_doc,
json.dumps( "categories": categories_list()
{"apps": result_dict_doc, "categories": categories}, sort_keys=True }
)
) target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
def build_app_dict(app, infos): def build_app_dict(app, infos):
# Make sure we have some cache # Make sure we have some cache
this_app_cache = app_cache_folder(app) this_app_cache = app_cache_folder(app)
assert this_app_cache.exists(), "No cache yet for %s" % app assert this_app_cache.exists(), f"No cache yet for {app}"
repo = Repo(this_app_cache) repo = Repo(this_app_cache)
commit_timestamps_for_this_app_in_catalog = \ commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix", "-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml") "--format=%cd", "--", "apps.json").split("\n")
if len(commits_in_apps_json) > 1:
first_commit = commits_in_apps_json[0]
else:
commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
"-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml").split("\n")
first_commit = commits_in_apps_toml[0]
# Assume the first entry we get (= the oldest) is the time the app was added # Assume the first entry we get (= the oldest) is the time the app was added
infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0]) infos["added_in_catalog"] = int(first_commit)
# int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
infos["branch"] = infos.get("branch", "master") infos["branch"] = infos.get("branch", "master")
infos["revision"] = infos.get("revision", "HEAD") infos["revision"] = infos.get("revision", "HEAD")
@ -338,7 +236,7 @@ def build_app_dict(app, infos):
"manifest": manifest, "manifest": manifest,
"state": infos["state"], "state": infos["state"],
"level": infos.get("level", "?"), "level": infos.get("level", "?"),
"maintained": not 'package-not-maintained' in infos.get('antifeatures', []), "maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
"high_quality": infos.get("high_quality", False), "high_quality": infos.get("high_quality", False),
"featured": infos.get("featured", False), "featured": infos.get("featured", False),
"category": infos.get("category", None), "category": infos.get("category", None),
@ -350,6 +248,15 @@ def build_app_dict(app, infos):
} }
def main() -> None:
    """Refresh the app caches, build the base catalog and write the three catalog flavours.

    The outputs go to ``builds/default/v2``, ``builds/default/v3`` and
    ``builds/default/doc_catalog`` under ``REPO_APPS_ROOT``, in that order.
    """
    appslib.logging_sender.enable()
    apps_cache_update_all(get_catalog(), parallel=50)

    catalog = build_base_catalog()
    builds_dir = REPO_APPS_ROOT / "builds" / "default"
    writers = (
        ("v2", write_catalog_v2),
        ("v3", write_catalog_v3),
        ("doc_catalog", write_catalog_doc),
    )
    for subdir, write in writers:
        write(catalog, builds_dir / subdir)
# Script entry point: run the build only when executed directly, not on import.
if __name__ == "__main__":
    main()