1
0
Fork 0

Merge pull request #1999 from Salamandar/refactor

Rework list_builder.py
This commit is contained in:
Alexandre Aubin 2024-02-08 22:38:54 +01:00 committed by GitHub
commit 9dd6086415
8 changed files with 183 additions and 340 deletions

View file

@ -76,7 +76,7 @@ def __app_cache_clone_or_update_mapped(data):
def apps_cache_update_all(apps: dict[str, dict[str, Any]], parallel: int = 8) -> None:
with Pool(processes=parallel) as pool:
tasks = pool.imap_unordered(__app_cache_clone_or_update_mapped, apps.items())
for _ in tqdm.tqdm(tasks, total=len(apps.keys())):
for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
pass

View file

@ -1,68 +0,0 @@
#!/usr/bin/env python3
import logging
from pathlib import Path
import utils
from git import Repo
def apps_cache_path() -> Path:
    """Return the ``.apps_cache`` directory under ``apps_repo_root()``.

    The directory is created on first use.
    """
    path = apps_repo_root() / ".apps_cache"
    # This helper runs on every cache access, so the directory usually
    # exists already; a bare mkdir() would raise FileExistsError then.
    path.mkdir(exist_ok=True)
    return path
def app_cache_path(app: str) -> Path:
    """Return the cache directory for *app* inside ``apps_cache_path()``.

    The directory is created on first use.
    """
    path = apps_cache_path() / app
    # Called both before cloning and before every update, so the folder
    # may well exist already; a bare mkdir() would raise FileExistsError.
    path.mkdir(exist_ok=True)
    return path
# def refresh_all_caches(catalog: dict[str, dict[str, str]]):
# for app, infos
# pass
def app_cache_clone(app: str, infos: dict[str, str]) -> None:
    """Clone the repository at ``infos["url"]`` into the cache folder of *app*.

    Only a single branch is cloned (``infos["branch"]``, ``"master"`` when
    unset), with a history depth chosen from ``infos["state"]``.
    """
    depth_by_state = {"notworking": 5, "inprogress": 20, "default": 40}

    url = infos["url"]
    destination = app_cache_path(app)
    # States without a dedicated entry use the "default" depth.
    depth = depth_by_state.get(infos["state"], depth_by_state["default"])
    branch = infos.get("branch", "master")

    Repo.clone_from(
        url,
        to_path=destination,
        depth=depth,
        single_branch=True,
        branch=branch,
    )
def app_cache_update(app: str, infos: dict[str, str]) -> None:
    """Bring the cached clone of *app* up to date with its remote branch.

    Falls back to a fresh clone when ``utils.git_repo_age`` returns ``False``,
    and does nothing when the reported age is below one hour. Otherwise the
    clone is pointed at ``infos["url"]``, switched to the wanted branch
    (``"master"`` when unset), fetched and hard-reset to ``origin/<branch>``.
    """
    app_path = app_cache_path(app)
    age = utils.git_repo_age(app_path)
    if age is False:
        return app_cache_clone(app, infos)

    if age < 3600:
        logging.info(f"Skipping {app}, it's been updated recently.")
        return

    repo = Repo(app_path)
    repo.remote("origin").set_url(infos["url"])

    branch = infos.get("branch", "master")
    # active_branch is a GitPython Head object, not a str: compare by name,
    # otherwise the test is always true and the branch is re-checked-out
    # on every update.
    if str(repo.active_branch) != branch:
        all_branches = [str(b) for b in repo.branches]
        if branch in all_branches:
            repo.git.checkout(branch, "--force")
        else:
            repo.git.remote("set-branches", "--add", "origin", branch)
            repo.remote("origin").fetch(f"{branch}:{branch}")

    repo.remote("origin").fetch(refspec=branch, force=True)
    repo.git.reset("--hard", f"origin/{branch}")
def cache_all_apps(catalog: dict[str, dict[str, str]]) -> None:

36
appslib/logging_sender.py Normal file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env python3
import subprocess
from shutil import which
import logging
import logging.handlers
class LogSenderHandler(logging.Handler):
    """Logging handler that forwards records through the ``sendxmpppy`` command."""

    def __init__(self):
        logging.Handler.__init__(self)
        # Re-entrancy guard: True while emit() is running on this handler.
        self.is_logging = False

    @staticmethod
    def _build_message(record):
        """Return the text sent for *record*, with its %-style args applied."""
        # getMessage() interpolates record.args; record.msg alone would send
        # the raw template (e.g. "Error while updating %s: %s").
        return f"[Applist builder error] {record.getMessage()}"

    def emit(self, record):
        """Send *record* with ``sendxmpppy``, or log a warning if it is not installed."""
        # emit() itself logs a warning below; if this handler is registered
        # at WARNING level or lower that would call emit() again, forever.
        if self.is_logging:
            return
        self.is_logging = True
        try:
            if which("sendxmpppy") is None:
                logging.warning("Could not send error via xmpp.")
                return

            msg = self._build_message(record)
            subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
        finally:
            self.is_logging = False

    @classmethod
    def add(cls, level=logging.ERROR):
        """Attach a new handler of this class to the root logger.

        ``logging.basicConfig()`` is called first when the root logger has no
        handler yet. The new handler only receives records at *level* or above.
        """
        if not logging.getLogger().handlers:
            logging.basicConfig()

        # create handler
        handler = cls()
        handler.setLevel(level)
        # add the handler
        logging.getLogger().addHandler(handler)
def enable():
    """Register a LogSenderHandler for records of level ERROR and above."""
    LogSenderHandler.add(level=logging.ERROR)

View file

@ -36,32 +36,8 @@ def git_repo_age(path: Path) -> bool | int:
return False
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
def progressbar(
        it: list[Any],
        prefix: str = "",
        size: int = 60,
        file: TextIO = sys.stdout) -> Generator[Any, None, None]:
    """Yield the items of *it* while drawing a text progress bar on *file*.

    The bar is redrawn in place (carriage return) after each item has been
    consumed, followed by the first element of that item as a label, and a
    final newline is written once the iteration is over.

    Args:
        it: Sized sequence of items; each item must be indexable, its first
            element being the label shown next to the bar.
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Stream the bar is written to.
    """
    count = len(it)

    def show(done: int, name: str = "") -> None:
        # With no items there is nothing left to do: draw the bar as
        # complete rather than dividing by zero.
        filled = int(size * done / count) if count else size
        bar = "#" * filled + "." * (size - filled)
        file.write(f"{prefix}[{bar}] {done}/{count} {name} \r")
        file.flush()

    show(0)
    for index, item in enumerate(it, start=1):
        yield item
        show(index, item[0])
    file.write("\n")
    file.flush()
@cache
def get_catalog(working_only=False):
def get_catalog(working_only: bool = False) -> dict[str, dict[str, Any]]:
"""Load the app catalog and filter out the non-working ones"""
catalog = toml.load((REPO_APPS_ROOT / "apps.toml").open("r", encoding="utf-8"))
if working_only:
@ -70,3 +46,27 @@ def get_catalog(working_only=False):
if infos.get("state") != "notworking"
}
return catalog
@cache
def get_categories() -> dict[str, Any]:
    """Parse ``categories.toml`` under ``REPO_APPS_ROOT`` (result is memoized)."""
    return toml.load(REPO_APPS_ROOT / "categories.toml")
@cache
def get_antifeatures() -> dict[str, Any]:
    """Parse ``antifeatures.toml`` under ``REPO_APPS_ROOT`` (result is memoized)."""
    return toml.load(REPO_APPS_ROOT / "antifeatures.toml")
@cache
def get_wishlist() -> dict[str, dict[str, str]]:
    """Parse ``wishlist.toml`` under ``REPO_APPS_ROOT`` (result is memoized)."""
    return toml.load(REPO_APPS_ROOT / "wishlist.toml")
@cache
def get_graveyard() -> dict[str, dict[str, str]]:
    """Parse ``graveyard.toml`` under ``REPO_APPS_ROOT`` (result is memoized)."""
    graveyard_path = REPO_APPS_ROOT / "graveyard.toml"
    return toml.load(graveyard_path)

View file

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import json
import os

View file

@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import csv
import json

View file

@ -3,48 +3,16 @@
import json
import sys
from difflib import SequenceMatcher
from functools import cache
from pathlib import Path
from typing import Any, Dict, Generator, List, Tuple
import jsonschema
import toml
APPS_ROOT = Path(__file__).parent.parent
@cache
def get_catalog() -> Dict[str, Dict[str, Any]]:
    """Parse ``apps.toml`` under ``APPS_ROOT`` (result is memoized)."""
    return toml.load(APPS_ROOT / "apps.toml")
@cache
def get_categories() -> Dict[str, Any]:
    """Parse ``categories.toml`` under ``APPS_ROOT`` (result is memoized)."""
    return toml.load(APPS_ROOT / "categories.toml")
@cache
def get_antifeatures() -> Dict[str, Any]:
    """Parse ``antifeatures.toml`` under ``APPS_ROOT`` (result is memoized)."""
    return toml.load(APPS_ROOT / "antifeatures.toml")
@cache
def get_wishlist() -> Dict[str, Dict[str, str]]:
    """Parse ``wishlist.toml`` under ``APPS_ROOT`` (result is memoized)."""
    return toml.load(APPS_ROOT / "wishlist.toml")
@cache
def get_graveyard() -> Dict[str, Dict[str, str]]:
    """Parse ``graveyard.toml`` under ``APPS_ROOT`` (result is memoized)."""
    graveyard_path = APPS_ROOT / "graveyard.toml"
    return toml.load(graveyard_path)
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
get_antifeatures, get_catalog, get_categories,
get_graveyard, get_wishlist)
def validate_schema() -> Generator[str, None, None]:
with open(APPS_ROOT / "schemas" / "apps.toml.schema.json", encoding="utf-8") as file:
with open(REPO_APPS_ROOT / "schemas" / "apps.toml.schema.json", encoding="utf-8") as file:
apps_catalog_schema = json.load(file)
validator = jsonschema.Draft202012Validator(apps_catalog_schema)
for error in validator.iter_errors(get_catalog()):

View file

@ -1,251 +1,140 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import copy
import json
import os
import re
import logging
import multiprocessing
import shutil
import subprocess
import sys
import time
from collections import OrderedDict
from functools import cache
from pathlib import Path
from shutil import which
from typing import Any, Generator, TextIO
from typing import Any
import toml
import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from git import Repo
import appslib.logging_sender # pylint: disable=import-error
from app_caches import app_cache_folder # pylint: disable=import-error
from app_caches import apps_cache_update_all # pylint: disable=import-error
from appslib.utils import (REPO_APPS_ROOT, # pylint: disable=import-error
get_antifeatures, get_catalog, get_categories)
from packaging_v2.convert_v1_manifest_to_v2_for_catalog import \
convert_v1_manifest_to_v2_for_catalog # pylint: disable=import-error
now = time.time()
REPO_APPS_PATH = Path(__file__).parent.parent
# Load categories and reformat the structure to have a list with an "id" key
categories = toml.load((REPO_APPS_PATH / "categories.toml").open("r", encoding="utf-8"))
for category_id, infos in categories.items():
infos["id"] = category_id
for subtag_id, subtag_infos in infos.get("subtags", {}).items():
subtag_infos["id"] = subtag_id
infos["subtags"] = list(infos.get('subtags', {}).values())
categories = list(categories.values())
# (Same for antifeatures)
antifeatures = toml.load((REPO_APPS_PATH / "antifeatures.toml").open("r", encoding="utf-8"))
for antifeature_id, infos in antifeatures.items():
infos["id"] = antifeature_id
antifeatures = list(antifeatures.values())
# Load the app catalog and filter out the non-working ones
catalog = toml.load((REPO_APPS_PATH / "apps.toml").open("r", encoding="utf-8"))
catalog = {
app: infos for app, infos in catalog.items() if infos.get("state") != "notworking"
}
my_env = os.environ.copy()
my_env["GIT_TERMINAL_PROMPT"] = "0"
(REPO_APPS_PATH / ".apps_cache").mkdir(exist_ok=True)
(REPO_APPS_PATH / "builds").mkdir(exist_ok=True)
@cache
def categories_list():
    """Return the categories as a list of dicts, each carrying its own ``"id"``.

    Every category gets an ``"id"`` key holding its key in the categories
    mapping, and its ``"subtags"`` mapping is turned into a list of subtag
    dicts that carry their own ``"id"`` the same way (an empty list when the
    category has no subtags).
    """
    categories = []
    # Build new dicts instead of editing the ones returned by get_categories():
    # that result is memoized, so in-place edits would leak to every other
    # caller of get_categories().
    for category_id, infos in get_categories().items():
        subtags = [
            {**subtag_infos, "id": subtag_id}
            for subtag_id, subtag_infos in infos.get("subtags", {}).items()
        ]
        categories.append({**infos, "id": category_id, "subtags": subtags})
    return categories
def error(msg: str) -> None:
    """Report an error: send it with ``sendxmpppy`` when available, then print it.

    The message is prefixed with ``[Applist builder error]`` in both cases.
    """
    msg = "[Applist builder error] " + msg
    if which("sendxmpppy") is not None:
        # subprocess.DEVNULL discards the output without leaving an
        # os.devnull file handle open on every call.
        subprocess.call(["sendxmpppy", msg], stdout=subprocess.DEVNULL)
    print(msg + "\n")
# Progress bar helper, stolen from https://stackoverflow.com/a/34482761
def progressbar(it: list[Any], prefix: str = "", size: int = 60, file: TextIO = sys.stdout
                ) -> Generator[Any, None, None]:
    """Yield the items of *it* while drawing a text progress bar on *file*.

    The bar is redrawn in place (carriage return) after each item has been
    consumed, followed by the first element of that item as a label, and a
    final newline is written once the iteration is over.

    Args:
        it: Sized sequence of items; each item must be indexable, its first
            element being the label shown next to the bar.
        prefix: Text written before the bar.
        size: Width of the bar, in characters.
        file: Stream the bar is written to.
    """
    count = len(it)

    def show(done: int, name: str = "") -> None:
        # With no items there is nothing left to do: draw the bar as
        # complete rather than dividing by zero.
        filled = int(size * done / count) if count else size
        bar = "#" * filled + "." * (size - filled)
        file.write(f"{prefix}[{bar}] {done}/{count} {name} \r")
        file.flush()

    show(0)
    for index, item in enumerate(it, start=1):
        yield item
        show(index, item[0])
    file.write("\n")
    file.flush()
###################################
# App git clones cache management #
###################################
def app_cache_folder(app: str) -> Path:
    """Return the folder used for *app* inside ``.apps_cache``."""
    cache_root = REPO_APPS_PATH / ".apps_cache"
    return cache_root / app
def refresh_all_caches() -> None:
    """Create or refresh the cached clone of every app in ``catalog``.

    Apps are processed in sorted order behind a progress bar. A failed initial
    clone is reported and skipped; a failed refresh is reported and re-raised.
    """
    for app, infos in progressbar(sorted(catalog.items()), "Updating git clones: ", 40):
        app = app.lower()
        if not app_cache_folder(app).exists():
            try:
                init_cache(app, infos)
            except Exception as e:
                # Include the cause, otherwise the report is not actionable.
                error("Failed to init cache for %s: %s" % (app, e))
        else:
            try:
                refresh_cache(app, infos)
            except Exception as e:
                error("Failed to refresh cache for %s: %s" % (app, e))
                raise
def init_cache(app: str, infos: dict[str, str]) -> None:
    """Clone the repository at ``infos["url"]`` into the cache folder of *app*.

    Only a single branch is cloned (``infos["branch"]``, ``"master"`` when
    unset), with a history depth chosen from ``infos["state"]``.
    """
    depth_by_state = {"notworking": 5, "inprogress": 20, "default": 40}

    url = infos["url"]
    destination = app_cache_folder(app)
    # States without a dedicated entry use the "default" depth.
    depth = depth_by_state.get(infos["state"], depth_by_state["default"])
    branch = infos.get("branch", "master")

    Repo.clone_from(
        url,
        to_path=destination,
        depth=depth,
        single_branch=True,
        branch=branch,
    )
def git_repo_age(path: Path) -> bool | int:
fetch_head = path / ".git" / "FETCH_HEAD"
if fetch_head.exists():
return int(time.time() - fetch_head.stat().st_mtime)
return False
def refresh_cache(app: str, infos: dict[str, str]) -> None:
    """Fetch and hard-reset the cached clone of *app* to its remote branch.

    Nothing is done when the clone was fetched less than an hour ago. A
    failing refresh is tolerated as long as the last fetch is less than
    24 hours old; otherwise the original exception propagates.
    """
    app_path = app_cache_folder(app)

    # Don't refresh if already refreshed during last hour
    age = git_repo_age(app_path)
    if age is not False and age < 3600:
        return

    try:
        repo = Repo(app_path)
        repo.remote("origin").set_url(infos["url"])

        branch = infos.get("branch", "master")
        # active_branch is a GitPython Head object, not a str: compare by
        # name, otherwise the test is always true.
        if str(repo.active_branch) != branch:
            all_branches = [str(b) for b in repo.branches]
            if branch in all_branches:
                repo.git.checkout(branch, "--force")
            else:
                repo.git.remote("set-branches", "--add", "origin", branch)
                repo.remote("origin").fetch(f"{branch}:{branch}")

        repo.remote("origin").fetch(refspec=branch, force=True)
        repo.git.reset("--hard", f"origin/{branch}")
    except Exception:
        # Only real errors are tolerated here: a bare "except:" would also
        # swallow KeyboardInterrupt / SystemExit.
        # Refreshing sometimes fails for temporary reasons, so no error is
        # raised unless the cache has not been updated for more than 24 hours.
        age = git_repo_age(app_path)
        if age is False or age >= 24 * 3600:
            raise
@cache
def antifeatures_list():
    """Return the antifeatures as a list of dicts, each carrying its own ``"id"``.

    The ``"id"`` value is the antifeature's key in the antifeatures mapping.
    """
    # Build new dicts instead of editing the ones returned by
    # get_antifeatures(): that result is memoized, so in-place edits would
    # leak to every other caller.
    return [
        {**infos, "id": antifeature_id}
        for antifeature_id, infos in get_antifeatures().items()
    ]
################################
# Actual list build management #
################################
def __build_app_dict(data) -> tuple[str, dict[str, Any]] | None:
    """Build the catalog entry for one ``(name, info)`` pair.

    Returns ``(name, entry)``, or ``None`` after logging the error when
    ``build_app_dict`` raises.
    """
    name, info = data
    try:
        entry = build_app_dict(name, info)
    except Exception as err:
        logging.error("Error while updating %s: %s", name, err)
        return None
    return name, entry
def build_catalog():
def build_base_catalog():
result_dict = {}
catalog = get_catalog(working_only=True)
for app, infos in progressbar(sorted(catalog.items()), "Processing: ", 40):
with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
with logging_redirect_tqdm():
tasks = pool.imap(__build_app_dict, catalog.items())
app = app.lower()
for result in tqdm.tqdm(tasks, total=len(catalog.keys()), ascii=" ·#"):
if result is not None:
name, info = result
result_dict[name] = info
try:
app_dict = build_app_dict(app, infos)
except Exception as e:
error("Processing %s failed: %s" % (app, str(e)))
continue
return result_dict
result_dict[app_dict["id"]] = app_dict
#############################
# Current catalog API v2 #
#############################
def write_catalog_v2(base_catalog, target_dir: Path) -> None:
result_dict_with_manifest_v1 = copy.deepcopy(base_catalog)
result_dict_with_manifest_v1 = {
name: infos
for name, infos in result_dict_with_manifest_v1.items()
if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2
}
full_catalog = {
"apps": result_dict_with_manifest_v1,
"categories": categories_list(),
"antifeatures": antifeatures_list(),
}
result_dict_with_manifest_v1 = copy.deepcopy(result_dict)
result_dict_with_manifest_v1 = {name: infos for name, infos in result_dict_with_manifest_v1.items() if float(str(infos["manifest"].get("packaging_format", "")).strip() or "0") < 2}
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
os.system("mkdir -p ./builds/default/v2/")
with open("builds/default/v2/apps.json", "w") as f:
f.write(
json.dumps(
{
"apps": result_dict_with_manifest_v1,
"categories": categories,
"antifeatures": antifeatures,
},
sort_keys=True,
)
)
#############################################
# Catalog catalog API v3 (with manifest v2) #
#############################################
result_dict_with_manifest_v2 = copy.deepcopy(result_dict)
def write_catalog_v3(base_catalog, target_dir: Path) -> None:
result_dict_with_manifest_v2 = copy.deepcopy(base_catalog)
for app in result_dict_with_manifest_v2.values():
packaging_format = float(str(app["manifest"].get("packaging_format", "")).strip() or "0")
if packaging_format < 2:
app["manifest"] = convert_v1_manifest_to_v2_for_catalog(app["manifest"])
# We also remove the app install question and resources parts which aint needed anymore by webadmin etc (or at least we think ;P)
# We also remove the app install question and resources parts which aint needed anymore
# by webadmin etc (or at least we think ;P)
for app in result_dict_with_manifest_v2.values():
if "manifest" in app and "install" in app["manifest"]:
del app["manifest"]["install"]
if "manifest" in app and "resources" in app["manifest"]:
del app["manifest"]["resources"]
logos_dir = target_dir / "logos"
logos_dir.mkdir(parents=True, exist_ok=True)
for appid, app in result_dict_with_manifest_v2.items():
appid = appid.lower()
if (REPO_APPS_PATH / "logos" / f"{appid}.png").exists():
logo_hash = subprocess.check_output(["sha256sum", f"logos/{appid}.png"]).strip().decode("utf-8").split()[0]
os.system(f"cp logos/{appid}.png builds/default/v3/logos/{logo_hash}.png")
logo_source = REPO_APPS_ROOT / "logos" / f"{appid}.png"
if logo_source.exists():
logo_hash = subprocess.check_output(["sha256sum", logo_source]).strip().decode("utf-8").split()[0]
shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
# FIXME: implement something to clean up old logo files in the builds/.../logos/ folder
else:
logo_hash = None
app["logo_hash"] = logo_hash
os.system("mkdir -p ./builds/default/v3/")
with open("builds/default/v3/apps.json", "w") as f:
f.write(
json.dumps(
{
"apps": result_dict_with_manifest_v2,
"categories": categories,
"antifeatures": antifeatures,
},
sort_keys=True,
)
)
full_catalog = {
"apps": result_dict_with_manifest_v2,
"categories": categories_list(),
"antifeatures": antifeatures_list(),
}
##############################
# Version for catalog in doc #
##############################
os.system("mkdir -p ./builds/default/doc_catalog")
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
def infos_for_doc_catalog(infos):
level = infos.get("level")
if not isinstance(level, int):
@ -267,31 +156,40 @@ def build_catalog():
result_dict_doc = {
k: infos_for_doc_catalog(v)
for k, v in result_dict.items()
for k, v in base_catalog.items()
if v["state"] == "working"
}
with open("builds/default/doc_catalog/apps.json", "w") as f:
f.write(
json.dumps(
{"apps": result_dict_doc, "categories": categories}, sort_keys=True
)
)
full_catalog = {
"apps": result_dict_doc,
"categories": categories_list()
}
target_file = target_dir / "apps.json"
target_file.parent.mkdir(parents=True, exist_ok=True)
target_file.open("w", encoding="utf-8").write(json.dumps(full_catalog, sort_keys=True))
def build_app_dict(app, infos):
# Make sure we have some cache
this_app_cache = app_cache_folder(app)
assert this_app_cache.exists(), "No cache yet for %s" % app
assert this_app_cache.exists(), f"No cache yet for {app}"
repo = Repo(this_app_cache)
commit_timestamps_for_this_app_in_catalog = \
repo.git.log("-G", f"cinny", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml")
commits_in_apps_json = Repo(REPO_APPS_ROOT).git.log(
"-S", f"\"{app}\"", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json").split("\n")
if len(commits_in_apps_json) > 1:
first_commit = commits_in_apps_json[0]
else:
commits_in_apps_toml = Repo(REPO_APPS_ROOT).git.log(
"-S", f"[{app}]", "--first-parent", "--reverse", "--date=unix",
"--format=%cd", "--", "apps.json", "apps.toml").split("\n")
first_commit = commits_in_apps_toml[0]
# Assume the first entry we get (= the oldest) is the time the app was added
infos["added_in_catalog"] = int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
infos["added_in_catalog"] = int(first_commit)
# int(commit_timestamps_for_this_app_in_catalog.split("\n")[0])
infos["branch"] = infos.get("branch", "master")
infos["revision"] = infos.get("revision", "HEAD")
@ -338,7 +236,7 @@ def build_app_dict(app, infos):
"manifest": manifest,
"state": infos["state"],
"level": infos.get("level", "?"),
"maintained": not 'package-not-maintained' in infos.get('antifeatures', []),
"maintained": 'package-not-maintained' not in infos.get('antifeatures', []),
"high_quality": infos.get("high_quality", False),
"featured": infos.get("featured", False),
"category": infos.get("category", None),
@ -350,6 +248,15 @@ def build_app_dict(app, infos):
}
def main() -> None:
    """Update the app caches, build the base catalog and write every catalog flavour."""
    appslib.logging_sender.enable()
    apps_cache_update_all(get_catalog(), parallel=50)

    base_catalog = build_base_catalog()

    # All outputs live side by side under builds/default/.
    builds_root = REPO_APPS_ROOT / "builds" / "default"
    write_catalog_v2(base_catalog, builds_root / "v2")
    write_catalog_v3(base_catalog, builds_root / "v3")
    write_catalog_doc(base_catalog, builds_root / "doc_catalog")
if __name__ == "__main__":
refresh_all_caches()
build_catalog()
main()