1
0
Fork 0
ynh-apps_tools/list_builder.py

267 lines
8.4 KiB
Python
Raw Normal View History

2024-02-08 22:34:48 +01:00
#!/usr/bin/env python3
import argparse
import copy
import json
2024-02-08 22:14:35 +01:00
import logging
import multiprocessing
import shutil
2024-02-08 22:14:35 +01:00
import subprocess
import time
from collections import OrderedDict
2024-02-08 22:14:35 +01:00
from functools import cache
2024-09-11 14:47:49 +02:00
from itertools import repeat
2024-02-08 22:14:35 +01:00
from pathlib import Path
from typing import Any, Optional
import toml
2024-02-08 22:14:35 +01:00
import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from git import Repo
2024-02-08 22:18:59 +01:00
import appslib.logging_sender # pylint: disable=import-error
2024-03-11 17:34:33 +01:00
from appslib.utils import (
2024-09-11 14:47:49 +02:00
get_antifeatures, # pylint: disable=import-error
2024-03-11 17:34:33 +01:00
get_catalog,
get_categories,
)
2024-09-11 14:47:49 +02:00
import appslib.get_apps_repo as get_apps_repo
# Timestamp of when this build run started.
# NOTE(review): appears unused in this file — confirm against other modules
# before removing.
now = time.time()
@cache
def categories_list():
    """Return the categories as a list of dicts, each carrying its own "id" key.

    The upstream structure maps category id -> infos; the catalog format
    expects a flat list where every category (and each of its subtags)
    embeds its id.
    """
    # Deep-copy so we never mutate the structure returned by get_categories(),
    # which may be cached/shared with other callers.
    new_categories = copy.deepcopy(get_categories())
    for category_id, infos in new_categories.items():
        infos["id"] = category_id
        for subtag_id, subtag_infos in infos.get("subtags", {}).items():
            subtag_infos["id"] = subtag_id
        # Flatten the subtags mapping into a list as well
        infos["subtags"] = list(infos.get("subtags", {}).values())
    return list(new_categories.values())
@cache
def antifeatures_list():
    """Return the antifeatures as a list of dicts, each carrying its own "id" key.

    (Same reformatting as categories_list.)
    """
    # Deep-copy so we never mutate the structure returned by get_antifeatures().
    new_antifeatures = copy.deepcopy(get_antifeatures())
    for antifeature_id, infos in new_antifeatures.items():
        infos["id"] = antifeature_id
    return list(new_antifeatures.values())
################################
# Actual list build management #
################################
2024-03-11 17:34:33 +01:00
def __build_app_dict(data) -> Optional[tuple[str, dict[str, Any]]]:
    """Pool-worker wrapper around build_app_dict().

    Takes ((name, info), cache_path) as a single argument (imap-friendly).
    Returns (name, app_dict) on success, or None on failure — the error is
    logged instead of raised so that one broken app doesn't abort the whole
    catalog build.
    """
    (name, info), cache_path = data
    try:
        return name, build_app_dict(name, info, cache_path)
    except Exception:
        # logging.exception keeps the full traceback, which the previous
        # logging.error("... %s", err) discarded.
        logging.exception("[List builder] Error while updating %s", name)
        return None
2024-09-11 14:47:49 +02:00
def build_base_catalog(
    catalog: dict[str, dict[str, Any]], cache_path: Path, nproc: int
):
    """Build the {app_id: app_dict} base catalog using a pool of nproc workers."""
    apps: dict[str, dict[str, Any]] = {}
    with multiprocessing.Pool(processes=nproc) as pool:
        with logging_redirect_tqdm():
            tasks = pool.imap(
                __build_app_dict, zip(catalog.items(), repeat(cache_path))
            )
            # Progress bar over the (lazily consumed) worker results
            for entry in tqdm.tqdm(tasks, total=len(catalog), ascii=" ·#"):
                if entry is None:
                    # Worker failed for that app; error already logged
                    continue
                app_id, app_dict = entry
                apps[app_id] = app_dict
    return apps
2024-09-11 14:47:49 +02:00
def write_catalog_v3(base_catalog, apps_path: Path, target_dir: Path) -> None:
    """Write the v3 catalog (apps.json + content-addressed logos) into target_dir.

    Logos are copied to <target_dir>/logos/<sha256>.png and referenced from each
    app entry via its "logo_hash" field.
    """
    logos_dir = target_dir / "logos"
    logos_dir.mkdir(parents=True, exist_ok=True)

    def infos_for_v3(app_id: str, infos: Any) -> Any:
        # We remove the app install question and resources parts which aint
        # needed anymore by webadmin etc (or at least we think ;P)
        if "manifest" in infos and "install" in infos["manifest"]:
            del infos["manifest"]["install"]
        if "manifest" in infos and "resources" in infos["manifest"]:
            del infos["manifest"]["resources"]

        app_id = app_id.lower()
        logo_source = apps_path / "logos" / f"{app_id}.png"
        if logo_source.exists():
            logo_hash = (
                subprocess.check_output(["sha256sum", logo_source])
                .strip()
                .decode("utf-8")
                .split()[0]
            )
            shutil.copyfile(logo_source, logos_dir / f"{logo_hash}.png")
            # FIXME: implement something to cleanup old logo stuff in the builds/.../logos/ folder somehow
        else:
            logo_hash = None
        infos["logo_hash"] = logo_hash
        return infos

    full_catalog = {
        "apps": {app: infos_for_v3(app, info) for app, info in base_catalog.items()},
        "categories": categories_list(),
        "antifeatures": antifeatures_list(),
    }

    target_file = target_dir / "apps.json"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text closes the file, unlike the previous open(...).write(...)
    # which leaked the file handle.
    target_file.write_text(json.dumps(full_catalog, sort_keys=True), encoding="utf-8")
def write_catalog_doc(base_catalog, target_dir: Path) -> None:
    """Write the documentation-site catalog (working apps only) into target_dir."""

    def infos_for_doc_catalog(infos):
        # Level may be missing or a non-int placeholder; normalize to -1 ("broken")
        level = infos.get("level")
        if not isinstance(level, int):
            level = -1
        return {
            "id": infos["id"],
            "category": infos["category"],
            "url": infos["git"]["url"],
            "name": infos["manifest"]["name"],
            "description": infos["manifest"]["description"],
            "state": infos["state"],
            "level": level,
            "broken": level <= 0,
            "good_quality": level >= 8,
            "bad_quality": level <= 5,
            "antifeatures": infos.get("antifeatures"),
            "potential_alternative_to": infos.get("potential_alternative_to", []),
        }

    # The doc catalog only lists apps in "working" state
    result_dict_doc = {
        k: infos_for_doc_catalog(v)
        for k, v in base_catalog.items()
        if v["state"] == "working"
    }
    full_catalog = {"apps": result_dict_doc, "categories": categories_list()}

    target_file = target_dir / "apps.json"
    target_file.parent.mkdir(parents=True, exist_ok=True)
    # write_text closes the file, unlike the previous open(...).write(...)
    # which leaked the file handle.
    target_file.write_text(json.dumps(full_catalog, sort_keys=True), encoding="utf-8")
2024-09-11 14:47:49 +02:00
def build_app_dict(app, infos, cache_path: Path):
    """Build the full catalog entry for one app from its infos and git cache.

    Args:
        app: the app id as listed in the catalog.
        infos: the raw catalog entry for this app (mutated in place).
        cache_path: directory containing one pre-fetched git clone per app.

    Returns:
        The dict describing the app in the built catalog.

    Raises:
        RuntimeError: if the app has no cache yet, or if the pinned revision
            is not present in the cached repository's history.
    """
    # Make sure we have some cache; explicit raise instead of assert because
    # asserts are stripped when running under `python -O`.
    this_app_cache = cache_path / app
    if not this_app_cache.exists():
        raise RuntimeError(f"No cache yet for {app}")

    repo = Repo(this_app_cache)

    # If added_date is not present, we are in a github action of the PR that adds it... so default to a bad value.
    infos["added_in_catalog"] = infos.get("added_date", 0)

    infos["branch"] = infos.get("branch", "master")
    infos["revision"] = infos.get("revision", "HEAD")

    # If using HEAD, resolve it to the actual commit sha of the cached clone
    if infos["revision"] == "HEAD":
        infos["revision"] = repo.head.commit.hexsha
    # Otherwise, validate commit exists
    else:
        try:
            _ = repo.commit(infos["revision"])
        except ValueError as err:
            raise RuntimeError(
                f"Revision ain't in history ? {infos['revision']}"
            ) from err

    # Find timestamp corresponding to that commit
    timestamp = repo.commit(infos["revision"]).committed_date

    # Load the manifest (v2 toml if present, legacy json otherwise).
    # `with` blocks close the file handles, which the previous
    # toml.load(path.open("r")) / json.load(path.open("r")) leaked.
    manifest_toml = this_app_cache / "manifest.toml"
    if manifest_toml.exists():
        with manifest_toml.open("r", encoding="utf-8") as manifest_file:
            manifest = toml.load(manifest_file, _dict=OrderedDict)
    else:
        with (this_app_cache / "manifest.json").open("r", encoding="utf-8") as manifest_file:
            manifest = json.load(manifest_file)

    # Build the dict with all the infos
    return {
        "id": manifest["id"],
        "git": {
            "branch": infos["branch"],
            "revision": infos["revision"],
            "url": infos["url"],
        },
        "added_in_catalog": infos["added_in_catalog"],
        "lastUpdate": timestamp,
        "manifest": manifest,
        "state": infos["state"],
        "level": infos.get("level", "?"),
        "maintained": "package-not-maintained" not in infos.get("antifeatures", []),
        "high_quality": infos.get("high_quality", False),
        "featured": infos.get("featured", False),
        "category": infos.get("category", None),
        "subtags": infos.get("subtags", []),
        "potential_alternative_to": infos.get("potential_alternative_to", []),
        # Union of antifeatures declared in the manifest and in the catalog entry
        "antifeatures": list(
            set(
                list(manifest.get("antifeatures", {}).keys())
                + infos.get("antifeatures", [])
            )
        ),
    }
2024-02-08 22:14:35 +01:00
def main() -> None:
    """CLI entry point: parse arguments, build the base catalog, write all outputs."""
    arg_parser = argparse.ArgumentParser()
    get_apps_repo.add_args(arg_parser)
    arg_parser.add_argument(
        "target_dir",
        type=Path,
        nargs="?",
        help="The directory to write the catalogs to. Defaults to apps/builds/default",
    )
    arg_parser.add_argument(
        "-j",
        "--jobs",
        type=int,
        default=multiprocessing.cpu_count(),
        metavar="N",
        help="Allow N threads to run in parallel",
    )
    cli_args = arg_parser.parse_args()

    appslib.logging_sender.enable()

    apps_dir = get_apps_repo.from_args(cli_args)
    cache_path = get_apps_repo.cache_path(cli_args)
    cache_path.mkdir(exist_ok=True, parents=True)

    # Fall back to the default build directory when none was given on the CLI
    target_dir = cli_args.target_dir or apps_dir / "builds" / "default"

    print("Retrieving all apps' information to build the catalog...")
    base_catalog = build_base_catalog(get_catalog(apps_dir), cache_path, cli_args.jobs)

    print(f"Writing the catalogs to {target_dir}...")
    write_catalog_v3(base_catalog, apps_dir, target_dir / "v3")
    write_catalog_doc(base_catalog, target_dir / "doc_catalog")

    print("Done!")
2024-02-08 22:14:35 +01:00
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()