
Refactor app_caches: use a class to store local data

Félix Piédallu 2024-09-10 11:41:06 +02:00 committed by Salamandar
parent 6b17d3125e
commit a08974bf87
2 changed files with 81 additions and 93 deletions

View file

@@ -10,116 +10,110 @@ from typing import Any
 import tqdm
+from git import Repo
+from git.repo.fun import is_git_dir
 from appslib.utils import (
     REPO_APPS_ROOT,  # pylint: disable=import-error
     get_catalog,
-    git_repo_age,
 )
-from git import Repo

-APPS_CACHE_DIR = REPO_APPS_ROOT / ".apps_cache"

-def app_cache_folder(app: str) -> Path:
-    return APPS_CACHE_DIR / app
+class AppDir:
+    def __init__(self, name: str, path: Path) -> None:
+        self.name = name
+        self.path = path

+    def ensure(
+        self, remote: str, branch: str, url_ssh: bool, all_branches: bool
+    ) -> None:
+        # Patch url for ssh clone
+        if url_ssh:
+            remote = remote.replace("https://github.com/", "git@github.com:")

+        op = self._update if is_git_dir(self.path / ".git") else self._clone
+        op(remote, all_branches, branch)

-def app_cache_clone(
-    app: str, infos: dict[str, str], all_branches: bool = False
-) -> None:
-    logging.info("Cloning %s...", app)
-    git_depths = {
-        "notworking": 5,
-        "inprogress": 20,
-        "default": 40,
-    }
-    if app_cache_folder(app).exists():
-        shutil.rmtree(app_cache_folder(app))
-    Repo.clone_from(
-        infos["url"],
-        to_path=app_cache_folder(app),
-        depth=git_depths.get(infos["state"], git_depths["default"]),
-        single_branch=not all_branches,
-        branch=infos.get("branch", "master"),
-    )
+    def cleanup(self) -> None:
+        logging.warning(f"Cleaning up {self.path}...")
+        if self.path.exists():
+            if self.path.is_dir():
+                shutil.rmtree(self.path)
+            else:
+                self.path.unlink()

+    def _clone(self, remote: str, all_branches: bool, branch: str) -> None:
+        logging.info("Cloning %s...", self.name)
+        if self.path.exists():
+            self.cleanup()
+        Repo.clone_from(
+            remote,
+            to_path=self.path,
+            depth=40,
+            single_branch=not all_branches,
+            branch=branch,
+        )

-def app_cache_clone_or_update(
-    app: str,
-    infos: dict[str, str],
-    ssh_clone: bool = False,
-    fetch_all_branches: bool = False,
-) -> None:
-    app_path = app_cache_folder(app)
-
-    # Patch url for ssh clone
-    if ssh_clone:
-        infos["url"] = infos["url"].replace("https://github.com/", "git@github.com:")
-
-    # Don't refresh if already refreshed during last hour
-    age = git_repo_age(app_path)
-    if age is False:
-        app_cache_clone(app, infos, fetch_all_branches)
-        return
-    # if age < 3600:
-    #     logging.info(f"Skipping {app}, it's been updated recently.")
-    #     return
-
-    logging.info("Updating %s...", app)
-    repo = Repo(app_path)
-    repo.remote("origin").set_url(infos["url"])
-
-    branch = infos.get("branch", "master")
-    if fetch_all_branches:
-        repo.git.remote("set-branches", "origin", "*")
-        repo.remote("origin").fetch()
-        repo.remote("origin").pull()
-    else:
-        if repo.active_branch != branch:
-            all_branches = [str(b) for b in repo.branches]
-            if branch in all_branches:
-                repo.git.checkout(branch, "--force")
-            else:
-                repo.git.remote("set-branches", "--add", "origin", branch)
-                repo.remote("origin").fetch(f"{branch}:{branch}")
-
-        repo.remote("origin").fetch(refspec=branch, force=True)
-        repo.git.reset("--hard", f"origin/{branch}")
+    def _update(self, remote: str, all_branches: bool, branch: str) -> None:
+        logging.info("Updating %s...", self.name)
+        repo = Repo(self.path)
+        repo.remote("origin").set_url(remote)
+        if all_branches:
+            repo.git.remote("set-branches", "origin", "*")
+            repo.remote("origin").fetch()
+            repo.remote("origin").pull()
+        else:
+            if repo.active_branch != branch:
+                repo_branches = [str(b) for b in repo.heads]
+                if branch in repo_branches:
+                    repo.git.checkout(branch, "--force")
+                else:
+                    repo.git.remote("set-branches", "--add", "origin", branch)
+                    repo.remote("origin").fetch(f"{branch}:{branch}")
+
+            repo.remote("origin").fetch(refspec=branch, force=True)
+            repo.git.reset("--hard", f"origin/{branch}")

-def __app_cache_clone_or_update_mapped(data):
-    name, info, ssh_clone, all_branches = data
+def __appdir_ensure_mapped(data):
+    name, path, url, branch, url_ssh, all_branches = data
     try:
-        app_cache_clone_or_update(name, info, ssh_clone, all_branches)
+        AppDir(name, path).ensure(url, branch, url_ssh, all_branches)
     except Exception as err:
         logging.error("[App caches] Error while updating %s: %s", name, err)

 def apps_cache_update_all(
+    cache_path: Path,
     apps: dict[str, dict[str, Any]],
     parallel: int = 8,
-    ssh_clone: bool = False,
+    url_ssh: bool = False,
     all_branches: bool = False,
 ) -> None:
-    with Pool(processes=parallel) as pool:
-        tasks = pool.imap_unordered(
-            __app_cache_clone_or_update_mapped,
-            zip(apps.keys(), apps.values(), repeat(ssh_clone), repeat(all_branches)),
-        )
+    args = (
+        (
+            app,
+            cache_path / app,
+            info["url"],
+            info.get("branch", "master"),
+            url_ssh,
+            all_branches,
+        )
+        for app, info in apps.items()
+    )
+    with Pool(processes=parallel) as pool:
+        tasks = pool.imap_unordered(__appdir_ensure_mapped, args)
         for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
             pass

-def apps_cache_cleanup(apps: dict[str, dict[str, Any]]) -> None:
-    for element in APPS_CACHE_DIR.iterdir():
+def apps_cache_cleanup(cache_path: Path, apps: dict[str, dict[str, Any]]) -> None:
+    for element in cache_path.iterdir():
         if element.name not in apps.keys():
-            logging.warning(f"Removing {element}...")
-            if element.is_dir():
-                shutil.rmtree(element)
-            else:
-                element.unlink()
+            AppDir("", element).cleanup()

 def __run_for_catalog():
@@ -151,14 +145,17 @@ def __run_for_catalog():
     if args.verbose:
         logging.getLogger().setLevel(logging.INFO)

-    APPS_CACHE_DIR.mkdir(exist_ok=True, parents=True)
+    cache_path = REPO_APPS_ROOT / ".apps_cache"
+    cache_path.mkdir(exist_ok=True, parents=True)
     if args.cleanup:
-        apps_cache_cleanup(get_catalog())
+        apps_cache_cleanup(cache_path, get_catalog())
     apps_cache_update_all(
+        cache_path,
         get_catalog(),
         parallel=args.processes,
-        ssh_clone=args.ssh,
+        url_ssh=args.ssh,
         all_branches=args.all_branches,
     )
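
For context, a minimal usage sketch of the refactored API (the app name, path, and catalog entry below are hypothetical, for illustration only):

    from pathlib import Path

    # Hypothetical catalog entry
    info = {"url": "https://github.com/YunoHost-Apps/example_ynh", "branch": "master"}

    app_dir = AppDir("example", Path(".apps_cache") / "example")
    # ensure() clones when path/.git is absent, otherwise updates in place;
    # url_ssh=True would rewrite the https remote to an ssh one first.
    app_dir.ensure(info["url"], info.get("branch", "master"), url_ssh=False, all_branches=False)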

View file

@@ -1,9 +1,7 @@
 #!/usr/bin/env python3

-import sys
 import subprocess
-from typing import Any, TextIO, Generator, Optional, Union
-import time
+from typing import Any, Optional
 from functools import cache
 from pathlib import Path
 from git import Repo
@@ -28,13 +26,6 @@ def git(cmd: list[str], cwd: Optional[Path] = None) -> str:
     )

-def git_repo_age(path: Path) -> Union[bool, int]:
-    for file in [path / ".git" / "FETCH_HEAD", path / ".git" / "HEAD"]:
-        if file.exists():
-            return int(time.time() - file.stat().st_mtime)
-    return False

 @cache
 def get_catalog(working_only: bool = False) -> dict[str, dict[str, Any]]:
     """Load the app catalog and filter out the non-working ones"""