#!/usr/bin/env python3
#
# COPYRIGHT: ReproNim/containers Team 2018-2025
#
# LICENSE: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Description for invocation
#  - you can specify a list of images, as their original docker/github ids
#    which should be considered. E.g.
#
#      scripts/create_singularities poldracklab/ds003-example bids/validator
#
#    will consider only two images - one for poldracklab and another one
#    bids/validator, the other ones will be reported skipped

from __future__ import annotations

from collections.abc import Generator, Iterator
from dataclasses import dataclass
import click
import logging
import os
from pathlib import Path
import re
import subprocess
import sys
from time import sleep
from typing import Any, Optional, cast
import requests
import datalad.api as dl

log = logging.getLogger(__name__)


@dataclass
class NeuroDeskSingularityImage:
    """One entry of the Neurodesk image listing (see `get_neurodesk_images`)."""

    # first whitespace-separated token of a listing line,
    # e.g. "afni_21.2.00_20210714"
    container: str
    # the three "_"-separated components of `container`
    name: str
    version: str
    date: str
    # download locations of the ``.simg`` file
    urls: list[str]


@dataclass
class Builder:
    """Create Singularity recipes/images and register them in the dataset."""

    # directory all commands are run from (passed as ``cwd``)
    repo_dir: Path
    # directory under which per-family image sub-directories are created
    images_dir: Path
    # exported to child processes as TMPDIR and SINGULARITY_TMPDIR
    tmp_dir: Path
    # if non-empty, only these ids are processed; the rest are skipped
    githubids: set[str]

    def runcmd(self, *args: str, **kwargs: Any) -> subprocess.CompletedProcess:
        """Run a command from `repo_dir` with the temp-dir variables set.

        :param args: the command and its arguments
        :param kwargs: passed on to `subprocess.run`; ``check`` defaults to
            True, while ``cwd`` and ``env`` are always overridden here
        :raises subprocess.CalledProcessError: on non-zero exit status when
            ``check`` is true
        """
        kwargs.setdefault("check", True)
        kwargs["cwd"] = str(self.repo_dir)
        kwargs["env"] = {
            **os.environ,
            "TMPDIR": str(self.tmp_dir),
            "SINGULARITY_TMPDIR": str(self.tmp_dir),
        }
        return subprocess.run(args, **kwargs)

    @staticmethod
    def get_last_docker_version_tag(
        dh: str,
        only_good_versions: bool = False,
        version_regex: Optional[str] = None,
    ) -> Optional[tuple[str, str]]:
        """Pick the tag of the most recent version of a Docker Hub repository.

        :param dh: Docker Hub repository id (``namespace/name``)
        :param only_good_versions: apply the "good version" filtering even
            when only a single tag is available
        :param version_regex: if given, only tags matching it (`re.search`)
            are considered
        :return: ``(version, tag)`` where ``version`` is the tag with any
            ``v``/``V``/``version-``/``release-`` prefix removed, or `None`
            if no suitable tag was found
        """
        # NOTE(review): only the results of this single request are examined;
        # presumably the registry paginates its answer -- confirm.
        r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{dh}/tags")
        all_tags = [cast(str, res["name"]) for res in r.json()["results"]]
        if version_regex:
            versions = [v for v in all_tags if re.search(version_regex, v)]
        else:
            versions = all_tags
        if len(versions) > 1 or (versions and only_good_versions):
            # select only the ones which seems to be semantic and/or
            # master/latest.  Some release alpha releases, so probably would
            # skip those for now
            good_versions: dict[str, str] = {}
            for v in versions:
                if not (
                    re.search(r"[ab][0-9]+$", v)
                    or re.search(r"rc[0-9]*$", v)
                    or "master" in v
                ):
                    m = re.match(r"(([Vv]|version-|release-|)([0-9]{1,10}\..*))", v)
                    if m:
                        # key: version without prefix; value: the tag itself
                        good_versions[m[3]] = m[1]
            if good_versions:
                k = max(good_versions, key=version_key)
                return (k, good_versions[k])
            else:
                return None
        elif not versions:
            # Report the unfiltered tags: `versions` is empty in this branch,
            # so joining it would always print nothing.
            log.info(" %s no version. Tags: %s", dh, " ".join(all_tags))
            return None
        else:
            return (versions[0], versions[0])

    @staticmethod
    def get_docker_repositories(
        namespace: str, full: bool = True
    ) -> Generator[str, None, None]:
        """Return repositories for a specific namespace (user or organization)

        :param namespace: Docker Hub user or organization
        :param full: yield ``namespace/name`` if true, just ``name`` otherwise
        """
        r = retry_get(f"https://registry.hub.docker.com/v2/repositories/{namespace}")
        for res in r.json()["results"]:
            if res["repository_type"] != "image":
                # don't know what to do with those
                continue
            name = cast(str, res["name"])
            assert namespace == res["namespace"]
            if full:
                yield f"{namespace}/{name}"
            else:
                yield name

    @staticmethod
    def get_neurodesk_images() -> Generator[NeuroDeskSingularityImage, None, None]:
        """Return list of Neurodesk singularity images

        Following https://github.com/ReproNim/containers/issues/64#issuecomment-1418301696

        :raises requests.HTTPError: if the listing cannot be fetched
        :raises ValueError: if a container id does not consist of exactly
            three ``_``-separated components
        """
        r = requests.get(
            "https://raw.githubusercontent.com/NeuroDesk/neurocommand/main/cvmfs/log.txt"
        )
        r.raise_for_status()
        # look like  afni_21.2.00_20210714 categories:functional imaging,
        # for now we disregard the metadata
        for line in r.text.splitlines():
            fields = line.split()
            if not fields:
                # blank line: nothing to parse (would be an IndexError below)
                continue
            container = fields[0]
            name, version, date = container.split('_')
            yield NeuroDeskSingularityImage(
                container=container,
                name=name,
                version=version,
                date=date,
                urls=[
                    f'https://d15yxasja65rk8.cloudfront.net/{container}.simg'
                ]
            )

    def add_neurodesk_singularity_images(
        self,
    ) -> None:
        """Download missing Neurodesk images and register one per image name.

        Every listed image whose file is not yet present is downloaded and its
        URLs are registered with git-annex.  The first image encountered for
        each name (in reversed listing order) is additionally (re)registered
        via ``datalad containers-add --update``.  Newly added files are saved
        in a single commit at the end.
        """
        # AFAIK they are sorted by version and we need to add
        # datalad-container one only to the latest version.
        # For the others -- check if image file exists and if not -- add
        all_imgs = list(self.get_neurodesk_images())[::-1]
        ds = dl.Dataset(self.repo_dir)
        outdir = self.images_dir / 'neurodesk'
        outdir.mkdir(parents=True, exist_ok=True)
        prior_img = None
        added, registered = [], 0
        for img in all_imgs:
            log.info("neurodesk <- singularity %s", img.container)
            imagefile = Path(str(outdir / get_imagename(img.name, 'neurodesk', img.version)) + ".simg")
            # is_symlink() also catches annexed files whose content is absent
            if not (imagefile.exists() or imagefile.is_symlink()):
                dl.download_url(
                    img.urls[0],
                    path=imagefile,
                    save=False
                )
                key = ds.repo.add(str(imagefile))['key']
                for url in img.urls:
                    self.runcmd(
                        "git", "annex", "registerurl",
                        "-c", "annex.alwayscommit=false",
                        key, url
                    )
                added.append(str(imagefile))
            if img.name != prior_img:
                prior_img = img.name
                registered += 1
                log.info(" <- register %s %s", img.name, img.version)
                self.runcmd(
                    "datalad",
                    "containers-add",
                    get_familyname(img.name, 'neurodesk'),
                    "-i",
                    str(imagefile),
                    "--update",
                    "--call-fmt",
                    "{img_dspath}/scripts/singularity_cmd run {img} {cmd}",
                )
        if added:
            ds.repo.set_metadata(files=added, add={'distribution-restrictions': 'out-of-space'})
            ds.save(path=added, message=f"Add {len(added)} NeuroDesk images with {registered} registered")

    def create_singularity_file(
        self,
        dh: str,
        version_tag: str,
        version_pure: str,
        family: str,
        save_push: bool = False
    ) -> SingularityFile:
        """Write (unless already present) the Singularity recipe for an image.

        :param dh: Docker Hub repository id used in the ``From:`` line
        :param version_tag: Docker tag used in the ``From:`` line
        :param version_pure: version string used in the image name
        :param family: family name; also the sub-directory of `images_dir`
        :param save_push: if true, save the recipe, pull and push right away
        :return: handle on the recipe file and the derived image name
        """
        imagename = get_imagename(dh, family, version_pure)
        outdir = self.images_dir / family
        outdir.mkdir(parents=True, exist_ok=True)
        singfile = outdir / f"Singularity.{imagename}"
        # log.info("%s \t from \t %s:%s", singfile, dh, version_tag)
        # Do not recreate the file ATM.  Since we changed the env vars we
        # define, we don't want to recreate it if already present
        if not singfile.exists():
            singfile.write_text(
                f"""\
#
# Automagically prepared for ReproNim/containers distribution.
# See http://github.com/ReproNim/containers for more info
#
Bootstrap: docker
From: {dh}:{version_tag}

%post

# Create commonly present root directories to avoid need in overlays not supported
# on older systems
mkdir -p /ihome /data /data2 /zfs /isi /dartfs /dartfs-hpc

%environment
export LANG="C.UTF-8"
export LC_ALL="C.UTF-8"

# TODO: Take advantage of the fact that it is a bids-app somehow?
"""
            )
            if save_push:
                # Save it immediately and push for build process to start
                # Git branch should be configured properly to push
                self.runcmd(
                    "datalad",
                    "save",
                    "-m",
                    f"Adding singularity recipe for {dh}:{version_tag}",
                    str(singfile),
                )
                self.runcmd("git", "pull")  # to possibly merge remote changes
                self.runcmd("datalad", "push", "--data", "auto")
        return SingularityFile(sourcefile=singfile, imagename=imagename)

    def add_singularity_versioned_image(
        self,
        singfile: SingularityFile,
        repoid: str,
        family: str,
        origrepoid: Optional[str] = None,
        familysuf: Optional[str] = None,
    ) -> None:
        """Build the image for a recipe and register it as a container.

        Nothing is saved or registered when the image file already exists
        (i.e. `SingularityFile.build` returns `None`).

        :param singfile: recipe to build
        :param repoid: repository id; only used as default for `origrepoid`
        :param family: family name used in the container name
        :param origrepoid: id of the repository the image originates from
        :param familysuf: optional suffix for the container name
        """
        # If image repo id is different (e.g. ReproNim/containers) from
        # original repoid where we are getting image for (e.g. generating
        # Singularity containers from Docker images) -- specify it as 4th arg
        if origrepoid is None:
            origrepoid = repoid
        familyname = get_familyname(origrepoid, family, familysuf)
        imagefile = singfile.build(self)
        if imagefile is not None:
            self.runcmd(
                "datalad",
                "save",
                "-m",
                f"Adding recipe&image for {singfile.imagename}",
                str(imagefile),
                str(singfile.sourcefile),
            )
            base_opts = ' '
            if familyname == 'bids-mriqc':
                # need to explicitly specify option to avoid use of datalad inside
                # ref: https://github.com/ReproNim/containers/issues/131
                base_opts = ' --no-datalad-get '
            self.runcmd(
                "datalad",
                "containers-add",
                familyname,
                "-i",
                str(imagefile),
                "--update",
                "--call-fmt",
                "{img_dspath}/scripts/singularity_cmd run {img}" + base_opts + "{cmd}",
            )
            # TODO: later make it work with updating existing one.  We will not
            # be able to use containers-add --update since original URL is
            # version specific.  That is why it also does not make much sense to
            # create a image file without version.  We better collect all
            # versions available, so in case an execution of previous version
            # is needed it could still be done "semi-manually".  May be just
            # via adding -c datalad.containers.NAME.image variable pointing to
            # the previous version of the container

    def generate_singularity_for_docker_image(
        self,
        githubid: str,
        family: Optional[str] = None,
        only_good_versions: bool = False,
        version_regex: Optional[str] = None,
        familysuf: Optional[str] = None,
    ) -> None:
        """Create recipe and image for the latest version of a Docker image.

        :param githubid: image id; lower-cased to obtain the Docker Hub id
        :param family: family name; defaults to the namespace part of the id
        :param only_good_versions: see `get_last_docker_version_tag`
        :param version_regex: see `get_last_docker_version_tag`
        :param familysuf: optional suffix for the container name
        """
        dockerhubid = githubid.lower()
        if not family:
            family = dockerhubid.split('/', 1)[0]
        # the selection given on the command line is matched case-sensitively
        if self.githubids and githubid not in self.githubids:
            log.info("skip %s", githubid)
            return
        if dockerhubid in ("djarecka/fmriprep_fake",
                           "pennlinc/toy_bids_app",
                           "nipreps/fmripost-aroma",
                           "nipreps/fmripost-phase",
                           "nipreps/fmripost-rapidtide"):
            log.info("TEMP TODO skip %s due to odd OCI issue ATM with old singularity", dockerhubid)
            return
        log.info("%s <- docker %s", family, dockerhubid)
        last_version = self.get_last_docker_version_tag(
            dockerhubid,
            only_good_versions=only_good_versions,
            version_regex=version_regex)
        if last_version is not None:
            last_version_pure, last_version_tag = last_version
            # print(dockerhubid, last_version)
            singfile = self.create_singularity_file(
                dockerhubid, last_version_tag, last_version_pure, family
            )
            self.add_singularity_versioned_image(
                singfile, "ReproNim/containers", family, dockerhubid,
                familysuf=familysuf,
            )

    def generate_singularity_for_docker_namespace(
        self,
        namespace: str,
        family: Optional[str] = None,
        only_good_versions: bool = False,
    ) -> None:
        """Process every image repository of a Docker Hub namespace.

        :param namespace: Docker Hub user or organization
        :param family: family name; defaults to the lower-cased namespace
        :param only_good_versions: see `get_last_docker_version_tag`
        """
        if not family:
            family = namespace.lower()
        for name in self.get_docker_repositories(namespace):
            self.generate_singularity_for_docker_image(
                name, family=family, only_good_versions=only_good_versions)


@dataclass
class SingularityFile:
    """A Singularity recipe file and the name of the image built from it."""

    # path to the ``Singularity.<imagename>`` recipe
    sourcefile: Path
    # base name (without extension) of the image file
    imagename: str

    def build(self, builder: Builder) -> Optional[Path]:
        """Build ``<imagename>.sing`` next to the recipe.

        :param builder: provides `runcmd` used to run the build commands
        :return: the path to the new imagefile, or None if it already exists
        """
        imagefilename = f"{self.imagename}.sing"
        imagefile = self.sourcefile.with_name(imagefilename)
        if not imagefile.exists() and not imagefile.is_symlink():
            # eventually can use docker builds, e.g.
            # docker run -it -v $PWD:$PWD -w $PWD --privileged quay.io/singularity/singularity:v3.5.1 build ./busybox.sing-3.5.1 ./Singularity.busybox
            # docker run -it -v $PWD:$PWD -w $PWD --entrypoint chown quay.io/singularity/singularity:v2.6 -- $UID.$GID ./busybox.sing
            # unfortunately 2.6 version tries to connect to docker service and
            # fails, thus can't fetch anything
            tmpfile = self.sourcefile.with_name(f"{imagefilename}.tmp")
            builder.runcmd(
                "sudo",
                "--preserve-env=TMPDIR,SINGULARITY_TMPDIR",
                "singularity",
                "build",
                str(tmpfile),
                str(self.sourcefile),
            )
            # abuse BTRFS CoW to avoid sudo/docker to chown
            builder.runcmd("cp", "--reflink=auto", str(tmpfile), str(imagefile))
            tmpfile.unlink()
            return imagefile
        else:
            return None


def version_key(vstr: str) -> tuple[int, tuple[int, ...], str]:
    """Return a sort key for a version string.

    Strings starting with a numeric version (integers separated by ``.`` or
    ``_``) give ``(0, <integers>, <remaining text>)``; anything else gives
    ``(1, (), vstr)``.
    """
    m = re.match(r"[0-9]+(?:[._][0-9]+)*", vstr)
    if m:
        # Split on both separators the regex accepts: int() tolerates "_"
        # inside a number, so "1_10" would otherwise silently become 110.
        components = tuple(int(c) for c in re.split(r"[._]", m[0]))
        return (0, components, vstr[m.end():])
    else:
        return (1, (), vstr)


# Use familysuf to add smth like "LTS" or alike
def get_familyname(repoid: str, family: str, familysuf: Optional[str] = None) -> str:
    """Return ``<family>-<name>[-<familysuf>]`` for a repository id.

    ``<name>`` is `repoid` with everything up to the first ``/`` removed and
    ``_`` replaced by ``-``.
    """
    name = re.sub(r"^[^/]*/", "", repoid)
    # sanitize for datalad not allowing _ in the container names
    name = name.replace("_", "-")
    familyname = f"{family}-{name}"
    if familysuf:
        familyname += f"-{familysuf}"
    return familyname


def get_imagename(repoid: str, family: str, versiontag: str, familysuf: Optional[str] = None) -> str:
    """Return ``<familyname>--<versiontag>`` (see `get_familyname`)."""
    familyname = get_familyname(repoid, family, familysuf)
    return f"{familyname}--{versiontag}"


# Entry point: verify the singularity version, then generate recipes/images
# for the BIDS apps listed upstream, a fixed set of extra Docker images, the
# "brainiak" namespace and the Neurodesk images; finally pull and optionally
# push.  (Kept as a comment: click would show a docstring as --help text.)
@click.command()
@click.option("--push", is_flag=True, show_default=True, default=False, help="Push upon completion.")
@click.argument("githubids", nargs=-1, default=None)
def main(push: bool, githubids: list[str] | None) -> None:
    logging.basicConfig(format="[%(levelname)-8s] %(message)s", level=logging.INFO)
    # Use local dedicated "tmp/" directory since any other might be too small
    topdir = Path(__file__).parent.parent
    tmp_dir = topdir / "tmp"
    # TMPDIR/SINGULARITY_TMPDIR are pointed at this directory, so it has to
    # exist (parents=True covers topdir as well)
    tmp_dir.mkdir(parents=True, exist_ok=True)
    builder = Builder(
        repo_dir=topdir,
        images_dir=topdir / "images",
        tmp_dir=tmp_dir,
        githubids=set(githubids or []),
    )

    # We are still using elderly singularity 2.6.1 to build images, so elderly
    # singularity could run images we generate, since images produced by 3
    # seems to not be usable with 2.
    rc = subprocess.run(
        ["singularity", "--version"],
        check=True,
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    if "2.6.1" not in rc.stdout:
        log.error("singularity is 'too new'")
        sys.exit(1)

    r = requests.get(
        "https://raw.githubusercontent.com/bids-standard/bids-website/main/data/tools/apps.yml"
    )
    r.raise_for_status()
    for line in r.text.splitlines():
        m = re.match(r" *dh:\s+(\S+)", line.rstrip("\n\r"))
        if m:
            # strip any quotes around the id
            dockerhubid = re.sub(r"[\"']", "", m[1])
            if "micalab/micapipe" not in dockerhubid:
                builder.generate_singularity_for_docker_image(dockerhubid, "bids")

    # validator is not bids-app but we will stick it along with bids
    builder.generate_singularity_for_docker_image("bids/validator")

    # LTS releases for some bids-apps. TODO later may be: check for having ALL versions
    builder.generate_singularity_for_docker_image("nipreps/fmriprep", version_regex=r"^v?20\.2\.[0-9]+$", family="bids", familysuf="20-2")

    # Additional ones from Poldrack
    builder.generate_singularity_for_docker_image("poldracklab/ds003-example")

    #
    # ReproNim
    #
    # "Native" Singularity image for ReproIn
    # TODO: figure out who would build those now that shub is gone?!
    # ATM: built locally and pushed to docker hub... so let's build from those
    builder.generate_singularity_for_docker_image("repronim/reproin")

    # nipy/heudiconv
    builder.generate_singularity_for_docker_image("nipy/heudiconv")

    # Docker image for simple_workflow
    builder.generate_singularity_for_docker_image("ReproNim/simple_workflow")

    # neuronets
    builder.generate_singularity_for_docker_image("neuronets/kwyk")

    # Jeremy Magland images, to try out dendro
    # Seems to be 404ing https://github.com/scratchrealm/pc-spike-sorting/issues/10
    #builder.generate_singularity_for_docker_image("magland/pc-mountainsort5")
    #builder.generate_singularity_for_docker_image("magland/pc-kilosort3")

    # Entire organization
    # Some purely "latest" give us troubles. Let them tag
    builder.generate_singularity_for_docker_namespace("brainiak", only_good_versions=True)

    # Custom: neurodesk singularity images
    # TODO: update URLs for all existing to not have oraclecloud.com ones and only new
    #       https://d15yxasja65rk8.cloudfront.net ones
    builder.add_neurodesk_singularity_images()

    builder.runcmd("git", "pull")  # to possibly merge remote changes
    if push:
        builder.runcmd("datalad", "push", "--data=auto")  # so we share with the world


def retry_get(url: str) -> requests.Response:
    """GET `url`, retrying with exponential backoff on transient failures.

    Connection errors, timeouts and HTTP responses with status >= 500 are
    retried (up to 10 sleeps in between); HTTP errors with status < 500 are
    re-raised immediately.

    :raises requests.ConnectionError, requests.HTTPError, requests.Timeout:
        the last error, once the attempts are exhausted
    """
    sleepiter = exp_wait(attempts=10)
    while True:
        try:
            r = requests.get(url)
            r.raise_for_status()
            return r
        except (requests.ConnectionError, requests.HTTPError, requests.Timeout) as e:
            if (
                isinstance(e, requests.HTTPError)
                and e.response is not None
                and e.response.status_code < 500
            ):
                # client-side error: retrying would not help
                raise
            if (wait := next(sleepiter, None)) is not None:
                # four placeholders -> four arguments (url was missing, which
                # made the logging call itself fail to format)
                log.warning(
                    "Request to %s failed due to %s: %s; sleeping for %f"
                    " seconds and retrying",
                    url,
                    type(e).__name__,
                    str(e),
                    wait,
                )
                sleep(wait)
            else:
                raise


def exp_wait(
    base: float = 1.25,
    multiplier: float = 1,
    attempts: int | None = None,
) -> Iterator[float]:
    """
    Returns a generator of values usable as `sleep()` times when retrying
    something with exponential backoff.

    :param float base: the base value for exponentiation
    :param float multiplier:
        value to multiply values by after exponentiation
    :param Optional[int] attempts:
        how many values to yield; set to `None` to yield forever
    :rtype: Iterator[float]
    """
    n = 0
    while attempts is None or n < attempts:
        yield base**n * multiplier
        n += 1


if __name__ == "__main__":
    #print(list(Builder.get_docker_repositories('brainiak')))
    main()