Source code for dandi.consts

from __future__ import annotations

from collections.abc import Iterator
from dataclasses import dataclass
from enum import Enum
import os

#: A list of metadata fields which dandi extracts from .nwb files.
#: Additional fields (such as ``number_of_*``) might be added by
#: `get_metadata()`
metadata_nwb_file_fields = (
    "experiment_description",
    "experimenter",
    "identifier",  # note: required arg2 of NWBFile
    "institution",
    "keywords",
    "lab",
    "related_publications",
    "session_description",  # note: required arg1 of NWBFile
    "session_id",
    "session_start_time",
)

metadata_nwb_subject_fields = (
    "age",
    "date_of_birth",
    "genotype",
    "sex",
    "species",
    "subject_id",
)

metadata_nwb_dandi_fields = ("cell_id", "slice_id", "tissue_sample_id", "probe_ids")

metadata_nwb_computed_fields = (
    "number_of_electrodes",
    "number_of_units",
    "nwb_version",
    "nd_types",
)

metadata_bids_fields = ("bids_schema_version",)

metadata_nwb_fields = (
    metadata_nwb_file_fields
    + metadata_nwb_subject_fields
    + metadata_nwb_dandi_fields
    + metadata_nwb_computed_fields
)
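
# Illustrative sketch (not part of the module): these tuples can serve as a
# whitelist when filtering a raw metadata dict down to recognized NWB fields:
#
#     raw = {"lab": "Allen Institute", "sex": "F", "unrelated": 1}
#     nwb_meta = {k: v for k, v in raw.items() if k in metadata_nwb_fields}
#     # -> {"lab": "Allen Institute", "sex": "F"}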

# TODO: include/use the schema; for now, hardcode the most useful fields
# for use when listing dandisets
metadata_dandiset_fields = (
    "identifier",
    "name",
    "description",
    "license",
    "keywords",
    "version",
    "doi",
    "url",
    "variables_measured",
    "sex",
    "organism",
    "probe_ids",
    "number_of_subjects",
    "number_of_cells",
    "number_of_tissue_samples",
)

metadata_all_fields = (
    metadata_bids_fields + metadata_nwb_fields + metadata_dandiset_fields
)

#: Regular expression for a valid Dandiset identifier.  This regex is not
#: anchored.
DANDISET_ID_REGEX = r"[0-9]{6}"

#: Regular expression for a valid published (i.e., non-draft) Dandiset version
#: identifier.  This regex is not anchored.
PUBLISHED_VERSION_REGEX = r"[0-9]+\.[0-9]+\.[0-9]+"

#: Regular expression for a valid Dandiset version identifier.  This regex is
#: not anchored.
VERSION_REGEX = rf"(?:{PUBLISHED_VERSION_REGEX}|draft)"
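
# Illustrative example (not part of the module): since these regexes are not
# anchored, anchor them (e.g. with re.fullmatch()) when validating whole
# strings:
#
#     import re
#     assert re.fullmatch(DANDISET_ID_REGEX, "000027")
#     assert re.fullmatch(VERSION_REGEX, "0.210831.2033")
#     assert re.fullmatch(VERSION_REGEX, "draft")
#     assert not re.fullmatch(VERSION_REGEX, "0.1")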


class EmbargoStatus(Enum):
    OPEN = "OPEN"
    UNEMBARGOING = "UNEMBARGOING"
    EMBARGOED = "EMBARGOED"
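
# Illustrative example (not part of the module): the enum round-trips the
# string values used by the archive API:
#
#     status = EmbargoStatus("EMBARGOED")
#     assert status is EmbargoStatus.EMBARGOED
#     assert status.value == "EMBARGOED"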


dandiset_metadata_file = "dandiset.yaml"
dandiset_identifier_regex = f"^{DANDISET_ID_REGEX}$"


@dataclass(frozen=True)
class DandiInstance:
    name: str
    gui: str | None
    api: str

    @property
    def redirector(self) -> None:
        # For "backwards compatibility"
        return None

    def urls(self) -> Iterator[str]:
        if self.gui is not None:
            yield self.gui
        yield self.api
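
# Illustrative sketch (not part of the module): urls() yields the GUI URL only
# when one is configured, followed by the API URL.  "example" here is a
# made-up instance name:
#
#     inst = DandiInstance(
#         "example", "https://gui.example.org", "https://api.example.org/api"
#     )
#     assert list(inst.urls()) == [
#         "https://gui.example.org",
#         "https://api.example.org/api",
#     ]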


# So it can easily be mapped to an external IP (e.g. from within a VM)
# to test against an instance running outside of the current environment
instancehost = os.environ.get("DANDI_INSTANCEHOST", "localhost")

known_instances = {
    "dandi": DandiInstance(
        "dandi",
        "https://dandiarchive.org",
        "https://api.dandiarchive.org/api",
    ),
    "dandi-staging": DandiInstance(
        "dandi-staging",
        "https://gui-staging.dandiarchive.org",
        "https://api-staging.dandiarchive.org/api",
    ),
    "dandi-api-local-docker-tests": DandiInstance(
        "dandi-api-local-docker-tests",
        f"http://{instancehost}:8085",
        f"http://{instancehost}:8000/api",
    ),
}

# Reverse mapping: URL -> instance name
known_instances_rev = {
    vv: k for k, v in known_instances.items() for vv in v.urls() if vv
}

# Download (upload?) specific constants

#: Chunk size when iterating a download (and upload) body.  Taken from
#: girder-cli.
#: TODO: should we make them smaller for download than for upload?
#: ATM used only in download
MAX_CHUNK_SIZE = int(os.environ.get("DANDI_MAX_CHUNK_SIZE", 1024 * 1024 * 8))  # 64

#: The identifier for draft Dandiset versions
DRAFT = "draft"

#: HTTP response status codes that should always be retried (until we run out
#: of retries)
RETRY_STATUSES = (500, 502, 503, 504)

VIDEO_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv", ".mkv"]
VIDEO_FILE_MODULES = ["processing", "acquisition"]

ZARR_EXTENSIONS = [".ngff", ".zarr"]

#: Maximum allowed depth of a Zarr directory tree
MAX_ZARR_DEPTH = 7

#: MIME type assigned to & used to identify Zarr assets
ZARR_MIME_TYPE = "application/x-zarr"

#: Maximum number of Zarr directory entries to upload at once
ZARR_UPLOAD_BATCH_SIZE = 255

#: Maximum number of Zarr directory entries to delete at once
ZARR_DELETE_BATCH_SIZE = 100

BIDS_DATASET_DESCRIPTION = "dataset_description.json"

# Fields used to compose organized filenames.
# TODO: add a full description to the command --help, etc.
# Order matters!
dandi_layout_fields = {
    # "type": if not defined, treated as "additional"
    "subject_id": {"format": "sub-{}", "type": "required"},
    "session_id": {"format": "_ses-{}"},
    "tissue_sample_id": {"format": "_tis-{}"},
    "slice_id": {"format": "_slice-{}"},
    "cell_id": {"format": "_cell-{}"},
    # disambiguation ones
    "description": {"format": "_desc-{}", "type": "disambiguation"},
    "probe_ids": {"format": "_probe-{}", "type": "disambiguation"},
    "obj_id": {
        "format": "_obj-{}",
        "type": "disambiguation",
    },  # will not be the id itself, but a checksum of it, to keep it short
    # "session_description"
    "modalities": {"format": "_{}", "type": "required_if_not_empty"},
    "extension": {"format": "{}", "type": "required"},
}
# Verify that there are no typos in the "type" values
assert {v.get("type", "additional") for v in dandi_layout_fields.values()} == {
    "required",
    "disambiguation",
    "additional",
    "required_if_not_empty",
}

REQUEST_RETRIES = 12

DOWNLOAD_TIMEOUT = 30
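
# Illustrative sketch (not part of the module): a simplified view of how an
# organized filename could be composed from these fields, assuming each
# non-empty value is passed through its "format" template in dict order (the
# real `dandi organize` logic also handles types and disambiguation):
#
#     values = {"subject_id": "01", "session_id": "a", "extension": ".nwb"}
#     name = "".join(
#         spec["format"].format(values[field])
#         for field, spec in dandi_layout_fields.items()
#         if values.get(field)
#     )
#     # -> "sub-01_ses-a.nwb"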