from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from enum import Enum
import os
#: A tuple of metadata fields which dandi extracts from .nwb files.
#: Additional fields (such as ``number_of_*``) might be added by
#: `get_metadata()`.
metadata_nwb_file_fields = (
"experiment_description",
"experimenter",
"identifier", # note: required arg2 of NWBFile
"institution",
"keywords",
"lab",
"related_publications",
"session_description", # note: required arg1 of NWBFile
"session_id",
"session_start_time",
)
metadata_nwb_subject_fields = (
"age",
"date_of_birth",
"genotype",
"sex",
"species",
"subject_id",
)
metadata_nwb_dandi_fields = ("cell_id", "slice_id", "tissue_sample_id", "probe_ids")
metadata_nwb_computed_fields = (
"number_of_electrodes",
"number_of_units",
"nwb_version",
"nd_types",
)
metadata_bids_fields = ("bids_schema_version",)
metadata_nwb_fields = (
metadata_nwb_file_fields
+ metadata_nwb_subject_fields
+ metadata_nwb_dandi_fields
+ metadata_nwb_computed_fields
)
# TODO: include/use schema, for now hardcoding most useful ones to be used
# while listing dandisets
metadata_dandiset_fields = (
"identifier",
"name",
"description",
"license",
"keywords",
"version",
"doi",
"url",
"variables_measured",
"sex",
"organism",
"probe_ids",
"number_of_subjects",
"number_of_cells",
"number_of_tissue_samples",
)
metadata_all_fields = (
metadata_bids_fields + metadata_nwb_fields + metadata_dandiset_fields
)
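

# Illustrative sketch (hypothetical helper, not part of the dandi API):
# restricting an arbitrary metadata record to the known field names above.
def _example_known_metadata(record: dict) -> dict:
    return {k: v for k, v in record.items() if k in metadata_all_fields}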
#: Regular expression for a valid Dandiset identifier. This regex is not
#: anchored.
DANDISET_ID_REGEX = r"[0-9]{6}"
#: Regular expression for a valid published (i.e., non-draft) Dandiset version
#: identifier. This regex is not anchored.
PUBLISHED_VERSION_REGEX = r"[0-9]+\.[0-9]+\.[0-9]+"
#: Regular expression for a valid Dandiset version identifier. This regex is
#: not anchored.
VERSION_REGEX = rf"(?:{PUBLISHED_VERSION_REGEX}|draft)"
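

# Illustrative sketch (hypothetical helper, not part of the dandi API): the
# regexes above are deliberately unanchored, so callers must anchor them
# themselves, e.g. with re.fullmatch():
def _example_is_valid_version(s: str) -> bool:
    import re

    # Accepts either a published "X.Y.Z" version or the literal "draft"
    return re.fullmatch(VERSION_REGEX, s) is not None


# e.g. _example_is_valid_version("0.210831.2033") is True,
# while _example_is_valid_version("v1.0") is False.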


class EmbargoStatus(Enum):
    """Possible embargo states of a Dandiset."""
OPEN = "OPEN"
UNEMBARGOING = "UNEMBARGOING"
EMBARGOED = "EMBARGOED"


dandiset_metadata_file = "dandiset.yaml"
dandiset_identifier_regex = f"^{DANDISET_ID_REGEX}$"


@dataclass(frozen=True)
class DandiInstance:
    """Name and URLs identifying a DANDI Archive instance."""

    name: str
    gui: str | None
    api: str

@property
def redirector(self) -> None:
# For "backwards compatibility"
return None

    def urls(self) -> Iterator[str]:
        """Yield the URLs (GUI, if present, then API) of this instance."""
if self.gui is not None:
yield self.gui
yield self.api


# The instance host can be mapped to an external IP (e.g., from within a VM)
# to test against an instance running outside the current environment.
instancehost = os.environ.get("DANDI_INSTANCEHOST", "localhost")
known_instances = {
"dandi": DandiInstance(
"dandi",
"https://dandiarchive.org",
"https://api.dandiarchive.org/api",
),
"dandi-staging": DandiInstance(
"dandi-staging",
"https://gui-staging.dandiarchive.org",
"https://api-staging.dandiarchive.org/api",
),
"dandi-api-local-docker-tests": DandiInstance(
"dandi-api-local-docker-tests",
f"http://{instancehost}:8085",
f"http://{instancehost}:8000/api",
),
}
# Reverse mapping from instance URL to instance name
known_instances_rev = {
vv: k for k, v in known_instances.items() for vv in v.urls() if vv
}
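

# Illustrative sketch (hypothetical helper, not part of the dandi API):
# resolving an instance either by its name or by any of its known URLs.
def _example_lookup_instance(name_or_url: str) -> DandiInstance:
    # Try the name first, then fall back to the URL -> name reverse mapping
    if name_or_url in known_instances:
        return known_instances[name_or_url]
    return known_instances[known_instances_rev[name_or_url]]


# e.g. _example_lookup_instance("https://api.dandiarchive.org/api").name == "dandi"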
# Download- (and upload-) specific constants
#: Chunk size when iterating over a download (and upload) body. Taken from
#: girder-cli.
#: TODO: should it be smaller for download than for upload?
#: ATM used only in download.
MAX_CHUNK_SIZE = int(os.environ.get("DANDI_MAX_CHUNK_SIZE", 1024 * 1024 * 8))  # 8 MiB
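

# Illustrative sketch (assumes the `requests` library; not the client's actual
# download code): streaming a response body in MAX_CHUNK_SIZE chunks.
# DOWNLOAD_TIMEOUT is defined further below; globals resolve at call time.
def _example_stream_body(url: str) -> Iterator[bytes]:
    import requests

    with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as r:
        r.raise_for_status()
        yield from r.iter_content(chunk_size=MAX_CHUNK_SIZE)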
#: The identifier for draft Dandiset versions
DRAFT = "draft"
#: HTTP response status codes that should always be retried (until we run out
#: of retries)
RETRY_STATUSES = (500, 502, 503, 504)
VIDEO_FILE_EXTENSIONS = [".mp4", ".avi", ".wmv", ".mov", ".flv", ".mkv"]
VIDEO_FILE_MODULES = ["processing", "acquisition"]
ZARR_EXTENSIONS = [".ngff", ".zarr"]
#: Maximum allowed depth of a Zarr directory tree
MAX_ZARR_DEPTH = 7
#: MIME type assigned to & used to identify Zarr assets
ZARR_MIME_TYPE = "application/x-zarr"
#: Maximum number of Zarr directory entries to upload at once
ZARR_UPLOAD_BATCH_SIZE = 255
#: Maximum number of Zarr directory entries to delete at once
ZARR_DELETE_BATCH_SIZE = 100
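

# Illustrative sketch (hypothetical helper, not part of the dandi API):
# splitting a sequence of Zarr directory entries into upload-sized batches.
def _example_batched(
    entries: list, size: int = ZARR_UPLOAD_BATCH_SIZE
) -> Iterator[list]:
    for i in range(0, len(entries), size):
        yield entries[i : i + size]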
BIDS_DATASET_DESCRIPTION = "dataset_description.json"
# Fields used to compose organized filenames; order matters!  See the
# illustrative sketch after the consistency check below.
# TODO: add a full description to the command --help, etc.
dandi_layout_fields = {
# "type" - if not defined, additional
"subject_id": {"format": "sub-{}", "type": "required"},
"session_id": {"format": "_ses-{}"},
"tissue_sample_id": {"format": "_tis-{}"},
"slice_id": {"format": "_slice-{}"},
"cell_id": {"format": "_cell-{}"},
# disambiguation ones
"description": {"format": "_desc-{}", "type": "disambiguation"},
"probe_ids": {"format": "_probe-{}", "type": "disambiguation"},
"obj_id": {
"format": "_obj-{}",
"type": "disambiguation",
}, # will be not id, but checksum of it to shorten
# "session_description"
"modalities": {"format": "_{}", "type": "required_if_not_empty"},
"extension": {"format": "{}", "type": "required"},
}
# Verify there are no typos in the "type" values above
assert {v.get("type", "additional") for v in dandi_layout_fields.values()} == {
"required",
"disambiguation",
"additional",
"required_if_not_empty",
}
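

# Illustrative sketch (hypothetical helper, not the actual organize logic;
# enforcement of "required" fields is omitted): composing an organized
# filename by applying each field's format string, in insertion order, to
# whatever values are present.
def _example_compose_filename(values: dict) -> str:
    parts = []
    for field, spec in dandi_layout_fields.items():
        value = values.get(field)
        if value not in (None, ""):
            parts.append(spec["format"].format(value))
    return "".join(parts)


# e.g. _example_compose_filename(
#     {"subject_id": "01", "session_id": "a", "extension": ".nwb"}
# ) == "sub-01_ses-a.nwb"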
#: Number of times to retry a failed HTTP request
REQUEST_RETRIES = 12
#: Timeout (in seconds) for download requests
DOWNLOAD_TIMEOUT = 30
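

# Illustrative sketch (assumes the `requests` library; not the client's actual
# retry logic): retrying requests that fail with one of RETRY_STATUSES, with
# exponential backoff, for up to REQUEST_RETRIES attempts.
def _example_get_with_retries(url: str) -> "requests.Response":
    import time

    import requests

    for attempt in range(REQUEST_RETRIES):
        response = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
        if response.status_code not in RETRY_STATUSES:
            return response
        time.sleep(min(2**attempt, 60))  # back off, capped at 60 seconds
    return response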