"""
.. versionadded:: 0.36.0
`dandi.files` defines functionality for working with local files & directories
(as opposed to remote resources on a DANDI Archive server) that are of interest
to DANDI. The classes for such files & directories all inherit from
`DandiFile`, which has two immediate subclasses: `DandisetMetadataFile`, for
representing :file:`dandiset.yaml` files, and `LocalAsset`, for representing
files that can be uploaded as assets to DANDI Archive.
"""
from __future__ import annotations
from collections import deque
from collections.abc import Iterator
import os.path
from pathlib import Path
from dandi import get_logger
from dandi.consts import BIDS_DATASET_DESCRIPTION, dandiset_metadata_file
from dandi.exceptions import UnknownAssetError
from ._private import BIDSFileFactory, DandiFileFactory
from .bases import (
DandiFile,
DandisetMetadataFile,
GenericAsset,
LocalAsset,
LocalDirectoryAsset,
LocalFileAsset,
NWBAsset,
VideoAsset,
)
from .bids import (
BIDSAsset,
BIDSDatasetDescriptionAsset,
GenericBIDSAsset,
NWBBIDSAsset,
ZarrBIDSAsset,
)
from .zarr import LocalZarrEntry, ZarrAsset, ZarrStat
__all__ = [
"BIDSAsset",
"BIDSDatasetDescriptionAsset",
"DandiFile",
"DandisetMetadataFile",
"GenericAsset",
"GenericBIDSAsset",
"LocalAsset",
"LocalDirectoryAsset",
"LocalFileAsset",
"LocalZarrEntry",
"NWBAsset",
"NWBBIDSAsset",
"VideoAsset",
"ZarrAsset",
"ZarrBIDSAsset",
"ZarrStat",
"dandi_file",
"find_dandi_files",
"find_bids_dataset_description",
]
lgr = get_logger()
[docs]def find_dandi_files(
*paths: str | Path,
dandiset_path: str | Path | None = None,
allow_all: bool = False,
include_metadata: bool = False,
) -> Iterator[DandiFile]:
"""
Yield all DANDI files at or under the paths in ``paths`` (which may be
either files or directories). Files & directories whose names start with a
period are ignored. Directories are only included in the return value if
they are of a type represented by a `LocalDirectoryAsset` subclass, in
which case they are not recursed into.
:param dandiset_path:
The path to the root of the Dandiset in which the paths are located.
All paths in ``paths`` must be equal to or subpaths of
``dandiset_path``. If `None`, then the Dandiset path for each asset
found is implicitly set to the parent directory.
:param allow_all:
If true, unrecognized assets and the Dandiset's :file:`dandiset.yaml`
file are returned as `GenericAsset` and `DandisetMetadataFile`
instances, respectively. If false, they are not returned at all.
:param include_metadata:
If true, the Dandiset's :file:`dandiset.yaml` file is returned as a
`DandisetMetadataFile` instance. If false, it is not returned at all
(unless ``allow_all`` is true).
"""
# A pair of each file or directory being considered plus the most recent
# BIDS dataset_description.json file at the path (if a directory) or in a
# parent path
path_queue: deque[tuple[Path, BIDSDatasetDescriptionAsset | None]] = deque()
for p in map(Path, paths):
if dandiset_path is not None:
try:
p.relative_to(dandiset_path)
except ValueError:
raise ValueError(
f"Path {str(p)!r} is not inside Dandiset path {str(dandiset_path)!r}"
)
path_queue.append((Path(p), None))
bids_roots = []
while path_queue:
p, bidsdd = path_queue.popleft()
if p.name.startswith("."):
continue
if p.is_dir():
if p.is_symlink():
lgr.warning("%s: Ignoring unsupported symbolic link to directory", p)
elif dandiset_path is not None and p == Path(dandiset_path):
if os.path.lexists(p / BIDS_DATASET_DESCRIPTION):
bids = dandi_file(p / BIDS_DATASET_DESCRIPTION, dandiset_path)
assert isinstance(bids, BIDSDatasetDescriptionAsset)
bidsdd = bids
bids_roots.append(p)
path_queue.extend((q, bidsdd) for q in p.iterdir())
elif any(p.iterdir()):
try:
df = dandi_file(p, dandiset_path, bids_dataset_description=bidsdd)
except UnknownAssetError:
# The directory does not have a recognized file extension
# (ie., it's not a Zarr or any other directory asset type
# we may add later), so traverse through it as a regular
# directory.
if os.path.lexists(p / BIDS_DATASET_DESCRIPTION) and not any(
i in p.parents for i in bids_roots
): # No nested BIDS
bids2 = dandi_file(p / BIDS_DATASET_DESCRIPTION, dandiset_path)
assert isinstance(bids2, BIDSDatasetDescriptionAsset)
bidsdd = bids2
bids_roots.append(p)
path_queue.extend((q, bidsdd) for q in p.iterdir())
else:
yield df
else:
df = dandi_file(p, dandiset_path, bids_dataset_description=bidsdd)
# Don't use isinstance() here, as GenericBIDSAsset's should still
# be returned
if type(df) is GenericAsset and not allow_all:
pass
elif isinstance(df, DandisetMetadataFile) and not (
allow_all or include_metadata
):
pass
else:
yield df
[docs]def dandi_file(
filepath: str | Path,
dandiset_path: str | Path | None = None,
bids_dataset_description: BIDSDatasetDescriptionAsset | None = None,
) -> DandiFile:
"""
Return a `DandiFile` instance of the appropriate type for the file at
``filepath`` inside the Dandiset rooted at ``dandiset_path``. If
``dandiset_path`` is not set, it will default to ``filepath``'s parent
directory.
If ``bids_dataset_description`` is set, the file will be assumed to lie
within the BIDS dataset with the given :file:`dataset_description.json`
file at its root, resulting in a `BIDSAsset`.
If ``filepath`` is a directory, it must be of a type represented by a
`LocalDirectoryAsset` subclass; otherwise, an `UnknownAssetError` exception
will be raised.
A regular file named :file:`dandiset.yaml` will only be represented by a
`DandisetMetadataFile` instance if it is at the root of the Dandiset.
A regular file that is not of a known type will be represented by a
`GenericAsset` instance.
"""
filepath = Path(filepath)
if dandiset_path is not None:
dandiset_path = Path(dandiset_path)
path = filepath.relative_to(dandiset_path).as_posix()
if path == ".":
raise ValueError("Dandi file path cannot equal Dandiset path")
else:
path = filepath.name
if filepath.is_file() and path == dandiset_metadata_file:
return DandisetMetadataFile(filepath=filepath, dandiset_path=dandiset_path)
if bids_dataset_description is None:
factory = DandiFileFactory()
else:
factory = BIDSFileFactory(bids_dataset_description)
return factory(filepath, path, dandiset_path)
[docs]def find_bids_dataset_description(
dirpath: str | Path, dandiset_path: str | Path | None = None
) -> BIDSDatasetDescriptionAsset | None:
"""
.. versionadded:: 0.46.0
Look for the topmost :file:`dataset_description.json` file in the directory
``dirpath`` and each of its parents, stopping when a :file:`dandiset.yaml`
file is found or ``dandiset_path`` is reached.
"""
topmost: BIDSDatasetDescriptionAsset | None = None
dirpath = Path(dirpath)
for d in (dirpath, *dirpath.parents):
bids_marker = d / BIDS_DATASET_DESCRIPTION
dandi_end = d / dandiset_metadata_file
if bids_marker.is_file() or bids_marker.is_symlink():
f = dandi_file(bids_marker, dandiset_path)
assert isinstance(f, BIDSDatasetDescriptionAsset)
topmost = f
elif dandi_end.is_file() or dandi_end.is_symlink():
break
elif dandiset_path is not None and d == Path(dandiset_path):
break
return topmost