Source code for dandi.validate

from __future__ import annotations

from collections.abc import Iterator
import os
from pathlib import Path

from . import __version__
from .consts import dandiset_metadata_file
from .files import find_dandi_files
from .utils import find_parent_directory_containing
from .validate_types import Scope, Severity, ValidationOrigin, ValidationResult

# Mapping of BIDS entity names to their DANDI metadata counterparts.
BIDS_TO_DANDI = {
    "subject": "subject_id",
    "session": "session_id",
}
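
# For example (a sketch; the entry values are hypothetical), a bidsschematools
# match entry such as {"subject": "01", "session": "02", "run": "1"} filtered
# through BIDS_TO_DANDI below yields {"subject_id": "01", "session_id": "02"}:
# unmapped keys are dropped and mapped keys are renamed.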


def validate_bids(
    *paths: str | Path,
    schema_version: str | None = None,
) -> list[ValidationResult]:
    """Validate BIDS paths.

    Parameters
    ----------
    paths : str or Path
        Paths to validate.
    schema_version : str, optional
        BIDS schema version to validate against; not currently passed on to
        the underlying validator.

    Returns
    -------
    list of ValidationResult
        Results reporting required patterns not found and existing filenames
        not matching any patterns.

    Notes
    -----
    - Eventually this should be migrated to BIDS schema specified errors, see
      discussion here:
      https://github.com/bids-standard/bids-specification/issues/1262
    """
    import bidsschematools
    from bidsschematools.validator import validate_bids as validate_bids_

    validation_result = validate_bids_(paths, exclude_files=["dandiset.yaml"])
    our_validation_result = []
    origin = ValidationOrigin(
        name="bidsschematools",
        version=bidsschematools.__version__,
        bids_version=validation_result["bids_version"],
    )

    # Cache the parent directory so that the dataset and dandiset paths are
    # not recomputed for every individual file.
    parent_path = None
    for path in validation_result["path_tracking"]:
        # Hard-coding exclusion here pending feature + release in:
        # https://github.com/bids-standard/bids-specification/issues/1272
        if path.endswith((".ERRORS", ".ERRORS.json")):
            continue
        if parent_path != os.path.dirname(path):
            parent_path = os.path.dirname(path)
            dataset_path = find_parent_directory_containing(
                "dataset_description.json", parent_path
            )
            dandiset_path = find_parent_directory_containing(
                "dandiset.yaml", parent_path
            )
        our_validation_result.append(
            ValidationResult(
                origin=origin,
                severity=Severity.ERROR,
                id="BIDS.NON_BIDS_PATH_PLACEHOLDER",
                scope=Scope.FILE,
                path=Path(path),
                message="File does not match any pattern known to BIDS.",
                dataset_path=dataset_path,
                dandiset_path=dandiset_path,
            )
        )
    for pattern in validation_result["schema_tracking"]:
        # Future-proofing for the standard-compliant name.
        if pattern.get("mandatory") or pattern.get("required"):
            # We don't have a path for this, so some external logic will need
            # to make sure that the dataset path is populated.
            # dataset_path = find_parent_directory_containing(paths, path)
            our_validation_result.append(
                ValidationResult(
                    origin=origin,
                    severity=Severity.ERROR,
                    id="BIDS.MANDATORY_FILE_MISSING_PLACEHOLDER",
                    scope=Scope.DATASET,
                    path_regex=pattern["regex"],
                    message="BIDS-required file is not present.",
                )
            )
    # Cache the parent directory again for the match listing.
    parent_path = None
    for meta in validation_result["match_listing"]:
        file_path = meta.pop("path")
        meta = {BIDS_TO_DANDI[k]: v for k, v in meta.items() if k in BIDS_TO_DANDI}
        if parent_path != os.path.dirname(file_path):
            parent_path = os.path.dirname(file_path)
            dataset_path = find_parent_directory_containing(
                "dataset_description.json", parent_path
            )
            dandiset_path = find_parent_directory_containing(
                "dandiset.yaml", parent_path
            )
        our_validation_result.append(
            ValidationResult(
                origin=origin,
                id="BIDS.MATCH",
                scope=Scope.FILE,
                path=Path(file_path),
                metadata=meta,
                dataset_path=dataset_path,
                dandiset_path=dandiset_path,
            )
        )
    return our_validation_result
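
# Example usage (a hedged sketch, not part of the module; "my-bids-dataset"
# is a hypothetical dataset directory, and the attributes read off each
# result mirror the ValidationResult fields populated above):
#
#     results = validate_bids("my-bids-dataset")
#     for r in results:
#         if r.severity is Severity.ERROR:
#             print(r.id, r.path or r.path_regex, r.message)
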
def validate(
    *paths: str | Path,
    schema_version: str | None = None,
    devel_debug: bool = False,
    allow_any_path: bool = False,
) -> Iterator[ValidationResult]:
    """Validate content.

    Parameters
    ----------
    paths : str or Path
        Individual (.nwb) files or a single dandiset path.

    Yields
    ------
    ValidationResult
        Validation results for the given paths.
    """
    for p in paths:
        p = os.path.abspath(p)
        dandiset_path = find_parent_directory_containing(dandiset_metadata_file, p)
        if dandiset_path is None:
            yield ValidationResult(
                id="DANDI.NO_DANDISET_FOUND",
                origin=ValidationOrigin(name="dandi", version=__version__),
                severity=Severity.ERROR,
                scope=Scope.DANDISET,
                path=Path(p),
                message="Path is not inside a Dandiset",
            )
        for df in find_dandi_files(
            p, dandiset_path=dandiset_path, allow_all=allow_any_path
        ):
            yield from df.get_validation_errors(
                schema_version=schema_version, devel_debug=devel_debug
            )
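
# Example usage (a hedged sketch, not part of the module; "000XXX" is a
# hypothetical local Dandiset directory):
#
#     for r in validate("000XXX", allow_any_path=True):
#         print(r.severity, r.id, r.path, r.message)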