Source code for dandi.validate._types

from __future__ import annotations

from enum import Enum, IntEnum, auto, unique
import logging
from pathlib import Path
from typing import Annotated, Any, Union

from pydantic import BaseModel, BeforeValidator, Field
from pydantic.functional_serializers import PlainSerializer

import dandi
from dandi.utils import StrEnum


class MissingFileContent(StrEnum):
    """Policy for handling files whose content is missing (e.g. broken symlinks
    in a datalad dataset without fetched data)."""

    error = auto()
    """Default policy: report a concise error for every file with missing
    content."""

    only_non_data = "only-non-data"
    """Content-dependent validators (e.g. pynwb, nwbinspector) are skipped, while
    non-data aspects such as path layout are still validated."""

    skip = auto()
    """The file is skipped entirely, and a WARNING is emitted noting that
    validation was skipped."""
# Module-level logger, named after this module through ``__name__``
lgr = logging.getLogger(__name__)
@unique
class Standard(StrEnum):
    """Standards to validate against"""

    # Data standards
    BIDS = auto()
    DANDI_LAYOUT = "DANDI-LAYOUT"
    DANDI_SCHEMA = "DANDI-SCHEMA"
    HED = auto()
    NWB = auto()
    OME_ZARR = "OME-ZARR"
    ZARR = auto()

    # File formats (used to denote validation failures at the file-format
    # level)
    JSON = auto()
    TSV = auto()
    YAML = auto()
@unique
class Validator(StrEnum):
    """Validators that are used to do validation"""

    # Members whose value is not a valid Python identifier spell the value
    # out explicitly; all other members rely on `auto()`.
    bids_validator_deno = "bids-validator-deno"
    bidsschematools = auto()
    dandi = auto()
    dandi_zarr = "dandi.zarr"
    dandischema = auto()
    hed_python_validator = "hed-python-validator"
    nwbinspector = auto()
    pynwb = auto()
    tensorstore = auto()
    zarr = auto()
class OriginType(StrEnum):
    """Types of validation result origins"""

    INTERNAL = auto()
    """
    The result originates from the validator itself and does not necessarily
    relate to validation of the data
    """

    VALIDATION = auto()
    """The result originates from validation of the data"""
class Origin(BaseModel):
    """
    Origin of the validation result
    """

    type: OriginType

    validator: Validator
    """Validator that conducted the validation"""

    validator_version: str
    """Version of that validator"""

    standard: Standard | None = None
    """Standard the data is validated against"""

    standard_version: str | None = None
    """Version of that standard"""

    standard_schema_version: str | None = None
    """
    Version of the schema used in defining or implementing the standard,
    e.g. the BIDS schema version in the case of BIDS
    """
# Pre-built `Origin` instances that are commonly used. Each one records the
# version of the `dandi` package as the validator version.

# Validation performed by the `dandi` validator, no standard specified
ORIGIN_VALIDATION_DANDI = Origin(
    type=OriginType.VALIDATION,
    validator=Validator.dandi,
    validator_version=dandi.__version__,
)

# Validation performed by the `dandi` validator against the DANDI layout
ORIGIN_VALIDATION_DANDI_LAYOUT = Origin(
    type=OriginType.VALIDATION,
    validator=Validator.dandi,
    validator_version=dandi.__version__,
    standard=Standard.DANDI_LAYOUT,
)

# Validation performed by the `dandi.zarr` validator
ORIGIN_VALIDATION_DANDI_ZARR = Origin(
    type=OriginType.VALIDATION,
    validator=Validator.dandi_zarr,
    validator_version=dandi.__version__,
)

# Internal (not necessarily data-related) result from the `dandi` validator
ORIGIN_INTERNAL_DANDI = Origin(
    type=OriginType.INTERNAL,
    validator=Validator.dandi,
    validator_version=dandi.__version__,
)
class Severity(IntEnum):
    """Severity levels for validation results"""

    INFO = 10
    """Status information or confirmation; does not indicate a problem"""

    HINT = 20
    """The data is valid, but there is room for improvement"""

    WARNING = 30
    """
    The data is not recognized as valid; changes are needed to ensure
    validity
    """

    ERROR = 40
    """The data is recognized as invalid"""

    CRITICAL = 50
    """
    A serious invalidity in the data, e.g. one that prevents other aspects of
    the data from being validated. When validating against the BIDS standard,
    data lacking a `BIDSVersion` field, or carrying an invalid one, is such a
    case.
    """
_SeverityName = Enum(  # type: ignore[misc]
    "_SeverityName", [(name, name) for name in Severity.__members__]
)
"""
Enum whose members are the names of the members of `Severity`

It is used for generating the JSON schema for `Severity_`
"""


def _accept_severity_by_name(v: Any) -> Any:
    """
    Validator function meant for use in `BeforeValidator`; it lets the name
    of a `Severity` member be validated as the corresponding `Severity` value.

    :param v: the raw input value
    :return: the `Severity` member named by `v` if `v` is a string, otherwise
        `v` unchanged
    :raises ValueError: if `v` is a string that does not name a `Severity`
        member
    """
    # Non-string input is passed through untouched
    if not isinstance(v, str):
        return v

    if v not in Severity.__members__:
        raise ValueError(
            f"Invalid severity name: {v}. "
            f"Valid names are: {', '.join(Severity.__members__)}"
        )

    return Severity[v]


Severity_ = Annotated[
    Severity,
    BeforeValidator(
        _accept_severity_by_name,
        json_schema_input_type=Union[Severity, _SeverityName],
    ),
    PlainSerializer(
        lambda severity: _SeverityName[severity.name],
        return_type=_SeverityName,
        when_used="json",
    ),
]
"""
Annotated form of `Severity`. With it, `Severity` values are serialized as
their member names when serialization is done in the "json" mode.
"""
# Possible values of the `scope` field of `ValidationResult`
class Scope(Enum):
    FILE = "file"
    FOLDER = "folder"
    DANDISET = "dandiset"
    DATASET = "dataset"
# Default value of `ValidationResult.record_version`;
# `ValidationResult.model_post_init` logs a warning when a record carries a
# different version
CURRENT_RECORD_VERSION = "1"
# A single validation result: which validator/standard produced it (`origin`),
# what it applies to (`scope` and the various path fields) and what was found
# (`severity`, `message`, ...).
class ValidationResult(BaseModel):
    record_version: str = CURRENT_RECORD_VERSION
    """Version of the serialized record format, kept for forward compatibility"""

    id: str

    origin: Origin
    """Origin of the result, i.e. the validator and standard that produced it"""

    scope: Scope

    origin_result: Any | None = Field(default=None, exclude=True)
    """
    The validation result exactly as produced by the validator used,
    `self.origin.validator`, without any change
    """

    severity: Severity_ | None = None

    # When asset_paths is not populated, [.path] is assumed. It could however
    # be something like
    # {"path": "task-broken_bold.json",
    #  "asset_paths": ["sub-01/func/sub-01_task-broken_bold.json",
    #                  "sub-02/func/sub-02_task-broken_bold.json"]}
    asset_paths: list[str] | None = None

    # e.g. a path within the hierarchy of an hdf5 file.
    # As a dict, it maps asset_paths to a location within each of them
    within_asset_paths: dict[str, str] | None = None

    dandiset_path: Path | None = None
    dataset_path: Path | None = None

    # TODO: locations analogous to nwbinspector.InspectorMessage.location;
    # since there may be multiple asset_paths, we might want a dict that
    # points to the location in some or each of the affected assets
    message: str | None = None
    metadata: dict | None = None

    # ??? should this become a list, e.g. for errors that rely on multiple
    # files, such as a mismatch between a .nii.gz header and a .json sidecar
    path: Path | None = None
    path_regex: str | None = None

    def model_post_init(self, __context: Any) -> None:
        """
        Log a warning if `record_version` is not `CURRENT_RECORD_VERSION`.

        The record is kept regardless; nothing is raised.
        """
        if self.record_version == CURRENT_RECORD_VERSION:
            return
        lgr.warning(
            "record_version %r != current %r, loading anyway",
            self.record_version,
            CURRENT_RECORD_VERSION,
        )

    @property
    def purview(self) -> str | None:
        """
        The first of `path`, `path_regex` and `dataset_path` that is set.

        `path` and `dataset_path` are returned as strings. The result is
        `None` when none of the three is set.
        """
        if self.path is not None:
            return str(self.path)
        if self.path_regex is not None:
            return self.path_regex
        if self.dataset_path is not None:
            return str(self.dataset_path)
        return None