diff --git a/docs/developer/adrs.md b/docs/developer/adrs.md new file mode 100644 index 0000000..f0af015 --- /dev/null +++ b/docs/developer/adrs.md @@ -0,0 +1,14 @@ +# Architecture Decision Records + +## ADR-001: Allow up to 2 dimensional coordinates/mask as coordinates and masks. + +At the beginning we only allowed scalar or 1 dimensional coordinates or masks in the metadata. +It is because the metadata is stored as plain text in the tiff file. +If we allow arbitrary dimensional coordinates or masks, the metadata size may exceed the size of the image file itself. +There is no strict rule about the size of the metadata in the tiff format itself. +We try to keep the metadata size small as possible to increase the usability and reduce potential tourbles such as storage size or loading latency. + +However, there was need for 2 dimensional coordinates, especially for the coordinate that depends on the pixel position. +For example, when a tiff file is a histogram of (x, y, tof), in order to compute wavelength, we need `Ltotal` of each pixel, which is a 2 dimensional (x, y) coordinate. +This usecase was found in the detector position calibration routine. + diff --git a/docs/developer/index.md b/docs/developer/index.md index 9dc534a..150f3d9 100644 --- a/docs/developer/index.md +++ b/docs/developer/index.md @@ -13,4 +13,5 @@ maxdepth: 2 getting-started coding-conventions dependency-management +adrs ``` diff --git a/src/scitiff/_json_helpers.py b/src/scitiff/_json_helpers.py index 8bc51dc..5265621 100644 --- a/src/scitiff/_json_helpers.py +++ b/src/scitiff/_json_helpers.py @@ -15,7 +15,7 @@ def _idx_endswith(lines: list[str], *suffix: str) -> int: def _join_beautified_array(lines: list[str], cur: str = '') -> str: try: left_bracket = _idx_endswith(lines, '[') - right_bracket = _idx_endswith(lines, ']', '],') + right_bracket = left_bracket + _idx_endswith(lines[left_bracket:], ']', '],') left, array_items, lines = ( lines[: left_bracket + 1], lines[left_bracket + 1 : right_bracket + 1], diff --git a/src/scitiff/_resources/metadata-schema.json b/src/scitiff/_resources/metadata-schema.json index da9f958..4d8e2bb 100644 --- a/src/scitiff/_resources/metadata-schema.json +++ b/src/scitiff/_resources/metadata-schema.json @@ -53,12 +53,16 @@ "properties": { "data": {"$ref": "#/$defs/ImageVariableMetadata"}, "masks": { - "additionalProperties": {"$ref": "#/$defs/ScippVariable"}, + "additionalProperties": { + "anyOf": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable1D" }, { "$ref": "#/$defs/ScippVariable2D" }] + }, "title": "Masks", "type": "object" }, "coords": { - "additionalProperties": {"$ref": "#/$defs/ScippVariable"}, + "additionalProperties": { + "anyOf": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable1D" }, { "$ref": "#/$defs/ScippVariable2D" }] + }, "title": "Coords", "type": "object" }, @@ -75,6 +79,11 @@ "ImageVariableMetadata": { "description": "Image Metadata.", "properties": { + "unit": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Unit" + }, + "dtype": {"title": "Dtype", "type": "string"}, "dims": { "maxItems": 5, "minItems": 5, @@ -88,21 +97,22 @@ "prefixItems": [{ "type": "integer" }, { "type": "integer" }, { "type": "integer" }, { "type": "integer" }, { "type": "integer" }], "title": "Shape", "type": "array" - }, - "unit": { - "anyOf": [{ "type": "string" }, { "type": "null" }], - "title": "Unit" - }, - "dtype": {"title": "Dtype", "type": "string"} + } }, - "required": ["dims", "shape", "unit", "dtype"], + "required": ["unit", "dtype", "dims", "shape"], "title": "ImageVariableMetadata", "type": "object" }, "NeutronMetadata": { "properties": { "neutron_type": {"$ref": "#/$defs/NeutronSourceType"}, - "wavelength_range": {"$ref": "#/$defs/ScippVariable"} + "wavelength_range": { + "maxItems": 2, + "minItems": 2, + "prefixItems": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable0D" }], + "title": "Wavelength Range", + "type": "array" + } }, "required": ["neutron_type", "wavelength_range"], "title": "NeutronMetadata", @@ -134,31 +144,89 @@ "title": "SciTiffMetadata", "type": "object" }, - "ScippVariable": { - "description": "Scipp Variable Metadata with the values.\n\nOnly 1D variable is allowed for metadata.", + "ScippVariable0D": { + "description": "Scipp Variable Metadata with scalar value.", "properties": { + "unit": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Unit" + }, + "dtype": {"title": "Dtype", "type": "string"}, + "dims": {"default": [], "maxItems": 0, "minItems": 0, "title": "Dims", "type": "array"}, + "shape": {"default": [], "maxItems": 0, "minItems": 0, "title": "Shape", "type": "array"}, + "values": { + "anyOf": [{ "type": "number" }, { "type": "string" }], + "title": "Values" + } + }, + "required": ["unit", "dtype", "values"], + "title": "ScippVariable0D", + "type": "object" + }, + "ScippVariable1D": { + "description": "Scipp Variable Metadata with 1D array values.", + "properties": { + "unit": { + "anyOf": [{ "type": "string" }, { "type": "null" }], + "title": "Unit" + }, + "dtype": {"title": "Dtype", "type": "string"}, "dims": { - "items": {"type": "string"}, + "maxItems": 1, + "minItems": 1, + "prefixItems": [{ "type": "string" }], "title": "Dims", "type": "array" }, "shape": { - "items": {"type": "integer"}, + "maxItems": 1, + "minItems": 1, + "prefixItems": [{ "type": "integer" }], "title": "Shape", "type": "array" }, + "values": { + "anyOf": [{ "items": { "type": "number" }, "type": "array" }, { "items": { "type": "string" }, "type": "array" }], + "title": "Values" + } + }, + "required": ["unit", "dtype", "dims", "shape", "values"], + "title": "ScippVariable1D", + "type": "object" + }, + "ScippVariable2D": { + "description": "Scipp Variable Metadata with 2D array values.\n\nFor 2D array, only numbers(float/int) are allowed.", + "properties": { "unit": { "anyOf": [{ "type": "string" }, { "type": "null" }], "title": "Unit" }, "dtype": {"title": "Dtype", "type": "string"}, + "dims": { + "maxItems": 2, + "minItems": 2, + "prefixItems": [{ "type": "string" }, { "type": "string" }], + "title": "Dims", + "type": "array" + }, + "shape": { + "maxItems": 2, + "minItems": 2, + "prefixItems": [{ "type": "integer" }, { "type": "integer" }], + "title": "Shape", + "type": "array" + }, "values": { - "anyOf": [{ "type": "number" }, { "type": "string" }, { "items": { "type": "number" }, "type": "array" }, { "items": { "type": "string" }, "type": "array" }], - "title": "Values" + "items": { + "items": {"type": "number"}, + "type": "array" + }, + "title": "Values", + "type": "array" } }, - "required": ["dims", "shape", "unit", "dtype", "values"], - "title": "ScippVariable", + "required": ["unit", "dtype", "dims", "shape", "values"], + "title": "ScippVariable2D", "type": "object" }, "SourceType": { diff --git a/src/scitiff/_schema.py b/src/scitiff/_schema.py index 373c83f..672f82d 100644 --- a/src/scitiff/_schema.py +++ b/src/scitiff/_schema.py @@ -39,20 +39,40 @@ class ScippVariableMetadata(BaseModel): """Scipp Variable Metadata without the values.""" - dims: tuple[str, ...] - shape: tuple[int, ...] unit: str | None dtype: str -class ScippVariable(ScippVariableMetadata): - """Scipp Variable Metadata with the values. +class ScippVariable0D(ScippVariableMetadata): + """Scipp Variable Metadata with scalar value.""" - Only 1D variable is allowed for metadata. - """ + dims: tuple[()] = Field(default=()) + shape: tuple[()] = Field(default=()) + values: float | str + """The value of the scalar variable""" - values: float | str | list[float] | list[str] - """The values of the variable.""" + +class ScippVariable1D(ScippVariableMetadata): + """Scipp Variable Metadata with 1D array values.""" + + dims: tuple[str] + shape: tuple[int] + values: list[float] | list[str] + """The 1D values list of the variable.""" + + +class ScippVariable2D(ScippVariableMetadata): + """Scipp Variable Metadata with 2D array values. + + For 2D array, only numbers(float/int) are allowed.""" + + dims: tuple[str, str] + shape: tuple[int, int] + values: list[list[float]] = Field(strict=True) + """The 2D values list of the variable.""" + + +ScippVariable = ScippVariable0D | ScippVariable1D | ScippVariable2D class ImageVariableMetadata(ScippVariableMetadata): @@ -103,7 +123,7 @@ class NeutronSourceType(Enum): class NeutronMetadata(BaseModel): neutron_type: NeutronSourceType - wavelength_range: ScippVariable + wavelength_range: tuple[ScippVariable0D, ScippVariable0D] class XRayMetadata(BaseModel): ... diff --git a/src/scitiff/executables.py b/src/scitiff/executables.py index 09eca51..8d5c7c4 100644 --- a/src/scitiff/executables.py +++ b/src/scitiff/executables.py @@ -13,7 +13,7 @@ DAQMetadata, ImageDataArrayMetadata, ImageVariableMetadata, - ScippVariable, + ScippVariable1D, SciTiffMetadata, SciTiffMetadataContainer, ) @@ -36,16 +36,16 @@ def _build_dummy_metadata() -> SciTiffMetadataContainer: unit="counts", ), coords={ - 't': ScippVariable( + 't': ScippVariable1D( dims=('t',), shape=(1,), dtype='int', unit='s', values=[0] ), - 'z': ScippVariable( + 'z': ScippVariable1D( dims=('z',), shape=(1,), dtype='int', unit='m', values=[0] ), - 'y': ScippVariable( + 'y': ScippVariable1D( dims=('y',), shape=(1,), dtype='int', unit='m', values=[0] ), - 'x': ScippVariable( + 'x': ScippVariable1D( dims=('x',), shape=(1,), dtype='int', unit='m', values=[0] ), }, diff --git a/src/scitiff/io.py b/src/scitiff/io.py index 94b60da..5a19c80 100644 --- a/src/scitiff/io.py +++ b/src/scitiff/io.py @@ -19,6 +19,9 @@ ImageDataArrayMetadata, ImageVariableMetadata, ScippVariable, + ScippVariable0D, + ScippVariable1D, + ScippVariable2D, SciTiffMetadata, SciTiffMetadataContainer, ) @@ -61,7 +64,7 @@ def _from_json_dict(dict_repr_var: dict) -> sc.Variable: raise err -def _wrap_unit(unit: str | None) -> str | None: +def _wrap_unit(unit: str | sc.Unit | None) -> str | None: # str(None), which is `None` is interpreted as `N` (neuton) when # it is loaded back from the json file. return str(unit) if unit is not None else None @@ -71,10 +74,10 @@ def _scipp_variable_to_model(var: sc.Variable) -> ScippVariable: # We do not use sc.to_dict directly because # we have to handle serialization of some non-string output # and also we want to utilize the pydantic model for validation. - if var.ndim > 1: + if var.ndim > 2: raise ValueError( - "Only 1-dimensional or scalar variable is allowed for metadata. " - "The variable has more than 1 dimension." + "Only variables with at most 2 dimensions are allowed for metadata. " + "The variable has more than 2 dimensions." ) if var.ndim == 0: # scalar variable values = var.value @@ -88,13 +91,21 @@ def _scipp_variable_to_model(var: sc.Variable) -> ScippVariable: else: values = list(var.values) - return ScippVariable( - dims=var.dims, - dtype=str(var.dtype), - shape=var.shape, - unit=_wrap_unit(var.unit), - values=values, - ) + constructors = [ScippVariable0D, ScippVariable1D, ScippVariable2D] + try: + return constructors[var.ndim]( + dims=var.dims, + dtype=str(var.dtype), + shape=var.shape, + unit=_wrap_unit(var.unit), + values=values, + ) + except pydantic.ValidationError as err: + raise ValueError( + "Failed to construct pydantic model from the variable: ", + var, + "\nPlease check if the coordinate/mask is compatible with the schema.", + ) from err def _scipp_variable_to_metadata_model(var: sc.Variable) -> ImageVariableMetadata: diff --git a/tests/io_test.py b/tests/io_test.py index ca9d385..1a1d3b1 100644 --- a/tests/io_test.py +++ b/tests/io_test.py @@ -31,6 +31,9 @@ @pytest.fixture def sample_image() -> sc.DataArray: pattern = [[i * 400 + j for j in range(4)] for i in range(3)] + pixel_id = sc.arange(dim='pixel-id', start=0, stop=12, unit='dimensionless').fold( + dim='pixel-id', sizes={'x': 4, 'y': 3} + ) sample_img = sc.DataArray( data=sc.array( dims=['t', 'y', 'x'], @@ -44,6 +47,8 @@ def sample_image() -> sc.DataArray: 'timestamp': sc.datetime('now', unit='hour'), 'y': sc.linspace(dim='y', start=0.0, stop=300.0, num=3, unit='mm'), 'x': sc.linspace(dim='x', start=0.0, stop=400.0, num=4, unit='mm'), + # 2D coordinate + 'pixel-id': pixel_id, }, ) return sample_img @@ -114,11 +119,11 @@ def test_export_and_load_scitiff_with_scalar_coord(sample_image, tmp_path) -> No @pytest.fixture -def sample_image_2d_coordinate(sample_image: sc.DataArray) -> sc.DataArray: +def sample_image_3d_coordinate(sample_image: sc.DataArray) -> sc.DataArray: new_image = sample_image.copy() - flattend_x = sc.flatten(sample_image, dims=['y', 'x'], to='pos').coords['x'] - new_image.coords['x'] = sc.fold( - flattend_x, dim='pos', dims=['y', 'x'], shape=[3, 4] + flattend_x = sc.flatten(sample_image, dims=['t', 'y', 'x'], to='lambda').coords['t'] + new_image.coords['lambda'] = sc.fold( + flattend_x, dim='lambda', dims=['y', 'x', 't'], shape=[3, 4, 2] ) return new_image @@ -129,15 +134,35 @@ def test_export_illegal_dimension_raises(sample_image: sc.DataArray) -> None: def test_export_multi_dimension_coordinate_raises( - sample_image_2d_coordinate: sc.DataArray, + sample_image_3d_coordinate: sc.DataArray, ) -> None: with pytest.raises( ValueError, match=re.escape( - 'Only 1-dimensional or scalar variable is allowed for metadata.' + 'Only variables with at most 2 dimensions are allowed for metadata.' ), ): - save_scitiff(sample_image_2d_coordinate, 'test.tiff') + save_scitiff(sample_image_3d_coordinate, 'test.tiff') + + +@pytest.fixture +def sample_image_2d_str_coordinate(sample_image: sc.DataArray) -> sc.DataArray: + new_image = sample_image.copy() + pixel_id = sc.array(dims=['pixel-id'], values=[str(i) for i in range(12)]).fold( + dim='pixel-id', dims=['y', 'x'], shape=[3, 4] + ) + new_image.coords['pixel-id-str'] = pixel_id + return new_image + + +def test_export_2D_string_coordinate_raises( + sample_image_2d_str_coordinate: sc.DataArray, +) -> None: + with pytest.raises( + ValueError, + match=re.escape("Failed to construct pydantic model"), + ): + save_scitiff(sample_image_2d_str_coordinate, 'test.tiff') def test_load_squeeze_false(sample_image, tmp_path) -> None: diff --git a/tests/validate_test.py b/tests/validate_test.py index da6e488..5ef7821 100644 --- a/tests/validate_test.py +++ b/tests/validate_test.py @@ -4,6 +4,7 @@ import scipp as sc from scitiff import validate_scitiff_metadata_container +from scitiff._schema import ScippVariable2D from scitiff.io import extract_metadata @@ -41,3 +42,14 @@ def test_validation(sample_image) -> None: # If the mode is not 'json', the test will fail because it will contain # tuples, which will not validate as an array. ) + + +def multi_dimensional_string_array_invalid_raises() -> None: + with pytest.raises(ValueError, match="Failed to construct pydantic model"): + ScippVariable2D( + dims=('x', 'y'), + shape=(2, 2), + values=[['1', '2'], ['3', '4']], + unit=None, + dtype='string', + )