Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/developer/adrs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Architecture Decision Records

## ADR-001: Allow coordinates and masks with up to 2 dimensions.

Initially, we only allowed scalar or 1-dimensional coordinates and masks in the metadata.
This is because the metadata is stored as plain text in the tiff file.
If we allowed coordinates or masks of arbitrary dimensionality, the metadata size could exceed the size of the image file itself.
The tiff format itself imposes no strict rule on the size of the metadata.
Nevertheless, we try to keep the metadata as small as possible to increase usability and reduce potential troubles such as storage size or loading latency.

However, there was a need for 2-dimensional coordinates, especially for coordinates that depend on the pixel position.
For example, when a tiff file is a histogram of (x, y, tof), we need the `Ltotal` of each pixel, which is a 2-dimensional (x, y) coordinate, in order to compute the wavelength.
This use case was found in the detector position calibration routine.

1 change: 1 addition & 0 deletions docs/developer/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ maxdepth: 2
getting-started
coding-conventions
dependency-management
adrs
```
2 changes: 1 addition & 1 deletion src/scitiff/_json_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def _idx_endswith(lines: list[str], *suffix: str) -> int:
def _join_beautified_array(lines: list[str], cur: str = '') -> str:
try:
left_bracket = _idx_endswith(lines, '[')
right_bracket = _idx_endswith(lines, ']', '],')
right_bracket = left_bracket + _idx_endswith(lines[left_bracket:], ']', '],')
left, array_items, lines = (
lines[: left_bracket + 1],
lines[left_bracket + 1 : right_bracket + 1],
Expand Down
104 changes: 86 additions & 18 deletions src/scitiff/_resources/metadata-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,16 @@
"properties": {
"data": {"$ref": "#/$defs/ImageVariableMetadata"},
"masks": {
"additionalProperties": {"$ref": "#/$defs/ScippVariable"},
"additionalProperties": {
"anyOf": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable1D" }, { "$ref": "#/$defs/ScippVariable2D" }]
},
"title": "Masks",
"type": "object"
},
"coords": {
"additionalProperties": {"$ref": "#/$defs/ScippVariable"},
"additionalProperties": {
"anyOf": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable1D" }, { "$ref": "#/$defs/ScippVariable2D" }]
},
"title": "Coords",
"type": "object"
},
Expand All @@ -75,6 +79,11 @@
"ImageVariableMetadata": {
"description": "Image Metadata.",
"properties": {
"unit": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Unit"
},
"dtype": {"title": "Dtype", "type": "string"},
"dims": {
"maxItems": 5,
"minItems": 5,
Expand All @@ -88,21 +97,22 @@
"prefixItems": [{ "type": "integer" }, { "type": "integer" }, { "type": "integer" }, { "type": "integer" }, { "type": "integer" }],
"title": "Shape",
"type": "array"
},
"unit": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Unit"
},
"dtype": {"title": "Dtype", "type": "string"}
}
},
"required": ["dims", "shape", "unit", "dtype"],
"required": ["unit", "dtype", "dims", "shape"],
"title": "ImageVariableMetadata",
"type": "object"
},
"NeutronMetadata": {
"properties": {
"neutron_type": {"$ref": "#/$defs/NeutronSourceType"},
"wavelength_range": {"$ref": "#/$defs/ScippVariable"}
"wavelength_range": {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't quite get why this one is needed...

"maxItems": 2,
"minItems": 2,
"prefixItems": [{ "$ref": "#/$defs/ScippVariable0D" }, { "$ref": "#/$defs/ScippVariable0D" }],
"title": "Wavelength Range",
"type": "array"
Comment on lines +110 to +114

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now the wavelength range has strict type of tuple[scalar, scalar]

}
},
"required": ["neutron_type", "wavelength_range"],
"title": "NeutronMetadata",
Expand Down Expand Up @@ -134,31 +144,89 @@
"title": "SciTiffMetadata",
"type": "object"
},
"ScippVariable": {
"description": "Scipp Variable Metadata with the values.\n\nOnly 1D variable is allowed for metadata.",
"ScippVariable0D": {
"description": "Scipp Variable Metadata with scalar value.",
"properties": {
"unit": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Unit"
},
"dtype": {"title": "Dtype", "type": "string"},
"dims": {"default": [], "maxItems": 0, "minItems": 0, "title": "Dims", "type": "array"},
"shape": {"default": [], "maxItems": 0, "minItems": 0, "title": "Shape", "type": "array"},
"values": {
"anyOf": [{ "type": "number" }, { "type": "string" }],
"title": "Values"
}
},
"required": ["unit", "dtype", "values"],
"title": "ScippVariable0D",
"type": "object"
},
"ScippVariable1D": {
"description": "Scipp Variable Metadata with 1D array values.",
"properties": {
"unit": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Unit"
},
"dtype": {"title": "Dtype", "type": "string"},
"dims": {
"items": {"type": "string"},
"maxItems": 1,
"minItems": 1,
"prefixItems": [{ "type": "string" }],
"title": "Dims",
"type": "array"
},
"shape": {
"items": {"type": "integer"},
"maxItems": 1,
"minItems": 1,
"prefixItems": [{ "type": "integer" }],
"title": "Shape",
"type": "array"
},
"values": {
"anyOf": [{ "items": { "type": "number" }, "type": "array" }, { "items": { "type": "string" }, "type": "array" }],
"title": "Values"
}
},
"required": ["unit", "dtype", "dims", "shape", "values"],
"title": "ScippVariable1D",
"type": "object"
},
"ScippVariable2D": {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure I followed: the 0D, 1D and 2D all seem to be identical?

In which place do we actually need the different types?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At first I thought we would want to limit the 2D coordinate only for x, y dimension so I started splitting them into different Models so that 2D one can have literal type of dimensions but I wasn't sure if that's necessary so I removed it...

I guess we can just merge all three of them now. I'll update them.

"description": "Scipp Variable Metadata with 2D array values.\n\nFor 2D array, only numbers(float/int) are allowed.",
"properties": {
"unit": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Unit"
},
"dtype": {"title": "Dtype", "type": "string"},
"dims": {
"maxItems": 2,
"minItems": 2,
"prefixItems": [{ "type": "string" }, { "type": "string" }],
"title": "Dims",
"type": "array"
},
"shape": {
"maxItems": 2,
"minItems": 2,
"prefixItems": [{ "type": "integer" }, { "type": "integer" }],
"title": "Shape",
"type": "array"
},
"values": {
"anyOf": [{ "type": "number" }, { "type": "string" }, { "items": { "type": "number" }, "type": "array" }, { "items": { "type": "string" }, "type": "array" }],
"title": "Values"
"items": {
"items": {"type": "number"},
"type": "array"
},
"title": "Values",
"type": "array"
}
},
"required": ["dims", "shape", "unit", "dtype", "values"],
"title": "ScippVariable",
"required": ["unit", "dtype", "dims", "shape", "values"],
"title": "ScippVariable2D",
"type": "object"
},
"SourceType": {
Expand Down
38 changes: 29 additions & 9 deletions src/scitiff/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,40 @@
class ScippVariableMetadata(BaseModel):
"""Scipp Variable Metadata without the values."""

dims: tuple[str, ...]
shape: tuple[int, ...]
unit: str | None
dtype: str


class ScippVariable(ScippVariableMetadata):
"""Scipp Variable Metadata with the values.
class ScippVariable0D(ScippVariableMetadata):
"""Scipp Variable Metadata with scalar value."""

Only 1D variable is allowed for metadata.
"""
dims: tuple[()] = Field(default=())
shape: tuple[()] = Field(default=())
values: float | str
"""The value of the scalar variable"""

values: float | str | list[float] | list[str]
"""The values of the variable."""

class ScippVariable1D(ScippVariableMetadata):
"""Scipp Variable Metadata with 1D array values."""

dims: tuple[str]
shape: tuple[int]
values: list[float] | list[str]
"""The 1D values list of the variable."""


class ScippVariable2D(ScippVariableMetadata):
"""Scipp Variable Metadata with 2D array values.

For 2D array, only numbers(float/int) are allowed."""

dims: tuple[str, str]
shape: tuple[int, int]
values: list[list[float]] = Field(strict=True)
"""The 2D values list of the variable."""


ScippVariable = ScippVariable0D | ScippVariable1D | ScippVariable2D


class ImageVariableMetadata(ScippVariableMetadata):
Expand Down Expand Up @@ -103,7 +123,7 @@ class NeutronSourceType(Enum):

class NeutronMetadata(BaseModel):
neutron_type: NeutronSourceType
wavelength_range: ScippVariable
wavelength_range: tuple[ScippVariable0D, ScippVariable0D]


class XRayMetadata(BaseModel): ...
Expand Down
10 changes: 5 additions & 5 deletions src/scitiff/executables.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
DAQMetadata,
ImageDataArrayMetadata,
ImageVariableMetadata,
ScippVariable,
ScippVariable1D,
SciTiffMetadata,
SciTiffMetadataContainer,
)
Expand All @@ -36,16 +36,16 @@ def _build_dummy_metadata() -> SciTiffMetadataContainer:
unit="counts",
),
coords={
't': ScippVariable(
't': ScippVariable1D(
dims=('t',), shape=(1,), dtype='int', unit='s', values=[0]
),
'z': ScippVariable(
'z': ScippVariable1D(
dims=('z',), shape=(1,), dtype='int', unit='m', values=[0]
),
'y': ScippVariable(
'y': ScippVariable1D(

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't x and y coords be 2d?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just a dummy metadata as an example in the documentation. I didn't include 2D coordinate since we don't want to encourage people to store 2D coordinates in the metadata.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry I had not seen it was creating some dummy data.

dims=('y',), shape=(1,), dtype='int', unit='m', values=[0]
),
'x': ScippVariable(
'x': ScippVariable1D(
dims=('x',), shape=(1,), dtype='int', unit='m', values=[0]
),
},
Expand Down
33 changes: 22 additions & 11 deletions src/scitiff/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
ImageDataArrayMetadata,
ImageVariableMetadata,
ScippVariable,
ScippVariable0D,
ScippVariable1D,
ScippVariable2D,
SciTiffMetadata,
SciTiffMetadataContainer,
)
Expand Down Expand Up @@ -61,7 +64,7 @@ def _from_json_dict(dict_repr_var: dict) -> sc.Variable:
raise err


def _wrap_unit(unit: str | None) -> str | None:
def _wrap_unit(unit: str | sc.Unit | None) -> str | None:
# str(None), which is `None`, is interpreted as `N` (newton) when
# it is loaded back from the json file.
return str(unit) if unit is not None else None
Expand All @@ -71,10 +74,10 @@ def _scipp_variable_to_model(var: sc.Variable) -> ScippVariable:
# We do not use sc.to_dict directly because
# we have to handle serialization of some non-string output
# and also we want to utilize the pydantic model for validation.
if var.ndim > 1:
if var.ndim > 2:
raise ValueError(
"Only 1-dimensional or scalar variable is allowed for metadata. "
"The variable has more than 1 dimension."
"Only variables with at most 2 dimensions are allowed for metadata. "
"The variable has more than 2 dimensions."
)
if var.ndim == 0: # scalar variable
values = var.value
Expand All @@ -88,13 +91,21 @@ def _scipp_variable_to_model(var: sc.Variable) -> ScippVariable:
else:
values = list(var.values)

return ScippVariable(
dims=var.dims,
dtype=str(var.dtype),
shape=var.shape,
unit=_wrap_unit(var.unit),
values=values,
)
constructors = [ScippVariable0D, ScippVariable1D, ScippVariable2D]
try:
return constructors[var.ndim](
dims=var.dims,
dtype=str(var.dtype),
shape=var.shape,
unit=_wrap_unit(var.unit),
values=values,
)
except pydantic.ValidationError as err:
raise ValueError(
"Failed to construct pydantic model from the variable: ",
var,
"\nPlease check if the coordinate/mask is compatible with the schema.",
) from err


def _scipp_variable_to_metadata_model(var: sc.Variable) -> ImageVariableMetadata:
Expand Down
Loading
Loading