import datetime
import logging
import os
import re
import pytest
from ... import errors
from ..._icat_messaging.serializers.dataset import raw_serialize_dataset_message
from ..._icat_messaging.serializers.dataset import serialize_dataset_message
from ..._icat_messaging.serializers.dataset_parameters.main import (
serialize_dataset_parameters,
)
from ..._icat_messaging.serializers.investigation import (
raw_serialize_investigation_message,
)
from ..._icat_messaging.utils import datetime_utils
[docs]
def test_dataset_message():
    """Check the XML produced by ``serialize_dataset_message`` for valid metadata.

    The dataset path is given with a trailing slash; the expected document
    uses ``os.path.normpath(path)`` for the location values. The serialized
    bytes are decoded as UTF-8 and compared with the full expected document.
    """
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1/"

    # All arguments of the call under test, gathered in one place.
    call_kwargs = {
        "beamline": "ID30A-3",
        "proposal": "MX-123",
        "dataset": "Protein Crystal Diffraction Run",
        "path": path,
        "metadata": {
            "machine": "mymachine",
            "software": "mysoftware_version",
            "startDate": start_datetime,
            "endDate": end_datetime,
            "Sample_name": "Lysozyme Crystal",
        },
        "indent": " ",
    }

    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
<tns:investigation>mx-123</tns:investigation>
<tns:instrument>id30a-3</tns:instrument>
<tns:name>Protein Crystal Diffraction Run</tns:name>
<tns:location>{os.path.normpath(path)}</tns:location>
<tns:startDate>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:startDate>
<tns:endDate>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:endDate>
<tns:sample>
<tns:name>Lysozyme Crystal</tns:name>
</tns:sample>
<tns:parameter>
<tns:name>Sample_name</tns:name>
<tns:value>Lysozyme Crystal</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>beamlineID</tns:name>
<tns:value>id30a-3</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>datasetName</tns:name>
<tns:value>Protein Crystal Diffraction Run</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>proposal</tns:name>
<tns:value>mx-123</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>location</tns:name>
<tns:value>{os.path.normpath(path)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>startDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>endDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>machine</tns:name>
<tns:value>mymachine</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>software</tns:name>
<tns:value>mysoftware_version</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>complete</tns:name>
<tns:value>true</tns:value>
</tns:parameter>
</tns:dataset>
"""

    serialized = serialize_dataset_message(**call_kwargs)
    assert serialized.decode("utf-8") == expected
[docs]
def test_dataset_message_unknown_parameter():
    """Check how an unknown metadata key is handled by the dataset serializer.

    Without ``strict`` the call must emit an ``IcatMetadataValidationWarning``
    matching the expected message and still return the expected XML, which
    includes the unknown key as a parameter. With ``strict=True`` the same
    call must raise ``IcatMetadataValidationError`` with the same message.
    """
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1"

    # Shared by the lenient and the strict call below.
    call_kwargs = {
        "beamline": "ID30A-3",
        "proposal": "MX-123",
        "dataset": "Protein Crystal Diffraction Run",
        "path": path,
        "metadata": {
            "machine": "mymachine",
            "software": "mysoftware_version",
            "startDate": start_datetime,
            "endDate": end_datetime,
            "Sample_name": "Lysozyme Crystal",
            "InstrumentBeam_incident_energy": (7000, "eV"),  # partial validation
            "Unknown_key": 42,  # fails
        },
        "indent": " ",
    }

    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
<tns:investigation>mx-123</tns:investigation>
<tns:instrument>id30a-3</tns:instrument>
<tns:name>Protein Crystal Diffraction Run</tns:name>
<tns:location>{os.path.normpath(path)}</tns:location>
<tns:startDate>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:startDate>
<tns:endDate>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:endDate>
<tns:sample>
<tns:name>Lysozyme Crystal</tns:name>
</tns:sample>
<tns:parameter>
<tns:name>Sample_name</tns:name>
<tns:value>Lysozyme Crystal</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>InstrumentBeam_incident_energy</tns:name>
<tns:value>7.0</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>beamlineID</tns:name>
<tns:value>id30a-3</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>datasetName</tns:name>
<tns:value>Protein Crystal Diffraction Run</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>proposal</tns:name>
<tns:value>mx-123</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>location</tns:name>
<tns:value>{os.path.normpath(path)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>startDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>endDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>machine</tns:name>
<tns:value>mymachine</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>software</tns:name>
<tns:value>mysoftware_version</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>complete</tns:name>
<tns:value>true</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>Unknown_key</tns:name>
<tns:value>42</tns:value>
</tns:parameter>
</tns:dataset>
"""

    expected_message = _prepare_regex_pattern(r"""Unknown ICAT fields: ['Unknown_key']
Documentation on ICAT fields: __ICAT_DEF_URL__""")

    # Lenient mode: a warning is emitted and the message is still produced.
    with pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
        serialized = serialize_dataset_message(**call_kwargs)
    assert serialized.decode("utf-8") == expected

    # Strict mode: the same problem is reported as an exception.
    with pytest.raises(errors.IcatMetadataValidationError, match=expected_message):
        _ = serialize_dataset_message(**call_kwargs, strict=True)
[docs]
def test_dataset_message_wrong_value():
    """Check how a metadata value of the wrong type is handled by the serializer.

    ``InstrumentBeam_distance`` is given the string ``"wrong"``. Without
    ``strict`` the call must emit an ``IcatMetadataValidationWarning`` matching
    the expected validation message and still return the expected XML, which
    carries the value unchanged. With ``strict=True`` the same call must raise
    ``IcatMetadataValidationError`` (the message is not checked in that case).
    """
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1"

    # Shared by the lenient and the strict call below.
    call_kwargs = {
        "beamline": "ID30A-3",
        "proposal": "MX-123",
        "dataset": "Protein Crystal Diffraction Run",
        "path": path,
        "metadata": {
            "machine": "mymachine",
            "software": "mysoftware_version",
            "startDate": start_datetime,
            "endDate": end_datetime,
            "Sample_name": "Lysozyme Crystal",
            "InstrumentBeam_incident_energy": (7000, "eV"),  # partial validation
            "InstrumentBeam_distance": "wrong",  # fails
        },
        "indent": " ",
    }

    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
<tns:investigation>mx-123</tns:investigation>
<tns:instrument>id30a-3</tns:instrument>
<tns:name>Protein Crystal Diffraction Run</tns:name>
<tns:location>{os.path.normpath(path)}</tns:location>
<tns:startDate>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:startDate>
<tns:endDate>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:endDate>
<tns:sample>
<tns:name>Lysozyme Crystal</tns:name>
</tns:sample>
<tns:parameter>
<tns:name>Sample_name</tns:name>
<tns:value>Lysozyme Crystal</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>InstrumentBeam_incident_energy</tns:name>
<tns:value>7.0</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>beamlineID</tns:name>
<tns:value>id30a-3</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>datasetName</tns:name>
<tns:value>Protein Crystal Diffraction Run</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>proposal</tns:name>
<tns:value>mx-123</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>location</tns:name>
<tns:value>{os.path.normpath(path)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>startDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(start_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>endDate</tns:name>
<tns:value>{datetime_utils.with_timezone_zulu_format(end_datetime)}</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>machine</tns:name>
<tns:value>mymachine</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>software</tns:name>
<tns:value>mysoftware_version</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>complete</tns:name>
<tns:value>true</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>InstrumentBeam_distance</tns:name>
<tns:value>wrong</tns:value>
</tns:parameter>
</tns:dataset>
"""

    expected_message = _prepare_regex_pattern(
        r"""ValidationError: 1 validation error for IcatDatasetParameters
instrument.beam.distance (InstrumentBeam_distance)
Value error, could not convert string to float: 'wrong' [type=value_error, input_value='wrong', input_type=str]
For further information visit __VALUE_ERROR_URL__
Documentation on ICAT fields: __ICAT_DEF_URL__"""
    )

    # Lenient mode: a warning is emitted and the message is still produced.
    with pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
        serialized = serialize_dataset_message(**call_kwargs)
    assert serialized.decode("utf-8") == expected

    # Strict mode: only the exception type is checked here.
    with pytest.raises(errors.IcatMetadataValidationError):
        _ = serialize_dataset_message(**call_kwargs, strict=True)
[docs]
def test_dataset_message_xsd_only():
    """Check ``raw_serialize_dataset_message`` on a nested dataset mapping.

    The input contains scalar fields, a nested ``sample`` mapping with a list
    of parameters, and a list of ``datafile`` entries. The serialized bytes
    are decoded as UTF-8 and compared with the full expected document.
    """
    sample = {
        "name": "Lysozyme Crystal",
        "type": "Protein Crystal",
        "parameter": [
            {"name": "temperature", "value": "100K"},
            {"name": "buffer", "value": "NaCl 0.1M"},
        ],
    }
    datafiles = [
        {
            "location": "/data/run1/file1.cbf",
            "size": 12345,
        }
    ]
    raw_message = {
        "datasetId": 1001,
        "investigation": "MX-123",
        "instrument": "ID30A-3",
        "name": "Protein Crystal Diffraction Run",
        "location": "/data/run1",
        "startDate": "2026-03-08T09:18:49.678825+01:00",
        "endDate": "2026-03-08T09:19:49.678825+01:00",
        "sample": sample,
        "datafile": datafiles,
    }

    expected = """<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
<tns:datasetId>1001</tns:datasetId>
<tns:investigation>MX-123</tns:investigation>
<tns:instrument>ID30A-3</tns:instrument>
<tns:name>Protein Crystal Diffraction Run</tns:name>
<tns:location>/data/run1</tns:location>
<tns:startDate>2026-03-08T09:18:49.678825+01:00</tns:startDate>
<tns:endDate>2026-03-08T09:19:49.678825+01:00</tns:endDate>
<tns:sample>
<tns:name>Lysozyme Crystal</tns:name>
<tns:type>Protein Crystal</tns:type>
<tns:parameter>
<tns:name>temperature</tns:name>
<tns:value>100K</tns:value>
</tns:parameter>
<tns:parameter>
<tns:name>buffer</tns:name>
<tns:value>NaCl 0.1M</tns:value>
</tns:parameter>
</tns:sample>
<tns:datafile>
<tns:location>/data/run1/file1.cbf</tns:location>
<tns:size>12345</tns:size>
</tns:datafile>
</tns:dataset>
"""

    serialized = raw_serialize_dataset_message(raw_message, indent=" ")
    assert serialized.decode("utf-8") == expected
[docs]
def test_investigation_message_xsd_only():
    """Check ``raw_serialize_investigation_message`` on a flat mapping.

    The serialized bytes are decoded as UTF-8 and compared with the full
    expected document.
    """
    raw_message = {
        "experiment": "MX-123",
        "instrument": "ID30A-3",
        "startDate": "2026-03-08T09:18:49.678825+01:00",
        "investigationId": 4521,
    }

    expected = """<?xml version="1.0" encoding="UTF-8"?>
<tns:investigation xmlns:tns="http://www.esrf.fr/icat">
<tns:experiment>MX-123</tns:experiment>
<tns:instrument>ID30A-3</tns:instrument>
<tns:startDate>2026-03-08T09:18:49.678825+01:00</tns:startDate>
<tns:investigationId>4521</tns:investigationId>
</tns:investigation>
"""

    serialized = raw_serialize_investigation_message(raw_message, indent=" ")
    assert serialized.decode("utf-8") == expected
[docs]
def test_icat_serialize_warning(caplog):
    """Check the warning and log record produced for empty dataset parameters.

    Serializing an empty mapping without ``strict`` must emit an
    ``IcatMetadataValidationWarning`` matching the expected message, return a
    value equal to the input, and leave exactly one WARNING record containing
    "ValidationError" whose text matches the same pattern.
    """
    metadata = {}  # will cause a pydantic ValidationError about missing fields
    expected_message = _prepare_regex_pattern(
        r"""ValidationError: 6 validation errors for IcatDatasetParameters
title (datasetName)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
proposal (proposal)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
folder_path (location)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
start_time (startDate)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
end_time (endDate)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
sample (sample)
Field required [type=missing, input_value={}, input_type=dict]
For further information visit __MISSING_URL__
Documentation on ICAT fields: __ICAT_DEF_URL__"""
    )

    # Capture log records and the Python warning around the same call.
    with caplog.at_level(
        logging.WARNING, logger="pyicat_plus.client.serialize"
    ), pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
        result = serialize_dataset_parameters(metadata)

    assert result == metadata

    def _is_validation_warning(record):
        # Keep only WARNING-level records that mention the validation error.
        return (
            record.levelno == logging.WARNING
            and "ValidationError" in record.message
        )

    messages = [
        record.message for record in caplog.records if _is_validation_warning(record)
    ]
    assert len(messages) == 1
    assert re.match(expected_message, messages[0])
[docs]
def test_icat_serialize_error(caplog):
    """Check that empty dataset parameters raise in strict mode.

    Serializing an empty mapping with ``strict=True`` must raise
    ``IcatMetadataValidationError``.
    """
    empty_metadata = {}  # will cause a pydantic ValidationError about missing fields
    with pytest.raises(errors.IcatMetadataValidationError):
        serialize_dataset_parameters(empty_metadata, strict=True)
def _prepare_regex_pattern(template: str) -> str:
pydantic_missing_url = r"https://errors\.pydantic\.dev/.*/v/missing"
pydantic_value_error_url = r"https://errors\.pydantic\.dev/.*/v/value_error"
icat_def_url = r"https://icat-esrf-definitions\.readthedocs\.io/.*/icat\.html"
escaped = re.escape(template)
escaped = escaped.replace("__MISSING_URL__", pydantic_missing_url)
escaped = escaped.replace("__VALUE_ERROR_URL__", pydantic_value_error_url)
escaped = escaped.replace("__ICAT_DEF_URL__", icat_def_url)
return escaped