"""Source code for pyicat_plus.tests.icat_messaging.test_dataset."""

import datetime
import logging
import os
import re

import pytest

from ... import errors
from ..._icat_messaging.serializers.dataset import raw_serialize_dataset_message
from ..._icat_messaging.serializers.dataset import serialize_dataset_message
from ..._icat_messaging.serializers.dataset_parameters.main import (
    serialize_dataset_parameters,
)
from ..._icat_messaging.serializers.investigation import (
    raw_serialize_investigation_message,
)
from ..._icat_messaging.utils import datetime_utils


def test_dataset_message():
    """Serialize a dataset with valid metadata and compare with the expected XML.

    The proposal and beamline are given in upper case while the expected
    document contains them in lower case, and the dataset path is given with
    a trailing slash while the expected location is ``os.path.normpath(path)``.
    """
    proposal = "MX-123"
    beamline = "ID30A-3"
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1/"
    dataset = "Protein Crystal Diffraction Run"
    metadata = {
        "machine": "mymachine",
        "software": "mysoftware_version",
        "startDate": start_datetime,
        "endDate": end_datetime,
        "Sample_name": "Lysozyme Crystal",
    }

    location = os.path.normpath(path)
    start_zulu = datetime_utils.with_timezone_zulu_format(start_datetime)
    end_zulu = datetime_utils.with_timezone_zulu_format(end_datetime)

    # NOTE(review): the per-level indentation of this literal mirrors the
    # `indent` argument passed to the serializer below - keep them in sync.
    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
 <tns:investigation>mx-123</tns:investigation>
 <tns:instrument>id30a-3</tns:instrument>
 <tns:name>Protein Crystal Diffraction Run</tns:name>
 <tns:location>{location}</tns:location>
 <tns:startDate>{start_zulu}</tns:startDate>
 <tns:endDate>{end_zulu}</tns:endDate>
 <tns:sample>
  <tns:name>Lysozyme Crystal</tns:name>
 </tns:sample>
 <tns:parameter>
  <tns:name>Sample_name</tns:name>
  <tns:value>Lysozyme Crystal</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>beamlineID</tns:name>
  <tns:value>id30a-3</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>datasetName</tns:name>
  <tns:value>Protein Crystal Diffraction Run</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>proposal</tns:name>
  <tns:value>mx-123</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>location</tns:name>
  <tns:value>{location}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>startDate</tns:name>
  <tns:value>{start_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>endDate</tns:name>
  <tns:value>{end_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>machine</tns:name>
  <tns:value>mymachine</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>software</tns:name>
  <tns:value>mysoftware_version</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>complete</tns:name>
  <tns:value>true</tns:value>
 </tns:parameter>
</tns:dataset>
"""

    data = serialize_dataset_message(
        beamline=beamline,
        proposal=proposal,
        dataset=dataset,
        path=path,
        metadata=metadata,
        indent=" ",
    )

    assert data.decode("utf-8") == expected
def test_dataset_message_unknown_parameter():
    """Check how a metadata key unknown to ICAT is reported.

    In the default mode the serializer is expected to emit an
    ``IcatMetadataValidationWarning`` and still produce the XML document
    (including the unknown key); with ``strict=True`` it is expected to raise
    ``IcatMetadataValidationError`` with the same message.
    """
    proposal = "MX-123"
    beamline = "ID30A-3"
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1"
    dataset = "Protein Crystal Diffraction Run"
    metadata = {
        "machine": "mymachine",
        "software": "mysoftware_version",
        "startDate": start_datetime,
        "endDate": end_datetime,
        "Sample_name": "Lysozyme Crystal",
        "InstrumentBeam_incident_energy": (7000, "eV"),  # partial validation
        "Unknown_key": 42,  # fails
    }

    location = os.path.normpath(path)
    start_zulu = datetime_utils.with_timezone_zulu_format(start_datetime)
    end_zulu = datetime_utils.with_timezone_zulu_format(end_datetime)

    # NOTE(review): the per-level indentation of this literal mirrors the
    # `indent` argument passed to the serializer below - keep them in sync.
    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
 <tns:investigation>mx-123</tns:investigation>
 <tns:instrument>id30a-3</tns:instrument>
 <tns:name>Protein Crystal Diffraction Run</tns:name>
 <tns:location>{location}</tns:location>
 <tns:startDate>{start_zulu}</tns:startDate>
 <tns:endDate>{end_zulu}</tns:endDate>
 <tns:sample>
  <tns:name>Lysozyme Crystal</tns:name>
 </tns:sample>
 <tns:parameter>
  <tns:name>Sample_name</tns:name>
  <tns:value>Lysozyme Crystal</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>InstrumentBeam_incident_energy</tns:name>
  <tns:value>7.0</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>beamlineID</tns:name>
  <tns:value>id30a-3</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>datasetName</tns:name>
  <tns:value>Protein Crystal Diffraction Run</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>proposal</tns:name>
  <tns:value>mx-123</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>location</tns:name>
  <tns:value>{location}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>startDate</tns:name>
  <tns:value>{start_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>endDate</tns:name>
  <tns:value>{end_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>machine</tns:name>
  <tns:value>mymachine</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>software</tns:name>
  <tns:value>mysoftware_version</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>complete</tns:name>
  <tns:value>true</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>Unknown_key</tns:name>
  <tns:value>42</tns:value>
 </tns:parameter>
</tns:dataset>
"""

    # NOTE(review): the line break inside this template is reconstructed;
    # confirm it against the message actually emitted by the serializer.
    expected_message = _prepare_regex_pattern(
        r"""Unknown ICAT fields: ['Unknown_key']
Documentation on ICAT fields: __ICAT_DEF_URL__"""
    )

    # Both calls use the same arguments; only `strict` differs.
    serializer_kwargs = {
        "beamline": beamline,
        "proposal": proposal,
        "dataset": dataset,
        "path": path,
        "metadata": metadata,
        "indent": " ",
    }

    with pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
        data = serialize_dataset_message(**serializer_kwargs)

    assert data.decode("utf-8") == expected

    with pytest.raises(errors.IcatMetadataValidationError, match=expected_message):
        _ = serialize_dataset_message(**serializer_kwargs, strict=True)
def test_dataset_message_wrong_value():
    """Check how a known ICAT field with an invalid value is reported.

    ``InstrumentBeam_distance`` is given a non-numeric string. In the default
    mode the serializer is expected to emit an
    ``IcatMetadataValidationWarning`` and still produce the XML document
    (including the invalid value); with ``strict=True`` it is expected to
    raise ``IcatMetadataValidationError``.
    """
    proposal = "MX-123"
    beamline = "ID30A-3"
    start_datetime = datetime.datetime.now().astimezone()
    end_datetime = start_datetime + datetime.timedelta(minutes=1)
    path = "/data/run1"
    dataset = "Protein Crystal Diffraction Run"
    metadata = {
        "machine": "mymachine",
        "software": "mysoftware_version",
        "startDate": start_datetime,
        "endDate": end_datetime,
        "Sample_name": "Lysozyme Crystal",
        "InstrumentBeam_incident_energy": (7000, "eV"),  # partial validation
        "InstrumentBeam_distance": "wrong",  # fails
    }

    location = os.path.normpath(path)
    start_zulu = datetime_utils.with_timezone_zulu_format(start_datetime)
    end_zulu = datetime_utils.with_timezone_zulu_format(end_datetime)

    # NOTE(review): the per-level indentation of this literal mirrors the
    # `indent` argument passed to the serializer below - keep them in sync.
    expected = f"""<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
 <tns:investigation>mx-123</tns:investigation>
 <tns:instrument>id30a-3</tns:instrument>
 <tns:name>Protein Crystal Diffraction Run</tns:name>
 <tns:location>{location}</tns:location>
 <tns:startDate>{start_zulu}</tns:startDate>
 <tns:endDate>{end_zulu}</tns:endDate>
 <tns:sample>
  <tns:name>Lysozyme Crystal</tns:name>
 </tns:sample>
 <tns:parameter>
  <tns:name>Sample_name</tns:name>
  <tns:value>Lysozyme Crystal</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>InstrumentBeam_incident_energy</tns:name>
  <tns:value>7.0</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>beamlineID</tns:name>
  <tns:value>id30a-3</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>datasetName</tns:name>
  <tns:value>Protein Crystal Diffraction Run</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>proposal</tns:name>
  <tns:value>mx-123</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>location</tns:name>
  <tns:value>{location}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>startDate</tns:name>
  <tns:value>{start_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>endDate</tns:name>
  <tns:value>{end_zulu}</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>machine</tns:name>
  <tns:value>mymachine</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>software</tns:name>
  <tns:value>mysoftware_version</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>complete</tns:name>
  <tns:value>true</tns:value>
 </tns:parameter>
 <tns:parameter>
  <tns:name>InstrumentBeam_distance</tns:name>
  <tns:value>wrong</tns:value>
 </tns:parameter>
</tns:dataset>
"""

    # NOTE(review): line breaks and leading spaces inside this template are
    # reconstructed following pydantic's usual error layout; confirm them
    # against the message actually emitted by the serializer.
    expected_message = _prepare_regex_pattern(
        r"""ValidationError: 1 validation error for IcatDatasetParameters
instrument.beam.distance (InstrumentBeam_distance)
  Value error, could not convert string to float: 'wrong' [type=value_error, input_value='wrong', input_type=str]
    For further information visit __VALUE_ERROR_URL__
Documentation on ICAT fields: __ICAT_DEF_URL__"""
    )

    # Both calls use the same arguments; only `strict` differs.
    serializer_kwargs = {
        "beamline": beamline,
        "proposal": proposal,
        "dataset": dataset,
        "path": path,
        "metadata": metadata,
        "indent": " ",
    }

    with pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
        data = serialize_dataset_message(**serializer_kwargs)

    assert data.decode("utf-8") == expected

    with pytest.raises(errors.IcatMetadataValidationError):
        _ = serialize_dataset_message(**serializer_kwargs, strict=True)
def test_dataset_message_xsd_only():
    """Serialize a raw dataset dictionary and compare with the expected XML.

    The input is passed directly to ``raw_serialize_dataset_message``; the
    expected document contains the values exactly as given (no lower-casing
    of the investigation or instrument names).
    """
    payload = {
        "datasetId": 1001,
        "investigation": "MX-123",
        "instrument": "ID30A-3",
        "name": "Protein Crystal Diffraction Run",
        "location": "/data/run1",
        "startDate": "2026-03-08T09:18:49.678825+01:00",
        "endDate": "2026-03-08T09:19:49.678825+01:00",
        "sample": {
            "name": "Lysozyme Crystal",
            "type": "Protein Crystal",
            "parameter": [
                {"name": "temperature", "value": "100K"},
                {"name": "buffer", "value": "NaCl 0.1M"},
            ],
        },
        "datafile": [
            {
                "location": "/data/run1/file1.cbf",
                "size": 12345,
            }
        ],
    }

    # NOTE(review): the per-level indentation of this literal mirrors the
    # `indent` argument passed to the serializer below - keep them in sync.
    expected = """<?xml version="1.0" encoding="UTF-8"?>
<tns:dataset xmlns:tns="http://www.esrf.fr/icat" complete="true">
 <tns:datasetId>1001</tns:datasetId>
 <tns:investigation>MX-123</tns:investigation>
 <tns:instrument>ID30A-3</tns:instrument>
 <tns:name>Protein Crystal Diffraction Run</tns:name>
 <tns:location>/data/run1</tns:location>
 <tns:startDate>2026-03-08T09:18:49.678825+01:00</tns:startDate>
 <tns:endDate>2026-03-08T09:19:49.678825+01:00</tns:endDate>
 <tns:sample>
  <tns:name>Lysozyme Crystal</tns:name>
  <tns:type>Protein Crystal</tns:type>
  <tns:parameter>
   <tns:name>temperature</tns:name>
   <tns:value>100K</tns:value>
  </tns:parameter>
  <tns:parameter>
   <tns:name>buffer</tns:name>
   <tns:value>NaCl 0.1M</tns:value>
  </tns:parameter>
 </tns:sample>
 <tns:datafile>
  <tns:location>/data/run1/file1.cbf</tns:location>
  <tns:size>12345</tns:size>
 </tns:datafile>
</tns:dataset>
"""

    # Separate name for the result so the input dictionary is not shadowed.
    serialized = raw_serialize_dataset_message(payload, indent=" ")

    assert serialized.decode("utf-8") == expected
def test_investigation_message_xsd_only():
    """Serialize a raw investigation dictionary and compare with the expected XML."""
    payload = {
        "experiment": "MX-123",
        "instrument": "ID30A-3",
        "startDate": "2026-03-08T09:18:49.678825+01:00",
        "investigationId": 4521,
    }

    # NOTE(review): the per-level indentation of this literal mirrors the
    # `indent` argument passed to the serializer below - keep them in sync.
    expected = """<?xml version="1.0" encoding="UTF-8"?>
<tns:investigation xmlns:tns="http://www.esrf.fr/icat">
 <tns:experiment>MX-123</tns:experiment>
 <tns:instrument>ID30A-3</tns:instrument>
 <tns:startDate>2026-03-08T09:18:49.678825+01:00</tns:startDate>
 <tns:investigationId>4521</tns:investigationId>
</tns:investigation>
"""

    # Separate name for the result so the input dictionary is not shadowed.
    serialized = raw_serialize_investigation_message(payload, indent=" ")

    assert serialized.decode("utf-8") == expected
def test_icat_serialize_warning(caplog):
    """Check the warning and log record produced for empty metadata.

    Serializing an empty dictionary in the default (non-strict) mode is
    expected to emit an ``IcatMetadataValidationWarning``, return the metadata
    unchanged and log exactly one WARNING record mentioning the validation
    error.
    """
    metadata = {}  # will cause a pydantic ValidationError about missing fields

    # NOTE(review): line breaks and leading spaces inside this template are
    # reconstructed following pydantic's usual error layout; confirm them
    # against the message actually emitted by the serializer.
    expected_message = _prepare_regex_pattern(
        r"""ValidationError: 6 validation errors for IcatDatasetParameters
title (datasetName)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
proposal (proposal)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
folder_path (location)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
start_time (startDate)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
end_time (endDate)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
sample (sample)
  Field required [type=missing, input_value={}, input_type=dict]
    For further information visit __MISSING_URL__
Documentation on ICAT fields: __ICAT_DEF_URL__"""
    )

    with caplog.at_level(logging.WARNING, logger="pyicat_plus.client.serialize"):
        with pytest.warns(errors.IcatMetadataValidationWarning, match=expected_message):
            result = serialize_dataset_parameters(metadata)

    assert result == metadata

    # Keep only the WARNING records that report the validation failure.
    messages = [
        record.message
        for record in caplog.records
        if record.levelno == logging.WARNING and "ValidationError" in record.message
    ]
    assert len(messages) == 1
    assert re.match(expected_message, messages[0])
def test_icat_serialize_error(caplog):
    """Check that empty metadata raises in strict mode.

    The ``caplog`` fixture is requested but its records are not inspected
    by this test.
    """
    metadata = {}  # will cause a pydantic ValidationError about missing fields

    with pytest.raises(errors.IcatMetadataValidationError):
        serialize_dataset_parameters(metadata, strict=True)
def _prepare_regex_pattern(template: str) -> str:
    """Turn a literal message template into a regular expression.

    The template is escaped with ``re.escape`` so that it matches literally,
    then each placeholder is replaced by a regex fragment that accepts any
    version/language path segment of the corresponding URL:

    * ``__MISSING_URL__``: pydantic "missing" error page
    * ``__VALUE_ERROR_URL__``: pydantic "value_error" error page
    * ``__ICAT_DEF_URL__``: ICAT definitions documentation page

    :param template: expected message, with optional URL placeholders.
    :returns: regex pattern string usable with ``re.match`` or ``pytest``'s
        ``match=`` argument.
    """
    # The placeholders consist only of letters and underscores, which
    # `re.escape` leaves untouched, so they can be substituted after escaping.
    url_patterns = {
        "__MISSING_URL__": r"https://errors\.pydantic\.dev/.*/v/missing",
        "__VALUE_ERROR_URL__": r"https://errors\.pydantic\.dev/.*/v/value_error",
        "__ICAT_DEF_URL__": r"https://icat-esrf-definitions\.readthedocs\.io/.*/icat\.html",
    }

    pattern = re.escape(template)
    for placeholder, url_pattern in url_patterns.items():
        pattern = pattern.replace(placeholder, url_pattern)
    return pattern