Module psdi_data_conversion.testing.utils

utils.py

This module defines general classes and methods used for unit tests.

Functions

def check_file_match(filename: str, ex_filename: str) ‑> str
Expand source code
def check_file_match(filename: str, ex_filename: str) -> str:
    """Check that the contents of two files match without worrying about whitespace or negligible numerical differences.
    """

    # Read in both files
    text = open(filename, "r").read()
    ex_text = open(get_path_in_project(ex_filename), "r").read()

    # We want to check they're the same without worrying about whitespace (which doesn't matter for this format),
    # so we accomplish this by using the string's `split` method, which splits on whitespace by default
    l_words, l_ex_words = text.split(), ex_text.split()

    # And we also want to avoid spurious false negatives from numerical comparisons (such as one file having
    # negative zero and the other positive zero - yes, this happened), so we convert words to floats if possible

    # We allow greater tolerance for numerical inaccuracy on platforms other than Linux, which is where the expected
    # files were originally created
    rel_tol = 0.001
    abs_tol = 1e-6
    if get_dist() != LINUX_LABEL:
        rel_tol = 0.2
        abs_tol = 0.01

    for word, ex_word in zip(l_words, l_ex_words):
        try:
            val, ex_val = float(word), float(ex_word)

            if not isclose(val, ex_val, rel_tol=rel_tol, abs_tol=abs_tol):
                return (f"File comparison failed: {val} != {ex_val} with rel_tol={rel_tol} and abs_tol={abs_tol} "
                        f"when comparing files {filename} and {ex_filename}")
        except ValueError:
            # If it can't be converted to a float, treat it as a string and require an exact match
            if not word == ex_word:
                return f"File comparison failed: {word} != {ex_word} when comparing files {filename} and {ex_filename}"
    return ""

Check that the contents of two files match without worrying about whitespace or negligible numerical differences.

def get_input_test_data_loc()
Expand source code
def get_input_test_data_loc():
    """Get the realpath of the base directory containing input data for tests"""
    return get_path_in_project(INPUT_TEST_DATA_LOC_IN_PROJECT)

Get the realpath of the base directory containing input data for tests

def get_output_test_data_loc()
Expand source code
def get_output_test_data_loc():
    """Get the realpath of the base directory containing expected output data for tests"""
    return get_path_in_project(OUTPUT_TEST_DATA_LOC_IN_PROJECT)

Get the realpath of the base directory containing expected output data for tests

def get_path_in_project(filename)
Expand source code
def get_path_in_project(filename):
    """Get the realpath to a file contained within the project, given its project-relative path"""

    abs_path = os.path.abspath(os.path.join(get_project_path(), filename))

    return abs_path

Get the realpath to a file contained within the project, given its project-relative path

def get_project_path() ‑> str
Expand source code
@lru_cache(maxsize=1)
def get_project_path() -> str:
    """Gets the absolute path to where the project is on disk, using the package path to find it and checking that it
    contains the expected files

    Returns
    -------
    str
    """

    project_path = os.path.abspath(os.path.join(get_package_path(), ".."))

    # Check that the project path contains the expected test_data folder
    if not os.path.isdir(os.path.join(project_path, TEST_DATA_LOC_IN_PROJECT)):
        raise FileNotFoundError(f"Project path was expected to be '{project_path}', but this does not contain the "
                                f"expected directory '{TEST_DATA_LOC_IN_PROJECT}'")

    return project_path

Gets the absolute path to where the project is on disk, using the package path to find it and checking that it contains the expected files

Returns

str
 
def get_test_data_loc()
Expand source code
def get_test_data_loc():
    """Get the realpath of the base directory containing all data for tests"""
    return get_path_in_project(TEST_DATA_LOC_IN_PROJECT)

Get the realpath of the base directory containing all data for tests

def run_converter_through_cla(filename: str,
to_format: str,
name: str,
input_dir: str,
output_dir: str,
log_file: str,
from_format: str | None = None,
**conversion_kwargs)
Expand source code
def run_converter_through_cla(filename: str,
                              to_format: str,
                              name: str,
                              input_dir: str,
                              output_dir: str,
                              log_file: str,
                              from_format: str | None = None,
                              **conversion_kwargs):
    """Runs a test conversion through the command-line interface

    This function constructs an argument string to be passed to the script, which is called with the
    `run_with_arg_string` function defined below.

    Parameters
    ----------
    filename : str
        The (unqualified) name of the input file to be converted
    to_format : str
        The format to convert the input file to
    name : str
        The name of the converter to use
    input_dir : str
        The directory which contains the input file
    output_dir : str
        The directory which contains the output file
    log_file : str
        The desired name of the log file
    conversion_kwargs : Any
        Additional arguments describing the conversion
    from_format : str | None
        The format of the input file, when it needs to be explicitly specified, otherwise None
    """

    # Start the argument string with the arguments we will always include
    arg_string = f"{filename} -i {input_dir} -t {to_format} -o {output_dir} -w {name} --log-file {log_file}"

    # For from_format and each argument in the conversion kwargs, convert it to the appropriate argument to be provided
    # to the argument string

    if from_format:
        arg_string += f" -f {from_format}"

    for key, val in conversion_kwargs.items():
        if key == "log_mode":
            if val == LOG_NONE:
                arg_string += " -q"
            else:
                arg_string += f" --log-mode {val}"
        elif key == "delete_input":
            if val:
                arg_string += " --delete-input"
        elif key == "strict":
            if val:
                arg_string += " --strict"
        elif key == "max_file_size":
            if val != 0:
                pytest.fail("Test specification imposes a maximum file size, which isn't compatible with the "
                            "command-line application.")
        elif key == "data":
            for subkey, subval in val.items():
                if subkey == "from_flags":
                    arg_string += f" --from-flags {subval}"
                elif subkey == "to_flags":
                    arg_string += f" --to-flags {subval}"
                elif subkey == "from_options":
                    arg_string += f" --from-options '{subval}'"
                elif subkey == "to_options":
                    arg_string += f" --to-options '{subval}'"
                elif subkey == COORD_GEN_KEY:
                    arg_string += f" --coord-gen {subval}"
                    if COORD_GEN_QUAL_KEY in val:
                        arg_string += f" {val[COORD_GEN_QUAL_KEY]}"
                elif subkey == COORD_GEN_QUAL_KEY:
                    # Handled alongside COORD_GEN_KEY above
                    pass
                else:
                    pytest.fail(f"The key 'data[\"{subkey}\"]' was passed to `conversion_kwargs` but could not be "
                                "interpreted")
        else:
            pytest.fail(f"The key '{key}' was passed to `conversion_kwargs` but could not be interpreted")

    run_with_arg_string(arg_string)

Runs a test conversion through the command-line interface

This function constructs an argument string to be passed to the script, which is called with the run_with_arg_string() function defined below.

Parameters

filename : str
The (unqualified) name of the input file to be converted
to_format : str
The format to convert the input file to
name : str
The name of the converter to use
input_dir : str
The directory which contains the input file
output_dir : str
The directory which contains the output file
log_file : str
The desired name of the log file
conversion_kwargs : Any
Additional arguments describing the conversion
from_format : str | None
The format of the input file, when it needs to be explicitly specified, otherwise None
def run_test_conversion_with_cla(test_spec: ConversionTestSpec)
Expand source code
def run_test_conversion_with_cla(test_spec: ConversionTestSpec):
    """Runs a test conversion or series thereof through the command-line application.

    Parameters
    ----------
    test_spec : ConversionTestSpec
        The specification for the test or series of tests to be run
    """
    # Make temporary directories for the input and output files to be stored in
    with TemporaryDirectory("_input") as input_dir, TemporaryDirectory("_output") as output_dir:
        # Iterate over the test spec to run each individual test it defines
        for single_test_spec in test_spec:
            if single_test_spec.skip:
                print(f"Skipping single test spec {single_test_spec}")
                continue
            print(f"Running single test spec: {single_test_spec}")
            _run_single_test_conversion_with_cla(test_spec=single_test_spec,
                                                 input_dir=input_dir,
                                                 output_dir=output_dir)
            print(f"Success for test spec: {single_test_spec}")

Runs a test conversion or series thereof through the command-line application.

Parameters

test_spec : ConversionTestSpec
The specification for the test or series of tests to be run
def run_test_conversion_with_library(test_spec: ConversionTestSpec)
Expand source code
def run_test_conversion_with_library(test_spec: ConversionTestSpec):
    """Runs a test conversion or series thereof through a call to the python library's `run_converter` function.

    Parameters
    ----------
    test_spec : ConversionTestSpec
        The specification for the test or series of tests to be run
    """
    # Make temporary directories for the input and output files to be stored in
    with TemporaryDirectory("_input") as input_dir, TemporaryDirectory("_output") as output_dir:
        # Iterate over the test spec to run each individual test it defines
        for single_test_spec in test_spec:
            if single_test_spec.skip:
                print(f"Skipping single test spec {single_test_spec}")
                continue
            print(f"Running single test spec: {single_test_spec}")
            _run_single_test_conversion_with_library(test_spec=single_test_spec,
                                                     input_dir=input_dir,
                                                     output_dir=output_dir)
            print(f"Success for test spec: {single_test_spec}")

Runs a test conversion or series thereof through a call to the python library's run_converter function.

Parameters

test_spec : ConversionTestSpec
The specification for the test or series of tests to be run
def run_with_arg_string(arg_string: str)
Expand source code
def run_with_arg_string(arg_string: str):
    """Runs the convert script with the provided argument string
    """
    l_args = shlex.split("test " + arg_string)
    with patch.object(sys, 'argv', l_args):
        data_convert_main()

Runs the convert script with the provided argument string

Classes

class ConversionTestInfo (run_type: str,
test_spec: SingleConversionTestSpec,
input_dir: str,
output_dir: str,
success: bool = True,
captured_stdout: str | None = None,
captured_stderr: str | None = None,
exc_info: pytest.ExceptionInfo | None = None)
Expand source code
@dataclass
class ConversionTestInfo:
    """Information about a tested conversion."""

    run_type: str
    """One of "library", "cla", or "gui", describing which type of test run was performed"""

    test_spec: SingleConversionTestSpec
    """The specification of the test conversion which was run to produce this"""

    input_dir: str
    """The directory used to store input data for the test"""

    output_dir: str
    """The directory used to create output data in for the test"""

    success: bool = True
    """Whether or not the conversion was successful"""

    captured_stdout: str | None = None
    """Any output to stdout while the test was run"""

    captured_stderr: str | None = None
    """Any output to stderr while the test was run"""

    exc_info: pytest.ExceptionInfo | None = None
    """If the test conversion raised an exception, that exception's info, otherwise None"""

    @property
    def qualified_in_filename(self):
        """Get the fully-qualified name of the input file"""
        return os.path.realpath(os.path.join(self.input_dir, self.test_spec.filename))

    @property
    def qualified_out_filename(self):
        """Get the fully-qualified name of the output file"""
        return os.path.realpath(os.path.join(self.output_dir, self.test_spec.out_filename))

    @property
    def qualified_log_filename(self):
        """Get the fully-qualified name of the log file"""
        return os.path.realpath(os.path.join(self.output_dir, self.test_spec.log_filename))

    @property
    def qualified_global_log_filename(self):
        """Get the fully-qualified name of the log file"""
        return self.test_spec.global_log_filename

Information about a tested conversion.

Instance variables

var captured_stderr : str | None

Any output to stderr while the test was run

var captured_stdout : str | None

Any output to stdout while the test was run

var exc_info : _pytest._code.code.ExceptionInfo | None

If the test conversion raised an exception, that exception's info, otherwise None

var input_dir : str

The directory used to store input data for the test

var output_dir : str

The directory used to create output data in for the test

prop qualified_global_log_filename
Expand source code
@property
def qualified_global_log_filename(self):
    """Get the fully-qualified name of the log file"""
    return self.test_spec.global_log_filename

Get the fully-qualified name of the log file

prop qualified_in_filename
Expand source code
@property
def qualified_in_filename(self):
    """Get the fully-qualified name of the input file"""
    return os.path.realpath(os.path.join(self.input_dir, self.test_spec.filename))

Get the fully-qualified name of the input file

prop qualified_log_filename
Expand source code
@property
def qualified_log_filename(self):
    """Get the fully-qualified name of the log file"""
    return os.path.realpath(os.path.join(self.output_dir, self.test_spec.log_filename))

Get the fully-qualified name of the log file

prop qualified_out_filename
Expand source code
@property
def qualified_out_filename(self):
    """Get the fully-qualified name of the output file"""
    return os.path.realpath(os.path.join(self.output_dir, self.test_spec.out_filename))

Get the fully-qualified name of the output file

var run_type : str

One of "library", "cla", or "gui", describing which type of test run was performed

var success : bool

Whether or not the conversion was successful

var test_specSingleConversionTestSpec

The specification of the test conversion which was run to produce this

class ConversionTestSpec (name: str,
filename: str | Iterable[str] = 'nacl.cif',
to_format: str | int | Iterable[str | int] = 'pdb',
from_format: str | int | Iterable[str | int] | None = None,
converter_name: str | Iterable[str] = 'Open Babel',
conversion_kwargs: dict[str, Any] | Iterable[dict[str, Any]] = <factory>,
expect_success: bool | Iterable[bool] = True,
skip: bool | Iterable[bool] = False,
callback: Callable[[ConversionTestInfo], str] | Iterable[Callable[[ConversionTestInfo], str]] | None = None,
compatible_with_library: bool = True,
compatible_with_cla: bool = True,
compatible_with_gui: bool = True)
Expand source code
@dataclass
class ConversionTestSpec:
    """Class providing a specification for a test file conversion.

    All attributes of this class can be provided either as a single value or a list of values. In the case that a list
    is provided for one or more attributes, the lists must all be the same length, and they will be iterated through
    (as if using zip on the multiple lists) to test each element in turn.
    """

    name: str
    """The name of this test specification"""

    filename: str | Iterable[str] = "nacl.cif"
    """The name of the input file, relative to the input test data location, or a list thereof"""

    to_format: str | int | Iterable[str | int] = "pdb"
    """The format to test converting the input file to, or a list thereof"""

    from_format: str | int | Iterable[str | int] | None = None
    """The format of the input file, when it needs to be explicitly specified"""

    converter_name: str | Iterable[str] = CONVERTER_DEFAULT
    """The name of the converter to be used for the test, or a list thereof"""

    conversion_kwargs: dict[str, Any] | Iterable[dict[str, Any]] = field(default_factory=dict)
    """Any keyword arguments to be provided to the call to `run_converter`, aside from those listed above, or a list
    thereof"""

    expect_success: bool | Iterable[bool] = True
    """Whether or not to expect the test to succeed"""

    skip: bool | Iterable[bool] = False
    """If set to true, this test will be skipped and not run. Can also be set individually for certain tests within an
    array. This should typically only be used when debugging to skip working tests to more easily focus on non-working
    tests"""

    callback: (Callable[[ConversionTestInfo], str] |
               Iterable[Callable[[ConversionTestInfo], str]] | None) = None
    """Function to be called after the conversion is performed to check in detail whether results are as expected. It
    should take as its only argument a `ConversionTestInfo` and return a string. The string should be empty if the check
    is passed and should explain the failure otherwise."""

    compatible_with_library: bool = True
    """Whether or not this test spec is compatible with being run through the Python library, default True"""

    compatible_with_cla: bool = True
    """Whether or not this test spec is compatible with being run through the command-line application, default True"""

    compatible_with_gui: bool = True
    """Whether or not this test spec is compatible with being run through the GUI, default True"""

    def __post_init__(self):
        """Regularize the lengths of all attribute lists, in case some were provided as single values and others as
        lists, and set up initial values
        """

        # To ease maintainability, we get the list of this class's attributes automatically from its __dict__, excluding
        # any which start with an underscore
        self._l_attr_names: list[str] = [attr_name for attr_name in self.__dict__ if
                                         not (attr_name.startswith("_") or
                                              attr_name == "name" or
                                              attr_name.startswith("compatible"))]

        l_single_val_attrs = []
        self._len: int = 1

        # Check if each attribute of this class is provided as a list, and if any are, make sure that all lists are
        # the same length
        for attr_name in self._l_attr_names:
            val = getattr(self, attr_name)

            val_len = 1

            # Check first if the attr is a str or a dict, which are iterable, but are single-values for the purpose
            # of values here
            if isinstance(val, (str, dict)):
                l_single_val_attrs.append(attr_name)
            else:
                # It's not a str or a dict, so test if we can get the length of it, which indicates it is iterable
                try:
                    val_len = len(val)
                    # If it's a single value in a list, unpack it for now
                    if val_len == 1:
                        # Pylint for some reason thinks `Any` objects aren't subscriptable, but here we know it is
                        val: Iterable[Any]
                        setattr(self, attr_name, val[0])
                except TypeError:
                    l_single_val_attrs.append(attr_name)

            # Check if there are any conflicts with some lists being provided as different lengths
            if (self._len > 1) and (val_len > 1) and (val_len != self._len):
                raise ValueError("All lists of values which are set as attributes for a `ConversionTestSpec` must be "
                                 "the same length.")
            if val_len > 1:
                self._len = val_len

        # At this point, self._len will be either 1 if all attrs are single values, or the length of the lists for attrs
        # that aren't. To keep everything regularised, we make everything a list of this length
        for attr_name in self._l_attr_names:
            if attr_name in l_single_val_attrs:
                setattr(self, attr_name, [getattr(self, attr_name)]*self._len)

        # Check if all tests should be skipped
        self.skip_all = all(self.skip)

    def __len__(self):
        """Get the length from the member - valid only after `__post_init__` has been called"""
        return self._len

    def __iter__(self):
        """Allow to iterate over the class, getting a `SingleConversionTestSpec` for each value
        """
        l_l_attr_vals = zip(*[getattr(self, attr_name) for attr_name in self._l_attr_names])
        for l_attr_vals in l_l_attr_vals:
            yield SingleConversionTestSpec(**dict(zip(self._l_attr_names, l_attr_vals)))

Class providing a specification for a test file conversion.

All attributes of this class can be provided either as a single value or a list of values. In the case that a list is provided for one or more attributes, the lists must all be the same length, and they will be iterated through (as if using zip on the multiple lists) to test each element in turn.

Instance variables

var callback : collections.abc.Callable[[ConversionTestInfo], str] | collections.abc.Iterable[collections.abc.Callable[[ConversionTestInfo], str]] | None

Function to be called after the conversion is performed to check in detail whether results are as expected. It should take as its only argument a ConversionTestInfo and return a string. The string should be empty if the check is passed and should explain the failure otherwise.

var compatible_with_cla : bool

Whether or not this test spec is compatible with being run through the command-line application, default True

var compatible_with_gui : bool

Whether or not this test spec is compatible with being run through the GUI, default True

var compatible_with_library : bool

Whether or not this test spec is compatible with being run through the Python library, default True

var conversion_kwargs : dict[str, typing.Any] | collections.abc.Iterable[dict[str, typing.Any]]

Any keyword arguments to be provided to the call to run_converter, aside from those listed above, or a list thereof

var converter_name : str | collections.abc.Iterable[str]

The name of the converter to be used for the test, or a list thereof

var expect_success : bool | collections.abc.Iterable[bool]

Whether or not to expect the test to succeed

var filename : str | collections.abc.Iterable[str]

The name of the input file, relative to the input test data location, or a list thereof

var from_format : str | int | collections.abc.Iterable[str | int] | None

The format of the input file, when it needs to be explicitly specified

var name : str

The name of this test specification

var skip : bool | collections.abc.Iterable[bool]

If set to true, this test will be skipped and not run. Can also be set individually for certain tests within an array. This should typically only be used when debugging to skip working tests to more easily focus on non-working tests

var to_format : str | int | collections.abc.Iterable[str | int]

The format to test converting the input file to, or a list thereof

class SingleConversionTestSpec (filename: str,
to_format: str | int,
from_format: str | int | None = None,
converter_name: str | Iterable[str] = 'Open Babel',
conversion_kwargs: dict[str, Any] = <factory>,
expect_success: bool = True,
skip: bool = False,
callback: Callable[[ConversionTestInfo], str] | None = None)
Expand source code
@dataclass
class SingleConversionTestSpec:
    """Class providing a specification for a single test file conversion, produced by iterating over a
    `ConversionTestSpec` object
    """

    filename: str
    """The name of the input file, relative to the input test data location"""

    to_format: str | int
    """The format to test converting the input file to"""

    from_format: str | int | None = None
    """The format of the input file, when it needs to be explicitly specified"""

    converter_name: str | Iterable[str] = CONVERTER_DEFAULT
    """The name of the converter to be used for the test"""

    conversion_kwargs: dict[str, Any] = field(default_factory=dict)
    """Any keyword arguments to be provided to the call to `run_converter`, aside from those listed above and
    `input_dir` and `output_dir` (for which temporary directories are used)"""

    expect_success: bool = True
    """Whether or not to expect the test to succeed"""

    skip: bool = False
    """If set to True, this test will be skipped, always returning success"""

    callback: (Callable[[ConversionTestInfo], str] | None) = None
    """Function to be called after the conversion is performed to check in detail whether results are as expected. It
    should take as its only argument a `ConversionTestInfo` and return a string. The string should be empty if the check
    is passed and should explain the failure otherwise."""

    @property
    def out_filename(self) -> str:
        """The unqualified name of the output file which should have been created by the conversion."""
        to_format_name = get_format_info(self.to_format, which=0).name
        if not is_archive(self.filename):
            return f"{os.path.splitext(self.filename)[0]}.{to_format_name}"
        else:
            filename_base, ext = split_archive_ext(os.path.basename(self.filename))
            return f"{filename_base}-{to_format_name}{ext}"

    @property
    def log_filename(self) -> str:
        """The unqualified name of the log file which should have been created by the conversion."""
        return f"{split_archive_ext(self.filename)[0]}{OUTPUT_LOG_EXT}"

    @property
    def global_log_filename(self) -> str:
        """The unqualified name of the global log file which stores info on all conversions."""
        return GLOBAL_LOG_FILENAME

Class providing a specification for a single test file conversion, produced by iterating over a ConversionTestSpec object

Instance variables

var callback : collections.abc.Callable[[ConversionTestInfo], str] | None

Function to be called after the conversion is performed to check in detail whether results are as expected. It should take as its only argument a ConversionTestInfo and return a string. The string should be empty if the check is passed and should explain the failure otherwise.

var conversion_kwargs : dict[str, typing.Any]

Any keyword arguments to be provided to the call to run_converter, aside from those listed above and input_dir and output_dir (for which temporary directories are used)

var converter_name : str | collections.abc.Iterable[str]

The name of the converter to be used for the test

var expect_success : bool

Whether or not to expect the test to succeed

var filename : str

The name of the input file, relative to the input test data location

var from_format : str | int | None

The format of the input file, when it needs to be explicitly specified

prop global_log_filename : str
Expand source code
@property
def global_log_filename(self) -> str:
    """The unqualified name of the global log file which stores info on all conversions."""
    return GLOBAL_LOG_FILENAME

The unqualified name of the global log file which stores info on all conversions.

prop log_filename : str
Expand source code
@property
def log_filename(self) -> str:
    """The unqualified name of the log file which should have been created by the conversion."""
    return f"{split_archive_ext(self.filename)[0]}{OUTPUT_LOG_EXT}"

The unqualified name of the log file which should have been created by the conversion.

prop out_filename : str
Expand source code
@property
def out_filename(self) -> str:
    """The unqualified name of the output file which should have been created by the conversion."""
    to_format_name = get_format_info(self.to_format, which=0).name
    if not is_archive(self.filename):
        return f"{os.path.splitext(self.filename)[0]}.{to_format_name}"
    else:
        filename_base, ext = split_archive_ext(os.path.basename(self.filename))
        return f"{filename_base}-{to_format_name}{ext}"

The unqualified name of the output file which should have been created by the conversion.

var skip : bool

If set to True, this test will be skipped, always returning success

var to_format : str | int

The format to test converting the input file to