Source code for evalutils.validators

from abc import ABC, abstractmethod
from pathlib import Path
from typing import Tuple

from pandas import DataFrame

from .exceptions import ValidationError
from .io import first_int_in_filename_key


class DataFrameValidator(ABC):
    """Abstract interface for a single check run against a DataFrame."""

    @abstractmethod
    def validate(self, *, df: DataFrame):
        """
        Check one property of a DataFrame

        Parameters
        ----------
        df
            The DataFrame to inspect

        Returns
        -------
        None when the DataFrame passes the check

        Raises
        ------
        ValidationError
            When the DataFrame fails the check
        """
        # The base implementation unconditionally fails; concrete
        # validators are expected to override this method.
        raise ValidationError()
class UniquePathIndicesValidator(DataFrameValidator):
    """
    Validates that the indices from the filenames are unique
    """

    def validate(self, *, df: DataFrame):
        """
        Check that no two entries of the ``path`` column share an index

        The index of a path is the value returned for it by
        ``first_int_in_filename_key``.

        Parameters
        ----------
        df
            The DataFrame to be validated, it must have a ``path`` column

        Returns
        -------
        None if every path yields a distinct index

        Raises
        ------
        ValidationError
            If the ``path`` column is missing, or if the indices are not
            unique
        """
        try:
            paths = df["path"]
        except KeyError as err:
            # Chain explicitly so the traceback reports the failed column
            # lookup as the cause rather than as an error that happened
            # while handling another error.
            raise ValidationError(
                "Column `path` not found in DataFrame."
            ) from err

        idx = [first_int_in_filename_key(Path(p)) for p in paths]

        # Duplicated indices collapse in the set, making it shorter than
        # the column it was built from.
        if len(set(idx)) != len(paths):
            raise ValidationError(
                "The first number is each filename is not unique, please "
                "check that your files are named correctly."
            )
class UniqueImagesValidator(DataFrameValidator):
    """
    Validates that each image in the set is unique
    """

    def validate(self, *, df: DataFrame):
        """
        Check that the ``hash`` column contains no repeated values

        Parameters
        ----------
        df
            The DataFrame to be validated, it must have a ``hash`` column

        Returns
        -------
        None if every hash is distinct

        Raises
        ------
        ValidationError
            If the ``hash`` column is missing, or if any hash occurs more
            than once
        """
        try:
            hashes = df["hash"]
        except KeyError as err:
            # Chain explicitly so the traceback reports the failed column
            # lookup as the cause rather than as an error that happened
            # while handling another error.
            raise ValidationError(
                "Column `hash` not found in DataFrame."
            ) from err

        # Repeated hashes collapse in the set, making it shorter than the
        # column it was built from.
        if len(set(hashes)) != len(hashes):
            raise ValidationError(
                "The images are not unique, please submit a unique image for "
                "each case."
            )
class ExpectedColumnNamesValidator(DataFrameValidator):
    def __init__(
        self, *, expected: Tuple[str, ...], extra_cols_check: bool = True
    ):
        """
        Validates that the DataFrame has the expected columns

        Parameters
        ----------
        expected
            The column labels that must be present in the DataFrame
        extra_cols_check
            Whether columns outside of ``expected`` are rejected. Enabled
            by default, switch it off when additional columns can safely
            be ignored.

        Raises
        ------
        ValueError
            If ``expected`` is empty
        """
        if len(expected) < 1:
            owner = self.__class__.__name__
            raise ValueError(
                "You must define what columns you expect to find in the "
                f"DataFrame in order to use {owner}."
            )

        self._expected = expected
        self._extra_cols_check = extra_cols_check

        super().__init__()

    def validate(self, *, df: DataFrame):
        """
        Compare the columns of the DataFrame with the expected ones

        Parameters
        ----------
        df
            The DataFrame to be validated

        Returns
        -------
        None if the columns are acceptable

        Raises
        ------
        ValidationError
            If an expected column is missing, or if the extra columns
            check is enabled and an unexpected column is present
        """
        found = df.columns

        missing = [name for name in self._expected if name not in found]
        if missing:
            message = (
                "We expected to find the following columns but we didn't: "
                f"{missing}. Please check the column labels, and "
                "note that this is case sensitive. We only found: "
                f"{found}."
            )
            raise ValidationError(message)

        # Nothing more to verify when extra columns are tolerated.
        if not self._extra_cols_check:
            return

        surplus = [name for name in found if name not in self._expected]
        if surplus:
            message = (
                f"We only expected to find the columns {self._expected}. "
                "However, we also found that extra columns were defined: "
                f"{surplus}. Please remove them."
            )
            raise ValidationError(message)
class NumberOfCasesValidator(DataFrameValidator):
    def __init__(self, *, num_cases: int):
        """
        Validates that there are the correct number of cases in the set.

        Parameters
        ----------
        num_cases
            How many cases the DataFrame is required to contain

        Raises
        ------
        ValueError
            If ``num_cases`` is zero or negative
        """
        if num_cases <= 0:
            owner = self.__class__.__name__
            raise ValueError(
                "The expected number of cases must be greater than zero in "
                f"{owner}."
            )

        self._num_cases = num_cases

        super().__init__()

    def validate(self, *, df: DataFrame):
        """
        Check that the length of the DataFrame matches the expected count

        Parameters
        ----------
        df
            The DataFrame to be validated

        Returns
        -------
        None if the DataFrame has the expected length

        Raises
        ------
        ValidationError
            If the length of the DataFrame differs from the expected
            number of cases
        """
        found = len(df)
        if found == self._num_cases:
            return

        raise ValidationError(
            f"We expected to find {self._num_cases}, but we found "
            f"{found}. Please correct the number of predictions."
        )