Validation Service¶

validation_service ¶

Validation Service.

Domain service for complex business validations.

Classes:

Name	Description
`ValidationService`	Static methods for validating datasets, samples, and file inputs

Classes¶

ValidationService ¶

Domain service for complex validations.

Implements validation rules that involve multiple entities or business logic that does not belong to a specific entity.

Notes

This is a Domain Service that encapsulates complex validation logic, keeping the entities simple and focused. All methods are static and stateless.

Functions¶

validate_raw_input `staticmethod` ¶

validate_raw_input(content: str) -> Tuple[bool, str]

Validate raw content from sample upload.

Parameters:

Name	Type	Description	Default
`content`	`str`	Content of the samples file in BioRemPP format	required

Returns:

Type	Description
`Tuple[bool, str]`	Tuple (is_valid, error_message) where is_valid is True if the content is valid, and error_message contains the error description if any

Notes

Expected format: lines starting with '>' indicate the start of a new sample, and lines starting with 'K' are KO entries.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.INFO)
def validate_raw_input(content: str) -> Tuple[bool, str]:
    """
    Check that uploaded sample text follows the BioRemPP layout.

    Parameters
    ----------
    content : str
        Full text of the uploaded samples file.

    Returns
    -------
    Tuple[bool, str]
        ``(True, "")`` when the content is accepted, otherwise
        ``(False, message)`` where ``message`` describes the first
        problem that was found.

    Notes
    -----
    Rules applied, in order:
    - the content must not be empty or whitespace-only
    - after trimming, it must span at least two lines
    - the first line must start with '>'
    - every non-blank line is either a sample header ('>' followed by
      a non-empty name) or a KO entry (starts with 'K' and is accepted
      by the ``KO`` constructor); anything else is rejected
    - at least one sample and one KO entry must be present
    """
    trimmed = content.strip() if content else ""
    if not trimmed:
        logger.warning("Validation failed: Empty file content")
        return False, "Empty file content"

    rows = trimmed.split("\n")

    # A valid file needs a sample header plus at least one KO line.
    if len(rows) < 2:
        logger.warning(
            "Validation failed: Insufficient content",
            extra={"line_count": len(rows)},
        )
        return False, "File must contain at least one sample and one KO"

    # The file has to open with a sample header.
    header = rows[0].strip()
    if not header.startswith(">"):
        logger.warning(
            "Validation failed: Invalid first line format",
            extra={"first_line": header[:50]},
        )
        return False, "File must start with sample identifier (>SampleName)"

    samples_seen = 0
    kos_seen = 0

    # Line numbers reported to the user are 1-based.
    for position, raw_row in enumerate(rows, start=1):
        entry = raw_row.strip()
        if not entry:
            # Blank lines are tolerated anywhere in the file.
            continue

        if entry.startswith(">"):
            samples_seen += 1
            if entry[1:].strip():
                continue
            logger.warning(
                "Validation failed: Empty sample name",
                extra={"line_number": position},
            )
            return False, f"Line {position}: Sample name cannot be empty"

        if entry.startswith("K"):
            kos_seen += 1
            try:
                # Constructing the value object performs the format check.
                KO(entry)
            except ValueError as exc:
                reason = str(exc)
                logger.warning(
                    "Validation failed: Invalid KO format",
                    extra={
                        "line_number": position,
                        "ko_value": entry,
                        "error": reason,
                    },
                )
                return False, f"Line {position}: {reason}"
            continue

        # Neither a sample header nor a KO entry.
        logger.warning(
            "Validation failed: Invalid line format",
            extra={"line_number": position, "line_content": entry[:50]},
        )
        return False, f"Line {position}: Invalid line format: {entry}"

    if samples_seen == 0:
        logger.warning("Validation failed: No samples found in file")
        return False, "No samples found in file"

    if kos_seen == 0:
        logger.warning("Validation failed: No KO entries found in file")
        return False, "No KO entries found in file"

    logger.info(
        "Raw input validation successful",
        extra={
            "sample_count": samples_seen,
            "ko_count": kos_seen,
            "total_lines": len(rows),
        },
    )
    return True, ""

validate_dataset `staticmethod` ¶

validate_dataset(dataset: Dataset) -> Tuple[bool, str]

Validate a complete dataset.

Parameters:

Name	Type	Description	Default
`dataset`	`Dataset`	Dataset to be validated	required

Returns:

Type	Description
`Tuple[bool, str]`	Tuple (is_valid, error_message)

Notes

Validates both the dataset structure and each sample individually, ensuring complete consistency.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.INFO)
def validate_dataset(dataset: Dataset) -> Tuple[bool, str]:
    """
    Check a dataset and every sample it contains.

    Parameters
    ----------
    dataset : Dataset
        Dataset whose samples should be checked.

    Returns
    -------
    Tuple[bool, str]
        ``(True, "")`` when the dataset is accepted, otherwise
        ``(False, message)`` describing the first failure.

    Notes
    -----
    A dataset with zero samples is rejected outright. Otherwise each
    sample's ``validate()`` is called in turn; the first ``ValueError``
    it raises stops the check and its text becomes the error message.
    """
    if dataset.total_samples == 0:
        logger.warning("Dataset validation failed: No samples in dataset")
        return False, "Dataset has no samples"

    logger.debug(
        "Validating dataset",
        extra={"sample_count": dataset.total_samples},
    )

    for member in dataset.samples:
        try:
            member.validate()
        except ValueError as exc:
            failure = {"sample_id": str(member.id), "error": str(exc)}
            logger.warning(
                "Dataset validation failed: Invalid sample",
                extra=failure,
            )
            return False, str(exc)

    # Summary figures attached to the success log entry.
    summary = {
        "sample_count": dataset.total_samples,
        "total_ko_count": sum(len(member.ko_list) for member in dataset.samples),
    }
    logger.info("Dataset validation successful", extra=summary)
    return True, ""

validate_ko_list `staticmethod` ¶

validate_ko_list(ko_list: List[str]) -> Tuple[bool, str, List[KO]]

Validate and convert a list of strings to a list of KOs.

Parameters:

Name	Type	Description	Default
`ko_list`	`List[str]`	List of strings representing KOs	required

Returns:

Type	Description
`Tuple[bool, str, List[KO]]`	Tuple (is_valid, error_message, ko_objects) where ko_objects contains the validated KOs if successful

Notes

This method is useful for validating user inputs before creating domain entities.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def validate_ko_list(ko_list: List[str]) -> Tuple[bool, str, List[KO]]:
    """
    Turn a list of KO strings into ``KO`` objects, validating each one.

    Parameters
    ----------
    ko_list : List[str]
        KO identifiers as plain strings.

    Returns
    -------
    Tuple[bool, str, List[KO]]
        ``(True, "", kos)`` with one ``KO`` per input string when all
        entries are accepted; ``(False, message, [])`` when the list is
        empty or an entry is rejected.

    Notes
    -----
    Each string is stripped of surrounding whitespace before being
    passed to ``KO``. Conversion stops at the first ``ValueError`` and
    no partial result is returned.
    """
    if not ko_list:
        logger.warning("KO list validation failed: Empty list")
        return False, "KO list cannot be empty", []

    logger.debug("Validating KO list", extra={"ko_count": len(ko_list)})

    converted = []
    for position, raw_value in enumerate(ko_list):
        try:
            converted.append(KO(raw_value.strip()))
        except ValueError as exc:
            logger.warning(
                "KO list validation failed: Invalid KO",
                extra={
                    "index": position,
                    "ko_value": raw_value,
                    "error": str(exc),
                },
            )
            # All-or-nothing: discard anything converted so far.
            return False, str(exc), []

    logger.info(
        "KO list validation successful",
        extra={"validated_count": len(converted)},
    )
    return True, "", converted

check_duplicate_samples `staticmethod` ¶

check_duplicate_samples(dataset: Dataset) -> Tuple[bool, List[str]]

Check for duplicate samples in the dataset.

Parameters:

Name	Type	Description	Default
`dataset`	`Dataset`	Dataset to be checked	required

Returns:

Type	Description
`Tuple[bool, List[str]]`	Tuple (has_duplicates, duplicate_ids) where has_duplicates is True if there are duplicates, and duplicate_ids contains the duplicate IDs

Notes

Duplicate samples may indicate an error in the input file or incorrect processing.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def check_duplicate_samples(dataset: Dataset) -> Tuple[bool, List[str]]:
    """
    Check for duplicate samples in the dataset.

    Parameters
    ----------
    dataset : Dataset
        Dataset to be checked

    Returns
    -------
    Tuple[bool, List[str]]
        Tuple (has_duplicates, duplicate_ids) where has_duplicates
        is True if at least one sample ID occurs more than once, and
        duplicate_ids lists each repeated ID exactly once, ordered by
        where its first repetition appears in the dataset

    Notes
    -----
    Sample IDs are compared by their string form (``str(sample.id)``).
    An ID that occurs three or more times is still reported only once,
    so ``duplicate_ids`` never contains repeated entries itself.
    """
    logger.debug(
        "Checking for duplicate samples",
        extra={"total_samples": dataset.total_samples},
    )

    seen_ids = set()
    reported_ids = set()
    duplicates = []

    for sample in dataset.samples:
        sample_id_str = str(sample.id)
        if sample_id_str not in seen_ids:
            seen_ids.add(sample_id_str)
        elif sample_id_str not in reported_ids:
            # Second occurrence: record the ID once; later repeats
            # of the same ID are ignored.
            reported_ids.add(sample_id_str)
            duplicates.append(sample_id_str)

    has_duplicates = bool(duplicates)

    if has_duplicates:
        logger.warning(
            "Duplicate samples found",
            extra={"duplicate_count": len(duplicates), "duplicate_ids": duplicates},
        )
    else:
        logger.debug("No duplicate samples found")

    return has_duplicates, duplicates

validate_file_size `staticmethod` ¶

validate_file_size(size_bytes: int, max_bytes: int) -> Tuple[bool, str]

Validate file size against maximum limit.

Parameters:

Name	Type	Description	Default
`size_bytes`	`int`	File size in bytes	required
`max_bytes`	`int`	Maximum allowed size in bytes	required

Returns:

Type	Description
`Tuple[bool, str]`	Tuple (is_valid, error_message)

Notes

Provides user-friendly error messages with sizes in MB. Logs validation failures with detailed context.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def validate_file_size(size_bytes: int, max_bytes: int) -> Tuple[bool, str]:
    """
    Validate file size against maximum limit.

    Parameters
    ----------
    size_bytes : int
        File size in bytes
    max_bytes : int
        Maximum allowed size in bytes

    Returns
    -------
    Tuple[bool, str]
        Tuple (is_valid, error_message); a file whose size equals the
        limit is accepted

    Notes
    -----
    Error messages express both sizes in MB (1 MB = 1024 * 1024 bytes).
    The limit is shown without decimals when it is a whole number of MB
    and with two decimals otherwise, so a fractional limit is never
    rounded to a value that differs from the one actually enforced.
    """
    if size_bytes > max_bytes:
        size_mb = size_bytes / (1024 * 1024)
        max_mb = max_bytes / (1024 * 1024)
        # Rounding a fractional limit to 0 decimals can print a limit
        # larger than the rejected file (1.5 MB -> "2 MB"); only drop
        # the decimals when nothing is lost.
        max_label = f"{max_mb:.0f}" if max_mb.is_integer() else f"{max_mb:.2f}"
        error_msg = (
            f"File size ({size_mb:.2f} MB) exceeds maximum "
            f"allowed size ({max_label} MB)"
        )
        logger.warning(
            "File size validation failed",
            extra={
                "size_bytes": size_bytes,
                "size_mb": size_mb,
                "max_bytes": max_bytes,
                "max_mb": max_mb,
            },
        )
        return False, error_msg

    logger.debug(
        "File size validation passed",
        extra={"size_bytes": size_bytes, "max_bytes": max_bytes},
    )
    return True, ""

validate_sample_count `staticmethod` ¶

validate_sample_count(sample_count: int, max_samples: int) -> Tuple[bool, str]

Validate number of samples against maximum limit.

Parameters:

Name	Type	Description	Default
`sample_count`	`int`	Number of samples in dataset	required
`max_samples`	`int`	Maximum allowed samples	required

Returns:

Type	Description
`Tuple[bool, str]`	Tuple (is_valid, error_message)

Notes

Provides clear error messages when limit is exceeded. Logs validation context for debugging.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def validate_sample_count(sample_count: int, max_samples: int) -> Tuple[bool, str]:
    """
    Check that the number of samples does not exceed the allowed maximum.

    Parameters
    ----------
    sample_count : int
        How many samples the dataset holds.
    max_samples : int
        Upper limit on the number of samples.

    Returns
    -------
    Tuple[bool, str]
        ``(True, "")`` when ``sample_count`` is at or below the limit,
        otherwise ``(False, message)`` stating both numbers.

    Notes
    -----
    Both outcomes are logged with the count and the limit attached:
    a pass at DEBUG level, a failure at WARNING level.
    """
    limit_exceeded = sample_count > max_samples
    context = {"sample_count": sample_count, "max_samples": max_samples}

    if not limit_exceeded:
        logger.debug("Sample count validation passed", extra=context)
        return True, ""

    logger.warning("Sample count validation failed", extra=context)
    message = (
        f"Number of samples ({sample_count}) exceeds maximum allowed ({max_samples})"
    )
    return False, message

validate_ko_count `staticmethod` ¶

validate_ko_count(ko_count: int, max_kos: int) -> Tuple[bool, str]

Validate number of KO entries against maximum limit.

Parameters:

Name	Type	Description	Default
`ko_count`	`int`	Number of KO entries in dataset	required
`max_kos`	`int`	Maximum allowed KO entries	required

Returns:

Type	Description
`Tuple[bool, str]`	Tuple (is_valid, error_message)

Notes

Formats large numbers with commas for readability. Logs detailed context for validation failures.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def validate_ko_count(ko_count: int, max_kos: int) -> Tuple[bool, str]:
    """
    Check that the number of KO entries does not exceed the allowed maximum.

    Parameters
    ----------
    ko_count : int
        How many KO entries the dataset holds.
    max_kos : int
        Upper limit on the number of KO entries.

    Returns
    -------
    Tuple[bool, str]
        ``(True, "")`` when ``ko_count`` is at or below the limit,
        otherwise ``(False, message)`` stating both numbers.

    Notes
    -----
    Numbers in the error message use comma thousands separators.
    Both outcomes are logged with the count and the limit attached:
    a pass at DEBUG level, a failure at WARNING level.
    """
    limit_exceeded = ko_count > max_kos
    context = {"ko_count": ko_count, "max_kos": max_kos}

    if not limit_exceeded:
        logger.debug("KO count validation passed", extra=context)
        return True, ""

    logger.warning("KO count validation failed", extra=context)
    message = (
        f"Number of KO entries ({ko_count:,}) exceeds maximum allowed ({max_kos:,})"
    )
    return False, message

validate_encoding `staticmethod` ¶

validate_encoding(content_bytes: bytes) -> Tuple[bool, str, str]

Validate and decode file content encoding.

Attempts UTF-8 decoding first, falls back to latin-1 if needed.

Parameters:

Name	Type	Description	Default
`content_bytes`	`bytes`	Raw file content	required

Returns:

Type	Description
`Tuple[bool, str, str]`	Tuple (is_valid, decoded_content, error_message), where is_valid is True if decoding succeeded, decoded_content is the decoded string (empty if decoding failed), and error_message is the error description (empty if successful)

Notes

Encoding priority: (1) UTF-8 (preferred); (2) Latin-1 (fallback).

Logs warnings when fallback encoding is used. Returns user-friendly error messages.

Source code in src/domain/services/validation_service.py

@staticmethod
@log_execution(level=logging.DEBUG)
def validate_encoding(content_bytes: bytes) -> Tuple[bool, str, str]:
    """
    Validate and decode file content encoding.

    Attempts UTF-8 decoding first, falls back to latin-1 if needed.

    Parameters
    ----------
    content_bytes : bytes
        Raw file content

    Returns
    -------
    Tuple[bool, str, str]
        Tuple (is_valid, decoded_content, error_message)
        - is_valid: True if decoding succeeded
        - decoded_content: Decoded string (empty if failed)
        - error_message: Error description (empty if successful)

    Notes
    -----
    Encoding priority:
    1. UTF-8 (preferred); a leading byte-order mark is removed
    2. Latin-1 (fallback)

    The UTF-8 attempt uses the "utf-8-sig" codec so that a BOM written
    by some editors does not end up as an invisible U+FEFF character at
    the start of the decoded text, where it would hide a leading '>'
    from later format checks.

    Logs warnings when fallback encoding is used.
    Returns user-friendly error messages.
    """
    # Try UTF-8 first ("utf-8-sig" behaves like UTF-8 but drops a BOM)
    try:
        decoded = content_bytes.decode("utf-8-sig")
    except UnicodeDecodeError as e:
        logger.warning(f"UTF-8 decoding failed: {e}", extra={"error": str(e)})
    else:
        logger.debug("Content decoded as UTF-8")
        return True, decoded, ""

    # Try latin-1 as fallback
    try:
        decoded = content_bytes.decode("latin-1")
    except UnicodeDecodeError as e:
        # NOTE: latin-1 assigns a character to every byte value, so this
        # handler is not expected to run; it is kept as a safety net so
        # the documented failure result stays available.
        error_msg = "Unable to decode file. Please ensure file is UTF-8 encoded."
        logger.error(f"All encoding attempts failed: {e}", extra={"error": str(e)})
        return False, "", error_msg

    logger.warning(
        "Content decoded as latin-1 (not UTF-8)", extra={"encoding": "latin-1"}
    )
    return True, decoded, ""

Validation Service¶