Skip to content

Unique Validation

ColumnUniqueValueCountToBeBetween

Check the number of unique values in a column to be between min and max.

If either min_value or max_value is not provided, then the other one will be used as the threshold.

If neither min_value nor max_value is provided, then the validation will result in failure.

Parameters:

Name Type Description Default
column str

The column to validate.

required
min_value int or None

The minimum number of unique values allowed.

None
max_value int or None

The maximum number of unique values allowed.

None
threshold float

Threshold for validation. Defaults to 0.0.

required
impact Literal['low', 'medium', 'high']

Impact level of validation. Defaults to "low".

required
kwargs

KwargsType (dict): Additional keyword arguments.

{}
Source code in validoopsie/validation_catalogue/UniqueValidation/column_unique_value_count_to_be_between.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
@base_validation_wrapper
class ColumnUniqueValueCountToBeBetween(BaseValidationParameters):
    """Check the number of unique values in a column to be between min and max.

    If only one of `min_value` / `max_value` is given, the other one stays `None`
    and is forwarded as-is to the range filter.

    If neither `min_value` nor `max_value` is provided, then the validation will
    result in failure.

    Parameters:
        column (str): The column to validate.
        min_value (int or None): The minimum number of unique values allowed.
        max_value (int or None): The maximum number of unique values allowed.
        threshold (float, optional): Threshold for validation. Defaults to 0.0.
        impact (Literal["low", "medium", "high"], optional): Impact level of validation.
            Defaults to "low".
        kwargs: KwargsType (dict): Additional keyword arguments.

    """

    def __init__(
        self,
        column: str,
        min_value: int | None = None,
        max_value: int | None = None,
        *args,
        **kwargs,
    ) -> None:
        # Check the bounds before anything else is initialised.
        min_max_arg_check(min_value, max_value)

        # Remaining positional/keyword arguments are handled by the base class.
        super().__init__(column, *args, **kwargs)
        self.max_value = max_value
        self.min_value = min_value

    @property
    def fail_message(self) -> str:
        """Return the fail message, that will be used in the report."""
        bounds = f"{self.min_value} and {self.max_value}"
        return (
            f"The column '{self.column}' has a number of unique values that "
            f"is not between {bounds}."
        )

    def __call__(self, frame: FrameT) -> FrameT:
        """Group by the column, count each group and apply the min/max filter."""
        # Name of the helper column that holds the count of each group.
        count_column = f"{self.column}-count"

        # One row per distinct value of the column, together with its count.
        occurrences = nw.col(self.column).count().alias(count_column)
        grouped = frame.group_by(self.column).agg(occurrences)

        return min_max_filter(grouped, count_column, self.min_value, self.max_value)

ColumnUniqueValuesToBeInList

Check if the unique values are in the list.

Parameters:

Name Type Description Default
column str

Column to validate.

required
values list[Union[str, float, int, None]]

List of values to check.

required
threshold float

Threshold for validation. Defaults to 0.0.

required
impact Literal['low', 'medium', 'high']

Impact level of validation. Defaults to "low".

required
kwargs

KwargsType (dict): Additional keyword arguments.

{}
Source code in validoopsie/validation_catalogue/UniqueValidation/column_unique_values_to_be_in_list.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
@base_validation_wrapper
class ColumnUniqueValuesToBeInList(BaseValidationParameters):
    """Check if the unique values are in the list.

    Calling the validation groups the frame by `column`, counts each group and
    returns only the groups whose value is not contained in `values`.

    Parameters:
        column (str): Column to validate.
        values (list[Union[str, float, int, None]]): List of values to check.
        threshold (float, optional): Threshold for validation. Defaults to 0.0.
        impact (Literal["low", "medium", "high"], optional): Impact level of validation.
            Defaults to "low".
        kwargs: KwargsType (dict): Additional keyword arguments.

    """

    def __init__(
        self,
        column: str,
        values: list[str | int | float | None],
        *args,
        **kwargs,
    ) -> None:
        # Remaining positional/keyword arguments are handled by the base class.
        super().__init__(column, *args, **kwargs)
        self.values = values

    @property
    def fail_message(self) -> str:
        """Return the fail message, that will be used in the report."""
        return f"The column '{self.column}' has unique values that are not in the list."

    def __call__(self, frame: FrameT) -> FrameT:
        """Return the distinct column values that are not in `self.values`.

        The result has one row per offending value, with its count in the
        ``"<column>-count"`` column.
        """
        count_column = f"{self.column}-count"
        return (
            frame.group_by(self.column)
            .agg(nw.col(self.column).count().alias(count_column))
            # Negate with `~` rather than comparing the expression to `False`.
            .filter(~nw.col(self.column).is_in(self.values))
        )