Skip to content

Null Validation

ColumnBeNull

Check if the values in a column are null.

Parameters:

Name Type Description Default
column str

Column to validate.

required
threshold float

Threshold for validation. Defaults to 0.0.

0.0
impact Literal['low', 'medium', 'high']

Impact level of validation. Defaults to "low".

'low'

Examples:

>>> import pandas as pd
>>> from validoopsie import Validate
>>>
>>> # Validate field contains only nulls
>>> df = pd.DataFrame({
...     "id": [1, 2, 3],
...     "optional_field": [None, None, None]
... })
>>>
>>> vd = (
...     Validate(df)
...     .NullValidation.ColumnBeNull(column="optional_field")
... )
>>> key = "ColumnBeNull_optional_field"
>>> vd.results[key]["result"]["status"]
'Success'
>>>
>>> # When calling validate on successful validation there is no error.
>>> vd.validate()
Source code in validoopsie/validation_catalogue/NullValidation/column_be_null.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
class ColumnBeNull(BaseValidation):
    """Check if the values in a column are null.

    Args:
        column (str): Column to validate.
        impact (Literal["low", "medium", "high"], optional): Impact level of validation.
            Defaults to "low".
        threshold (float, optional): Threshold for validation. Defaults to 0.0.

    Note:
        The positional order of the constructor is ``column, impact, threshold``;
        pass ``impact`` and ``threshold`` by keyword to avoid mixing them up.

    Examples:
        >>> import pandas as pd
        >>> from validoopsie import Validate
        >>>
        >>> # Validate field contains only nulls
        >>> df = pd.DataFrame({
        ...     "id": [1, 2, 3],
        ...     "optional_field": [None, None, None]
        ... })
        >>>
        >>> vd = (
        ...     Validate(df)
        ...     .NullValidation.ColumnBeNull(column="optional_field")
        ... )
        >>> key = "ColumnBeNull_optional_field"
        >>> vd.results[key]["result"]["status"]
        'Success'
        >>>
        >>> # When calling validate on successful validation there is no error.
        >>> vd.validate()

    """

    def __init__(
        self,
        column: str,
        impact: Literal["low", "medium", "high"] = "low",
        threshold: float = 0.00,
        **kwargs: KwargsParams,
    ) -> None:
        # All configuration is handled by the base class; this subclass only
        # supplies the fail message and the frame query.
        super().__init__(column, impact, threshold, **kwargs)

    @property
    def fail_message(self) -> str:
        """Return the fail message, that will be used in the report."""
        return f"The column '{self.column}' doesn't have values that are null."

    def __call__(self, frame: Frame) -> Frame:
        """Check if the values in a column are null.

        Keeps only the rows whose value in ``self.column`` is NOT null, then
        groups them by value and counts the occurrences of each into a column
        named ``"<column>-count"``.

        Args:
            frame (Frame): Frame containing the column to validate.

        Returns:
            Frame: One row per distinct non-null value with its count.
                NOTE(review): presumably the base class treats these rows as
                the failing values -- confirm against ``BaseValidation``.

        """
        # `~expr` negates the boolean null mask; equivalent to the previous
        # `is_null() == False` without comparing against a boolean literal.
        is_not_null = ~nw.col(self.column).is_null()
        return (
            frame.select(self.column)
            .filter(is_not_null)
            .group_by(self.column)
            .agg(nw.col(self.column).count().alias(f"{self.column}-count"))
        )

ColumnNotBeNull

Check if the values in a column are not null.

Parameters:

Name Type Description Default
column str

Column to validate.

required
threshold float

Threshold for validation. Defaults to 0.0.

0.0
impact Literal['low', 'medium', 'high']

Impact level of validation. Defaults to "low".

'low'

Examples:

>>> import pandas as pd
>>> from validoopsie import Validate
>>>
>>> # Validate field has no nulls
>>> df = pd.DataFrame({
...     "id": [1, 2, 3],
...     "required_field": ["a", "b", "c"]
... })
>>>
>>> vd = (
...     Validate(df)
...     .NullValidation.ColumnNotBeNull(column="required_field")
... )
>>> key = "ColumnNotBeNull_required_field"
>>> vd.results[key]["result"]["status"]
'Success'
>>>
>>> # When calling validate on successful validation there is no error.
>>> vd.validate()
Source code in validoopsie/validation_catalogue/NullValidation/column_not_be_null.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
class ColumnNotBeNull(BaseValidation):
    """Check if the values in a column are not null.

    Args:
        column (str): Column to validate.
        impact (Literal["low", "medium", "high"], optional): Impact level of validation.
            Defaults to "low".
        threshold (float, optional): Threshold for validation. Defaults to 0.0.

    Note:
        The positional order of the constructor is ``column, impact, threshold``;
        pass ``impact`` and ``threshold`` by keyword to avoid mixing them up.

    Examples:
        >>> import pandas as pd
        >>> from validoopsie import Validate
        >>>
        >>> # Validate field has no nulls
        >>> df = pd.DataFrame({
        ...     "id": [1, 2, 3],
        ...     "required_field": ["a", "b", "c"]
        ... })
        >>>
        >>> vd = (
        ...     Validate(df)
        ...     .NullValidation.ColumnNotBeNull(column="required_field")
        ... )
        >>> key = "ColumnNotBeNull_required_field"
        >>> vd.results[key]["result"]["status"]
        'Success'
        >>>
        >>> # When calling validate on successful validation there is no error.
        >>> vd.validate()

    """

    def __init__(
        self,
        column: str,
        impact: Literal["low", "medium", "high"] = "low",
        threshold: float = 0.00,
        **kwargs: KwargsParams,
    ) -> None:
        # All configuration is handled by the base class; this subclass only
        # supplies the fail message and the frame query.
        super().__init__(column, impact, threshold, **kwargs)

    @property
    def fail_message(self) -> str:
        """Return the fail message, that will be used in the report."""
        return f"The column '{self.column}' has values that are null."

    def __call__(self, frame: Frame) -> Frame:
        """Check if the values in a column are not null.

        Keeps only the rows whose value in ``self.column`` IS null, tags each
        with a literal ``1`` in a column named ``"<column>-count"``, then
        groups by ``self.column`` and sums that marker column.

        Args:
            frame (Frame): Frame containing the column to validate.

        Returns:
            Frame: The grouped null rows with their summed count.
                NOTE(review): presumably the base class treats these rows as
                the failing values -- confirm against ``BaseValidation``.

        """
        null_count_col = f"{self.column}-count"
        # `is_null()` already yields a boolean mask, so it is used directly
        # as the predicate instead of being compared with `== True`.
        is_null = nw.col(self.column).is_null()
        return (
            frame.filter(is_null)
            # A constant 1 per row is summed rather than using `count()`.
            # NOTE(review): likely because `count()` skips nulls -- confirm.
            .with_columns(nw.lit(1).alias(null_count_col))
            .group_by(self.column)
            .agg(nw.col(null_count_col).sum())
        )