Skip to main content

bauplan.standard_expectations

This module contains standard expectations that can be used to test data artifacts in a Bauplan pipeline. Using these expectations instead of hand-made ones will make your pipeline easier to maintain, and significantly faster and more memory-efficient.

Each function returns a boolean, so that the wrapping function can assert or print out messages in case of failure.


def expect_column_accepted_values(...)

Expect all values in the column to come from the list of accepted values.

Returns:

a boolean.

def expect_column_accepted_values(
    table: pa.Table,
    column_name: str,
    accepted_values: list,
) -> bool:
    """Expect all values in the column to come from the list of accepted values.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column whose values are checked.
        accepted_values: The list of accepted values.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_accepted_values

@bauplan.expectation()
@bauplan.python('3.11')
def test_order_status_domain(
    data=bauplan.Model('orders'),
):
    """Assert that 'order_status' in the 'orders' model only holds accepted values.

    Args:
        data: The 'orders' model, supplied through the bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_accepted_values(
        data,
        'order_status',
        ['pending', 'paid', 'shipped', 'cancelled'],
    )
    return True

def expect_column_all_null(...)

Expect the column to have all null values.

Returns:

a boolean.

def expect_column_all_null(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Expect the column to have all null values.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column expected to be entirely null.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_all_null

@bauplan.expectation()
@bauplan.python('3.11')
def test_legacy_user_id_fully_blanked(
    data=bauplan.Model('customers'),
):
    """Assert that 'legacy_user_id' in the 'customers' model is entirely null.

    Args:
        data: The 'customers' model, supplied through the bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_all_null(data, 'legacy_user_id')
    return True

def expect_column_all_unique(...)

Expect the column to have all unique values (i.e. no duplicates).

Returns:

a boolean.

def expect_column_all_unique(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Expect the column to have all unique values (i.e. no duplicates).

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column expected to contain no duplicates.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_all_unique

@bauplan.expectation()
@bauplan.python('3.11')
def test_trip_id_is_unique(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that 'trip_id' in 'normalized_taxi_trips' has no duplicate values.

    Args:
        data: The 'normalized_taxi_trips' model, supplied through the
            bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_all_unique(data, 'trip_id')
    return True

def expect_column_equal_concatenation(...)

Expect the target column to be equal to the concatenation of the columns in the list.

Returns:

a boolean.

def expect_column_equal_concatenation(
    table: pa.Table,
    target_column: str,
    columns: list,
    separator: str = '',
) -> bool:
    """Expect the target column to equal the concatenation of the listed columns.

    Columns that are not of type pa.string() are converted to string on a
    best-effort basis before being concatenated.

    Args:
        table: The table that contains the columns to check.
        target_column: Name of the column expected to hold the concatenation.
        columns: The list of columns whose values are concatenated.
        separator: Custom separator placed between the concatenated values;
            defaults to the empty string.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

If the columns are not of type pa.string(), the function will attempt to convert them to string. If a custom separator is needed (default: the empty string), it can be passed as an argument.

Example

import bauplan
from bauplan.standard_expectations import expect_column_equal_concatenation

@bauplan.expectation()
@bauplan.python('3.11')
def test_full_name_is_concat(
    data=bauplan.Model('customers'),
):
    """Assert that 'full_name' equals 'first_name' and 'last_name' joined by a space.

    Args:
        data: The 'customers' model, supplied through the bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_equal_concatenation(
        data,
        target_column='full_name',
        columns=['first_name', 'last_name'],
        separator=' ',
    )
    return True

def expect_column_mean_greater_or_equal_than(...)

Expect the mean of a column to be equal or greater than the supplied value.

Returns:

a boolean.

def expect_column_mean_greater_or_equal_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Expect the mean of a column to be equal or greater than the supplied value.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the column mean is compared against.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_mean_greater_or_equal_than

@bauplan.expectation()
@bauplan.python('3.11')
def test_avg_rating_at_least_three(
    data=bauplan.Model('product_reviews'),
):
    """Assert that the mean of 'rating' in 'product_reviews' is at least 3.0.

    Args:
        data: The 'product_reviews' model, supplied through the bauplan.Model
            default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_mean_greater_or_equal_than(data, 'rating', 3.0)
    return True

def expect_column_mean_greater_than(...)

Expect the mean of a column to be greater than the supplied value.

Returns:

a boolean.

def expect_column_mean_greater_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Expect the mean of a column to be greater than the supplied value.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the column mean is compared against.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_mean_greater_than

@bauplan.expectation()
@bauplan.python('3.11')
def test_positive_avg_fare(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that the mean of 'fare_amount' in 'normalized_taxi_trips' exceeds 0.0.

    Args:
        data: The 'normalized_taxi_trips' model, supplied through the
            bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_mean_greater_than(data, 'fare_amount', 0.0)
    return True

def expect_column_mean_smaller_or_equal_than(...)

Expect the mean of a column to be equal or smaller than the supplied value.

Returns:

a boolean.

def expect_column_mean_smaller_or_equal_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Expect the mean of a column to be equal or smaller than the supplied value.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the column mean is compared against.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_mean_smaller_or_equal_than

@bauplan.expectation()
@bauplan.python('3.11')
def test_avg_latency_within_slo(
    data=bauplan.Model('request_logs'),
):
    """Assert that the mean of 'latency_ms' in 'request_logs' is at most 250.0.

    Args:
        data: The 'request_logs' model, supplied through the bauplan.Model
            default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_mean_smaller_or_equal_than(data, 'latency_ms', 250.0)
    return True

def expect_column_mean_smaller_than(...)

Expect the mean of a column to be smaller than the supplied value.

Returns:

a boolean.

def expect_column_mean_smaller_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Expect the mean of a column to be smaller than the supplied value.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the column mean is compared against.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_mean_smaller_than

@bauplan.expectation()
@bauplan.python('3.11')
def test_error_rate_below_five_percent(
    data=bauplan.Model('request_logs'),
):
    """Assert that the mean of 'error_rate' in 'request_logs' is below 0.05.

    Args:
        data: The 'request_logs' model, supplied through the bauplan.Model
            default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_mean_smaller_than(data, 'error_rate', 0.05)
    return True

def expect_column_no_nulls(...)

Expect the column to have no null values.

Returns:

a boolean.

def expect_column_no_nulls(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Expect the column to have no null values.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column expected to contain no nulls.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_no_nulls

@bauplan.expectation()
@bauplan.python('3.11')
def test_no_null_pickup_datetime(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that 'pickup_datetime' in 'normalized_taxi_trips' has no nulls.

    Args:
        data: The 'normalized_taxi_trips' model, supplied through the
            bauplan.Model default.

    Returns:
        The boolean produced by the expectation; it is True whenever this
        line is reached, because a False result fails the assertion first.
    """
    column = 'pickup_datetime'
    # Keep the result in a variable so the failure message can name the column.
    ok = expect_column_no_nulls(data, column)
    assert ok, f'expected {column} to have no nulls'
    return ok

def expect_column_not_unique(...)

Expect the column to have at least one duplicate value.

Returns:

a boolean.

def expect_column_not_unique(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Expect the column to have at least one duplicate value.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column expected to contain a duplicate.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_not_unique

@bauplan.expectation()
@bauplan.python('3.11')
def test_customer_id_repeats_in_orders(
    data=bauplan.Model('orders'),
):
    """Assert that 'customer_id' in the 'orders' model has at least one duplicate.

    Args:
        data: The 'orders' model, supplied through the bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_not_unique(data, 'customer_id')
    return True

def expect_column_some_null(...)

Expect the column to have at least one null.

Returns:

a boolean.

def expect_column_some_null(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Expect the column to have at least one null.

    Args:
        table: The table that contains the column to check.
        column_name: Name of the column expected to contain a null.

    Returns:
        A boolean: True if the expectation holds, False otherwise, so the
        calling function can assert on it or print a message on failure.
    """
    ...

Parameters

Example

import bauplan
from bauplan.standard_expectations import expect_column_some_null

@bauplan.expectation()
@bauplan.python('3.11')
def test_optional_notes_has_nulls(
    data=bauplan.Model('customers'),
):
    """Assert that 'optional_notes' in the 'customers' model has at least one null.

    Args:
        data: The 'customers' model, supplied through the bauplan.Model default.

    Returns:
        True; this line is only reached when the assertion passes.
    """
    # The expectation returns a boolean, so the assert fails on a False result.
    assert expect_column_some_null(data, 'optional_notes')
    return True