bauplan.standard_expectations
This module contains standard expectations that can be used to test data artifacts in a Bauplan pipeline. Using these expectations instead of hand-made ones will make your pipeline easier to maintain, and significantly faster and more memory-efficient.
Each function returns a boolean, so that the wrapping function can assert or print out messages in case of failure.
Expect all values in the column to come from the list of accepted values.
Returns:
a boolean.
def expect_column_accepted_values(
    table: pa.Table,
    column_name: str,
    accepted_values: list,
) -> bool:
    """Check that every value in a column comes from a list of accepted values.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.
        accepted_values: The values the column is allowed to contain.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_accepted_values
@bauplan.expectation()
@bauplan.python('3.11')
def test_order_status_domain(
    data=bauplan.Model('orders'),
):
    """Assert that 'order_status' in the 'orders' model only holds known statuses."""
    column = 'order_status'
    accepted = ['pending', 'paid', 'shipped', 'cancelled']
    # Attach a message so a failed expectation says what was checked,
    # instead of raising a bare AssertionError.
    assert expect_column_accepted_values(
        data,
        column,
        accepted,
    ), f'expected every value in {column} to be one of {accepted}'
    return True
Expect the column to have all null values.
Returns:
a boolean.
def expect_column_all_null(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Check that every value in a column is null.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_all_null
@bauplan.expectation()
@bauplan.python('3.11')
def test_legacy_user_id_fully_blanked(
    data=bauplan.Model('customers'),
):
    """Assert that 'legacy_user_id' in the 'customers' model is entirely null."""
    column = 'legacy_user_id'
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_all_null(data, column), (
        f'expected {column} to contain only nulls'
    )
    return True
Expect the column to have all unique values (i.e. no duplicates).
Returns:
a boolean.
def expect_column_all_unique(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Check that a column has all unique values (i.e. no duplicates).

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_all_unique
@bauplan.expectation()
@bauplan.python('3.11')
def test_trip_id_is_unique(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that 'trip_id' in 'normalized_taxi_trips' has no duplicates."""
    column = 'trip_id'
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_all_unique(data, column), (
        f'expected {column} to have no duplicate values'
    )
    return True
Expect the target column to be equal to the concatenation of the columns in the list.
Returns:
a boolean.
def expect_column_equal_concatenation(
    table: pa.Table,
    target_column: str,
    columns: list,
    separator: str = '',
) -> bool:
    """Check that a target column equals the concatenation of other columns.

    Args:
        table: The table that holds the columns to check.
        target_column: Name of the column expected to hold the concatenation.
        columns: Names of the columns to concatenate, in order.
        separator: String placed between the concatenated values; defaults
            to the empty string.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
If a column is not of type pa.string(), the function attempts to convert it to string before concatenating. To join the values with a custom separator, pass it as the separator argument (default: the empty string).
Example
import bauplan
from bauplan.standard_expectations import expect_column_equal_concatenation
@bauplan.expectation()
@bauplan.python('3.11')
def test_full_name_is_concat(
    data=bauplan.Model('customers'),
):
    """Assert that 'full_name' equals 'first_name' and 'last_name' joined by a space."""
    target = 'full_name'
    parts = ['first_name', 'last_name']
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_equal_concatenation(
        data,
        target_column=target,
        columns=parts,
        separator=' ',
    ), f'expected {target} to equal the concatenation of {parts}'
    return True
Expect the mean of a column to be greater than or equal to the supplied value.
Returns:
a boolean.
def expect_column_mean_greater_or_equal_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Check that the mean of a column is greater than or equal to a value.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the mean is compared against.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_mean_greater_or_equal_than
@bauplan.expectation()
@bauplan.python('3.11')
def test_avg_rating_at_least_three(
    data=bauplan.Model('product_reviews'),
):
    """Assert that the mean 'rating' in 'product_reviews' is at least 3.0."""
    column = 'rating'
    minimum = 3.0
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_mean_greater_or_equal_than(data, column, minimum), (
        f'expected mean of {column} to be >= {minimum}'
    )
    return True
Expect the mean of a column to be greater than the supplied value.
Returns:
a boolean.
def expect_column_mean_greater_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Check that the mean of a column is strictly greater than a value.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the mean is compared against.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_mean_greater_than
@bauplan.expectation()
@bauplan.python('3.11')
def test_positive_avg_fare(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that the mean 'fare_amount' in 'normalized_taxi_trips' is above 0.0."""
    column = 'fare_amount'
    lower_bound = 0.0
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_mean_greater_than(data, column, lower_bound), (
        f'expected mean of {column} to be > {lower_bound}'
    )
    return True
Expect the mean of a column to be smaller than or equal to the supplied value.
Returns:
a boolean.
def expect_column_mean_smaller_or_equal_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Check that the mean of a column is smaller than or equal to a value.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the mean is compared against.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_mean_smaller_or_equal_than
@bauplan.expectation()
@bauplan.python('3.11')
def test_avg_latency_within_slo(
    data=bauplan.Model('request_logs'),
):
    """Assert that the mean 'latency_ms' in 'request_logs' is at most 250.0."""
    column = 'latency_ms'
    maximum = 250.0
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_mean_smaller_or_equal_than(data, column, maximum), (
        f'expected mean of {column} to be <= {maximum}'
    )
    return True
Expect the mean of a column to be smaller than the supplied value.
Returns:
a boolean.
def expect_column_mean_smaller_than(
    table: pa.Table,
    column_name: str,
    value: float,
) -> bool:
    """Check that the mean of a column is strictly smaller than a value.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column whose mean is checked.
        value: The value the mean is compared against.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_mean_smaller_than
@bauplan.expectation()
@bauplan.python('3.11')
def test_error_rate_below_five_percent(
    data=bauplan.Model('request_logs'),
):
    """Assert that the mean 'error_rate' in 'request_logs' is below 0.05."""
    column = 'error_rate'
    upper_bound = 0.05
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_mean_smaller_than(data, column, upper_bound), (
        f'expected mean of {column} to be < {upper_bound}'
    )
    return True
Expect the column to have no null values.
Returns:
a boolean.
def expect_column_no_nulls(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Check that a column has no null values.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_no_nulls
@bauplan.expectation()
@bauplan.python('3.11')
def test_no_null_pickup_datetime(
    data=bauplan.Model('normalized_taxi_trips'),
):
    """Assert that 'pickup_datetime' in 'normalized_taxi_trips' has no nulls."""
    column = 'pickup_datetime'
    # Build the failure message up front so the assert line stays short.
    failure_message = f'expected {column} to have no nulls'
    has_no_nulls = expect_column_no_nulls(data, column)
    assert has_no_nulls, failure_message
    return has_no_nulls
Expect the column to have at least one duplicate value.
Returns:
a boolean.
def expect_column_not_unique(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Check that a column has at least one duplicate value.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_not_unique
@bauplan.expectation()
@bauplan.python('3.11')
def test_customer_id_repeats_in_orders(
    data=bauplan.Model('orders'),
):
    """Assert that 'customer_id' in the 'orders' model has at least one duplicate."""
    column = 'customer_id'
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_not_unique(data, column), (
        f'expected {column} to contain at least one duplicate value'
    )
    return True
Expect the column to have at least one null.
Returns:
a boolean.
def expect_column_some_null(
    table: pa.Table,
    column_name: str,
) -> bool:
    """Check that a column has at least one null.

    Args:
        table: The table that holds the column to check.
        column_name: Name of the column to check.

    Returns:
        A boolean telling whether the expectation holds, so the caller can
        assert on it or report a failure message.
    """
    ...
Example
import bauplan
from bauplan.standard_expectations import expect_column_some_null
@bauplan.expectation()
@bauplan.python('3.11')
def test_optional_notes_has_nulls(
    data=bauplan.Model('customers'),
):
    """Assert that 'optional_notes' in the 'customers' model has at least one null."""
    column = 'optional_notes'
    # Attach a message so a failed expectation says what was checked.
    assert expect_column_some_null(data, column), (
        f'expected {column} to contain at least one null'
    )
    return True