Skip to main content

bauplan.schema


classAPIError

bauplan.schema.APIError(
*,
code: int,
type: str,
message: str,
context: dict[str,
typing.Any]
)-> None

classAPIMetadata

bauplan.schema.APIMetadata(
*,
status_code: int,
ref: Optional[Annotated[Union[bauplan.schema.Branch,
bauplan.schema.Tag,
bauplan.schema.DetachedRef],
FieldInfo(
annotation=NoneType,
required=True,
discriminator='type'
)]] = None,
username: Optional[str] = None,
error: Optional[str] = None,
pagination_token: Optional[str] = None,
request_id: str,
request_ts: int,
request_ms: int
)-> None

classAPIResponse

bauplan.schema.APIResponse(
*,
metadata: bauplan.schema.APIMetadata,
ref: Optional[Annotated[Union[bauplan.schema.Branch,
bauplan.schema.Tag,
bauplan.schema.DetachedRef],
FieldInfo(
annotation=NoneType,
required=True,
discriminator='type'
)]] = None
)-> None

classAPIResponseWithData

bauplan.schema.APIResponseWithData(
*,
metadata: bauplan.schema.APIMetadata,
ref: Optional[Annotated[Union[bauplan.schema.Branch,
bauplan.schema.Tag,
bauplan.schema.DetachedRef],
FieldInfo(
annotation=NoneType,
required=True,
discriminator='type'
)]] = None,
data: Any
)-> None

classAPIResponseWithError

bauplan.schema.APIResponseWithError(
*,
metadata: bauplan.schema.APIMetadata,
ref: Optional[Annotated[Union[bauplan.schema.Branch,
bauplan.schema.Tag,
bauplan.schema.DetachedRef],
FieldInfo(
annotation=NoneType,
required=True,
discriminator='type'
)]] = None,
error: bauplan.schema.APIError
)-> None

classActor

bauplan.schema.Actor(
*,
name: str,
email: str | None
)-> None

classBranch

bauplan.schema.Branch(
*,
name: str,
hash: str | None = None,
type: Literal['BRANCH'] = 'BRANCH'
)-> None

Bases: Ref


classCacheDir

bauplan.schema.CacheDir(
*,
dirpath: pathlib.Path
)-> None

EXPERIMENTAL AND SUBJECT TO CHANGE.

CacheDir is a model for a standard bauplan directory ($HOME/.bauplan) for caching of files on the local filesystem. This is partially a convenience interface for other models such as JobContext, and partially a convenience for the user to easily clean up any cache files they no longer want (or a previous process failed to clean up).

def cleanup

Remove the temporary cache directory and its contents.

def clear_job_cache

Remove all directories with the '.job_snapshot' prefix from the bauplan cache.

Parameters

base_dirpathOptional(Path)
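Default: PosixPath('/home/runner/.bauplan') (as rendered on the documentation build machine; presumably resolves to $HOME/.bauplan for the current user)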

def save

Make the temporary cache directory persistent, preventing automatic cleanup.


classCommit

A commit is a record of a change in the data lake.

Attributes

ref(

APIRef

)
The reference (branch or tag) associated with the commit.
message(

Optional[str]

)
The commit message.
authors(

List[Actor]

)
A list of authors associated with the commit.
authored_date(

datetime

)
The date and time when the commit was authored.
committer(

Actor

)
The committer of the commit.
committed_date(

datetime

)
The date and time when the commit was committed.
parent_ref(

APIRef

)
The reference to the parent commit.
parent_hashes(

List[str]

)
A list of parent commit hashes.
properties(

Dict[str, str]

)
A dictionary of properties associated with the commit.
signed_off_by(

List[Actor]

)
A list of actors who signed off on the commit.

classDAGEdge

bauplan.schema.DAGEdge(
*,
source_model: Optional[str],
destination_model: str
)-> None

A dependency between DAGNode instances, representing dataflow.


classDAGNode

bauplan.schema.DAGNode(
*,
id: str,
name: str
)-> None

A bauplan function that produces a Model.

Attributes

id
The model ID
name
The model name

classDetachedRef

bauplan.schema.DetachedRef(
*,
name: str,
hash: str | None = None,
type: Literal['DETACHED'] = 'DETACHED'
)-> None

Bases: Ref


classEntry

bauplan.schema.Entry(
*,
name: str,
namespace: str,
kind: str
)-> None

classGetBranchesResponse

An Iterable containing Branch objects returned by the get_branches method.

Example:

response = client.get_branches()
for branch in response:
    print(branch.name)

classGetCommitsResponse

An Iterable containing Commit objects returned by the get_commits method.


classGetNamespacesResponse


classGetTablesResponse

An Iterable containing TableWithMetadata objects returned by the get_tables method.

Example:

response = client.get_tables(namespace='my_namespace', ref='main')
for table in response:
    print(table.name, table.records)

classGetTagsResponse


classJob

bauplan.schema.Job(
*,
id: str,
kind: Union[str,
bauplan.schema.JobKind],
user: str,
human_readable_status: str,
created_at: Optional[datetime.datetime],
finished_at: Optional[datetime.datetime],
status: bauplan.schema.JobState
)-> None

EXPERIMENTAL AND SUBJECT TO CHANGE.

Job is a model for a job in the Bauplan system. It is tracked as a result of a code snapshot run.

def finished_after

Check if the job finished after the given datetime.

Parameters

startREQUIRED(datetime)

def finished_before

Check if the job finished before the given datetime.

Parameters

endREQUIRED(datetime)

def finished_between

Check if the job finished between two datetimes.

Parameters

startREQUIRED(datetime)
endREQUIRED(datetime)

def from_proto

Parameters

job_pbREQUIRED(common_pb.JobInfo)

def has_finished_range

Parameters

after_timeOptional(Optional[datetime])
before_timeOptional(Optional[datetime])

def has_id

Check if the job has the specified ID or ID prefix.

Parameters

check_idREQUIRED(str)

def has_started_range

Check if the job started within the specified time range.

Parameters

after_timeOptional(Optional[datetime])
before_timeOptional(Optional[datetime])

def has_status

Check if the job has the specified status.

Parameters

check_statusOptional(Union[str, JobState])
Default: <JobState.COMPLETE: 3>

def started_after

Check if the job started after the given datetime.

Parameters

startREQUIRED(datetime)

def started_before

Check if the job started before the given datetime.

Parameters

endREQUIRED(datetime)

def started_between

Check if the job started between two datetimes.

Parameters

startREQUIRED(datetime)
endREQUIRED(datetime)

classJobContext

bauplan.schema.JobContext(
*,
id: str,
project_id: Optional[str],
project_name: Optional[str],
ref: Optional[bauplan.schema.Ref],
tx_ref: Optional[bauplan.schema.Ref],
logs: List[bauplan.schema.JobLogEvent],
dag_nodes: List[bauplan.schema.DAGNode],
dag_edges: List[bauplan.schema.DAGEdge],
snapshot_dict: Dict[str,
str],
snapshot_dirpath: Optional[pathlib.Path]
)-> None

EXPERIMENTAL AND SUBJECT TO CHANGE.

JobContext is a model for the immediate working context of a particular job. This currently includes: (1) Ref, (2) Code Snapshot, (3) Logs. A JobContext should enable a variety of workflows for iterating on an existing Job.

def cleanup_cache

Clean up the cache directory if it exists.

def save_cache

Save the cache directory if it exists.


classJobKind

Models a job's "kind" or job type. May be one of: UNSPECIFIED, CODE_SNAPSHOT_RUN, QUERY, IMPORT_PLAN_CREATE, IMPORT_PLAN_APPLY, TABLE_PLAN_CREATE, TABLE_PLAN_CREATE_APPLY, or TABLE_IMPORT.


classJobLogEvent

bauplan.schema.JobLogEvent(
*,
stream: Optional[bauplan.schema.JobLogStream],
level: Optional[bauplan.schema.JobLogLevel],
message: str
)-> None

EXPERIMENTAL AND SUBJECT TO CHANGE.

JobLogEvent is a model for a particular log message from a particular job.

When you output logs within a Python model, they are persisted as JobLogEvents.


classJobLogLevel


classJobLogList

bauplan.schema.JobLogList(
*,
events: List[bauplan.schema.JobLogEvent]
)-> None

EXPERIMENTAL AND SUBJECT TO CHANGE.

JobLogList is a model for all of the logs from a particular job. This model is primarily provided as a convenience for "common" interactions with a job's log messages.

def error_messages


classJobLogStream


classJobState


classNamespace

bauplan.schema.Namespace(
*,
name: str,
ref: Optional[Annotated[Union[bauplan.schema.Branch,
bauplan.schema.Tag,
bauplan.schema.DetachedRef],
FieldInfo(
annotation=NoneType,
required=True,
discriminator='type'
)]] = None
)-> None

classPartitionField

bauplan.schema.PartitionField(
*,
name: str,
transform: str
)-> None

classRef

bauplan.schema.Ref(
*,
name: str,
hash: str | None = None,
type: str | None = None
)-> None

A reference to a branch, a tag, or a detached ref.

Examples:

ref = Ref(name='main', hash='abc123')

Attributes

name(

str

)
The name of the branch or tag.
hash(

Optional[str]

)
The hash of the branch or tag. This is optional and may be None.
type(

Optional[str]

)
The type of the ref: one of 'BRANCH', 'TAG', or 'DETACHED'.

def from_dict

Parameters

dataREQUIRED(Dict)

def from_string

Parameters

refREQUIRED(str)

classTable

bauplan.schema.Table(
*,
name: str,
namespace: str,
kind: str = 'TABLE'
)-> None

Bases: Entry


classTableField

bauplan.schema.TableField(
*,
id: int,
name: str,
required: bool,
type: str
)-> None

classTableWithMetadata

bauplan.schema.TableWithMetadata(
*,
name: str,
namespace: str,
kind: str = 'TABLE',
id: str,
records: Optional[int],
size: Optional[int],
last_updated_ms: int,
fields: List[bauplan.schema.TableField],
snapshots: Optional[int],
partitions: List[bauplan.schema.PartitionField],
metadata_location: str,
current_snapshot_id: Optional[int],
current_schema_id: Optional[int],
raw: Optional[Dict]
)-> None

Bases: Table


classTag

bauplan.schema.Tag(
*,
name: str,
hash: str | None = None,
type: Literal['TAG'] = 'TAG'
)-> None

Bases: Ref


def proto_datetime_to_py_datetime

Parameters

tsREQUIRED(protobuf.timestamp_pb2.Timestamp)