client
The Client module contains the main classes used to interact with the Arraylake service.
For asyncio interaction, use the AsyncClient. For regular, non-async interaction, use the Client.
Example usage:
from arraylake import Client
client = Client()
repo = client.get_repo("my-org/my-repo")
AsyncClient
Asyncio Client for interacting with ArrayLake
Arguments:
service_uri
- [Optional] The service URI to target.
token
- [Optional] API token for service account authentication.
list_repos
async def list_repos(
org: str,
filter_metadata: RepoMetadataT | None = None) -> Sequence[RepoModel]
List all repositories for the specified org
Arguments:
org
- Name of the org
filter_metadata
- Optional metadata to filter the repos by. If provided, only repos with the specified metadata will be returned. Filtering is inclusive and will return repos that match all of the provided metadata.
get_repo_object
async def get_repo_object(name: str) -> RepoModel
Get the repo configuration object.
See get_repo
for an instantiated repo.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
get_repo
async def get_repo(
name: str,
*,
checkout: Optional[bool] = None,
read_only: bool | None = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.AsyncRepo | IcechunkRepository
Get a repo by name
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
checkout
- Automatically checkout the repo after instantiation. Defaults to True for V1 repos and False for icechunk repos.
read_only
- Open the repo in read-only mode.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig. Config settings passed here will take precedence over the stored repo config when opening the repo.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
Returns:
A V1 AsyncRepo object or an IcechunkRepository object.
get_or_create_repo
async def get_or_create_repo(
name: str,
*,
checkout: Optional[bool] = None,
bucket_config_nickname: Optional[str] = None,
kind: Optional[RepoKind] = None,
prefix: Optional[str] = None,
import_existing: bool = False,
description: Optional[str] = None,
metadata: Optional[RepoMetadataT] = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.AsyncRepo | IcechunkRepository
Get a repo by name. Create the repo if it doesn't already exist.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
checkout
- Whether to checkout the repo after instantiation. If the repo does not exist, checkout is ignored. Ignored if specified for an Icechunk repo.
bucket_config_nickname
- The created repo will use this bucket for its chunks. If the repo exists, bucket_config_nickname is ignored.
kind
- The kind of repo to get or create e.g. Arraylake V1 or Icechunk V2
prefix
- Optional prefix for Icechunk store. Only used for Icechunk V2 repos. If not provided, a random ID + the repo name will be used.
import_existing
- If True, the Icechunk repo will be imported if it already exists.
description
- Optional description for the repo.
metadata
- Optional dictionary of metadata to tag the repo with. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig. Config settings passed here will take precedence over the stored repo config when opening the repo.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
Returns:
A V1 AsyncRepo object or IcechunkRepository
create_repo
async def create_repo(
name: str,
*,
bucket_config_nickname: Optional[str] = None,
kind: Optional[RepoKind] = None,
prefix: Optional[str] = None,
import_existing: bool = False,
description: Optional[str] = None,
metadata: Optional[RepoMetadataT] = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.AsyncRepo | IcechunkRepository
Create a new repo
Arguments:
name
- Full name of the repo to create (of the form [ORG]/[REPO])
bucket_config_nickname
- An optional bucket to use for the chunkstore
kind
- The kind of repo to create e.g. Arraylake V1 or Icechunk V2
prefix
- Optional prefix for Icechunk store. Only used for Icechunk V2 repos. If not provided, a random ID + the repo name will be used.
import_existing
- If True, the Icechunk repo will be imported if it already exists.
description
- Optional description for the repo.
metadata
- Optional dictionary of metadata to tag the repo with. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig, and the config will be saved alongside the repo upon creation.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
get_icechunk_storage
async def get_icechunk_storage(name: str, credentials_override=None)
Gets the icechunk storage object for the repo.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
credentials_override
- Optional credentials to use for the storage object. If not provided, the credentials will be fetched from the bucket config.
Returns:
icechunk.Storage object for the repo.
get_icechunk_container_credentials_from_bucket
async def get_icechunk_container_credentials_from_bucket(
org: str, bucket_config_nickname: str)
Get the icechunk virtual container credentials for a given bucket.
Arguments:
org
- The organization the bucket belongs to.
bucket_config_nickname
- Nickname of the bucket to get credentials for.
Returns:
icechunk.Credentials.S3
- The icechunk virtual chunk credentials for the bucket.
containers_credentials_for_buckets
async def containers_credentials_for_buckets(
org: str,
containers_to_buckets_map: dict[str, str] = {},
**kwargs: str)
Builds a map of credentials for icechunk virtual chunk containers from the provided bucket nicknames and calls icechunk.containers_credentials on this mapping.
Arguments:
org
- The organization the buckets belong to.
containers_to_buckets_map
- A dictionary mapping virtual chunk container names to bucket nicknames.
Returns:
A dictionary mapping container names to icechunk virtual chunk credentials.
modify_repo
async def modify_repo(name: str,
description: Optional[str] = None,
add_metadata: Optional[RepoMetadataT] = None,
remove_metadata: Optional[list[str]] = None,
update_metadata: Optional[RepoMetadataT] = None) -> None
Modify a repo's metadata or description.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
description
- Optional description for the repo.
add_metadata
- Optional dictionary of metadata to add to the repo. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars. Cannot use if the key already exists in the metadata.
remove_metadata
- List of metadata keys to remove from the repo.
update_metadata
- Optional dictionary of metadata to update on the repo. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
delete_repo
async def delete_repo(name: str,
*,
imsure: bool = False,
imreallysure: bool = False) -> None
Delete a repo
Arguments:
name
- Full name of the repo to delete (of the form [ORG]/[REPO])
imsure, imreallysure
- Confirm you intend to delete this repo
create_bucket_config
async def create_bucket_config(
*,
org: str,
nickname: str,
uri: str,
extra_config: dict | None = None,
auth_config: dict | None = None) -> BucketResponse
Create a new bucket config entry
NOTE: This does not create any actual buckets in the object store.
Arguments:
org
- Name of the org
nickname
- bucket nickname (example: `our-s3-bucket`)
uri
- The URI of the object store, of the form platform://bucket_name[/prefix].
extra_config
- dictionary of additional config to set on bucket config
auth_config
- dictionary of auth parameters, must include "method" key, default is {"method": "anonymous"}
set_default_bucket_config
async def set_default_bucket_config(*, org: str, nickname: str) -> None
Set the organization's default bucket for any new repos
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to set as default.
get_bucket_config
async def get_bucket_config(*, org: str, nickname: str) -> BucketResponse
Get a bucket's configuration
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to retrieve.
list_bucket_configs
async def list_bucket_configs(org: str) -> list[BucketResponse]
List all bucket config entries
Arguments:
org
- Name of the organization.
list_repos_for_bucket_config
async def list_repos_for_bucket_config(*, org: str,
nickname: str) -> list[RepoModel]
List repos using a given bucket
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket configuration.
delete_bucket_config
async def delete_bucket_config(*,
org: str,
nickname: str,
imsure: bool = False,
imreallysure: bool = False) -> None
Delete a bucket config entry
NOTE: If a bucket config is in use by one or more repos, it cannot be deleted. This does not actually delete any buckets in the object store.
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to delete.
imsure, imreallysure
- Confirm you intend to delete this bucket config
login
async def login(*, browser: bool = False) -> None
Login to ArrayLake
Arguments:
org
- Name of the org (only required if your default organization uses SSO)
browser
- if True, open the browser to the login page
logout
async def logout() -> None
Log out of ArrayLake
Arguments:
org
- Name of the org (only required if your default organization uses SSO)
browser
- if True, open the browser to the logout page
get_api_client_from_token
async def get_api_client_from_token(org: str, token: str) -> ApiClientResponse
Fetch the user corresponding to the provided token
get_permission_check
async def get_permission_check(org: str, principal_id: str, resource: str,
action: OrgActions | RepoActions) -> bool
Verify whether the provided principal has permission to perform the action against the resource
get_services
def get_services(org: str) -> AsyncComputeClient
Get the compute client services for the given org.
Arguments:
org
- Name of the org
Client
Client for interacting with ArrayLake.
Arguments:
service_uri
str - [Optional] The service URI to target.
token
str - [Optional] API token for service account authentication.
list_repos
def list_repos(
org: str,
filter_metadata: RepoMetadataT | None = None) -> Sequence[RepoModel]
List all repositories for the specified org
Arguments:
org
- Name of the org
filter_metadata
- Optional metadata to filter the repos by. If provided, only repos with the specified metadata will be returned. Filtering is inclusive and will return repos that match all of the provided metadata.
get_repo
def get_repo(
name: str,
*,
checkout: Optional[bool] = None,
read_only: bool | None = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.Repo | IcechunkRepository
Get a repo by name
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
checkout
- Automatically checkout the repo after instantiation. Ignored if specified for an Icechunk repo.
read_only
- Open the repo in read-only mode.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig. Config settings passed here will take precedence over the stored repo config when opening the repo.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
get_or_create_repo
def get_or_create_repo(
name: str,
*,
checkout: Optional[bool] = None,
bucket_config_nickname: Optional[str] = None,
kind: Optional[RepoKind] = None,
prefix: Optional[str] = None,
import_existing: bool = False,
description: Optional[str] = None,
metadata: Optional[RepoMetadataT] = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.Repo | IcechunkRepository
Get a repo by name. Create the repo if it doesn't already exist.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
checkout
- Whether to checkout the repo after instantiation. If the repo does not exist, checkout is ignored. Ignored if specified for an Icechunk repo.
bucket_config_nickname
- The created repo will use this bucket for its chunks. If the repo exists, bucket_config_nickname is ignored.
kind
- The kind of repo to get or create e.g. Arraylake V1 or Icechunk V2
prefix
- Optional prefix for Icechunk store. Only used for Icechunk repos. If not provided, a random ID + the repo name will be used.
import_existing
- If True, the Icechunk repo will be imported if it already exists.
description
- Optional description for the repo.
metadata
- Optional dictionary of metadata to tag the repo with. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig. Config settings passed here will take precedence over the stored repo config when opening the repo. When creating a new repo, the config will be saved alongside the repo.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
create_repo
def create_repo(
name: str,
*,
bucket_config_nickname: Optional[str] = None,
kind: Optional[RepoKind] = None,
prefix: Optional[str] = None,
import_existing: bool = False,
description: Optional[str] = None,
metadata: Optional[RepoMetadataT] = None,
config: RepoConfig = None,
virtual_container_nicknames: dict[str, str] | None = None
) -> repo_v1.Repo | IcechunkRepository
Create a new repo
Arguments:
name
- Full name of the repo to create (of the form [ORG]/[REPO])
bucket_config_nickname
- An optional bucket to use for the chunkstore
kind
- The kind of repo to create (`v1` or `icechunk`)
prefix
- Optional prefix for Icechunk store. Only used for Icechunk V2 repos. If not provided, a random ID + the repo name will be used.
import_existing
- If True, the Icechunk repo will be imported if it already exists.
description
- Optional description for the repo.
metadata
- Optional dictionary of metadata to tag the repo with. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
config
- Optional config for the repo. For Icechunk repos, this is the RepositoryConfig, and the config will be saved alongside the repo upon creation.
virtual_container_nicknames
- A dictionary mapping virtual chunk container names to bucket nicknames. Credentials will be fetched for these buckets based on the auth configuration and used for the virtual containers. Used for Icechunk repos only.
get_icechunk_storage
def get_icechunk_storage(name: str, *, credentials_override=None)
Gets the icechunk storage object for the repo.
Example usage:
from arraylake import Client
client = Client()
storage = client.get_icechunk_storage("my-org/my-repo")
icechunk.Repository.exists(storage)
icechunk.Repository.fetch_config(storage)
repo = icechunk.Repository.open(storage)
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
credentials_override
- Optional credentials to use for the storage object. If not provided, the credentials will be fetched from the bucket config.
Returns:
icechunk.Storage object for the repo.
get_icechunk_container_credentials_from_bucket
def get_icechunk_container_credentials_from_bucket(
org: str, bucket_config_nickname: str)
Get the icechunk virtual chunk credentials for a given bucket.
Arguments:
org
- The organization the bucket belongs to.
bucket_config_nickname
- Nickname of the bucket to get credentials for.
Returns:
icechunk.Credentials.S3
- The icechunk virtual chunk credentials for the bucket.
containers_credentials_for_buckets
def containers_credentials_for_buckets(
org: str,
containers_to_buckets_map: dict[str, str] = {},
**kwargs: str) -> dict[str, S3Credentials]
Builds a map of credentials for icechunk virtual chunk containers from the provided bucket nicknames and calls icechunk.containers_credentials on this mapping.
Example usage:
import icechunk as ic
from arraylake import Client
client = Client()
storage = client.get_icechunk_storage("my-org/my-repo")
config = ic.Repository.fetch_config(storage)
container_names = [container.name for container in config.virtual_chunk_containers()]
container_creds = client.containers_credentials_for_buckets("my-org", container_name="my-bucket")
repo = ic.Repository.open(storage, config=config, virtual_chunk_credentials=container_creds)
Arguments:
org
- The organization the buckets belong to.
containers_to_buckets_map
- A dictionary mapping virtual chunk container names to bucket nicknames.
Returns:
A dictionary mapping container names to icechunk virtual chunk credentials.
modify_repo
def modify_repo(name: str,
description: Optional[str] = None,
add_metadata: Optional[RepoMetadataT] = None,
remove_metadata: Optional[list[str]] = None,
update_metadata: Optional[RepoMetadataT] = None) -> None
Modify a repo's metadata or description.
Arguments:
name
- Full name of the repo (of the form [ORG]/[REPO])
description
- Optional description for the repo.
add_metadata
- Optional dictionary of metadata to add to the repo. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars. Cannot use if the key already exists in the metadata.
remove_metadata
- List of metadata keys to remove from the repo.
update_metadata
- Optional dictionary of metadata to update on the repo. Dictionary values can be a scalar (string, int, float, bool, or None) or a list of scalars.
delete_repo
def delete_repo(name: str,
*,
imsure: bool = False,
imreallysure: bool = False) -> None
Delete a repo
Arguments:
name
- Full name of the repo to delete (of the form [ORG]/[REPO])
create_bucket_config
def create_bucket_config(*,
org: str,
nickname: str,
uri: str,
extra_config: dict | None = None,
auth_config: dict | None = None) -> BucketResponse
Create a new bucket config entry
NOTE: This does not create any actual buckets in the object store.
Arguments:
org
- Name of the org
nickname
- bucket nickname (example: `our-s3-bucket`)
uri
- The URI of the object store, of the form platform://bucket_name[/prefix].
extra_config
- dictionary of additional config to set on bucket config
auth_config
- dictionary of auth parameters, must include "method" key, default is {"method": "anonymous"}
set_default_bucket_config
def set_default_bucket_config(*, org: str, nickname: str) -> None
Set the organization's default bucket config for any new repos
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to set as default.
get_bucket_config
def get_bucket_config(*, org: str, nickname: str) -> BucketResponse
Get a bucket's configuration
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to retrieve.
list_bucket_configs
def list_bucket_configs(org: str) -> list[BucketResponse]
List all buckets for the specified org
Arguments:
org
- Name of the org
list_repos_for_bucket_config
def list_repos_for_bucket_config(
*, org: str, nickname: str) -> list[repo_v1.Repo | IcechunkRepository]
List repos using a given bucket config
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket.
delete_bucket_config
def delete_bucket_config(*,
org: str,
nickname: str,
imsure: bool = False,
imreallysure: bool = False) -> None
Delete a bucket config entry
NOTE: If a bucket config is in use by one or more repos, it cannot be deleted. This does not actually delete any buckets in the object store.
Arguments:
org
- Name of the org
nickname
- Nickname of the bucket config to delete.
imsure, imreallysure
- Confirm you intend to delete this bucket config
login
def login(*, browser: bool = False) -> None
Login to ArrayLake
Arguments:
org
- Name of the org (only required if your default organization uses SSO)
browser
- if True, open the browser to the login page
logout
def logout() -> None
Log out of ArrayLake
Arguments:
org
- Name of the org (only required if your default organization uses SSO)
browser
- if True, open the browser to the logout page
get_services
def get_services(org: str) -> ComputeClient
Get the compute client services for the given org.
Arguments:
org
- Name of the org