Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@
History
=======

1.1.5 (2026-06-29)
------------------

* Added support for DataMasque deployments on Snowpark Container Services (SPCS):

* Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating through the SPCS app gateway.
* Added ``SpcsGatewayAuthError``, raised when the gateway rejects the PAT.
* Added the ``spcs`` option to ``SnowflakeStageLocation``.
* Made several ``SnowflakeConnectionConfig`` fields optional, since SPCS-staged connections leave them unset.

1.1.3 (2026-06-26)
------------------

Expand Down
5 changes: 5 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ Authentication is performed on the first request if ``authenticate()`` is not ca
and is automatically retried once on a 401 response.
``client.healthcheck()`` is available as a lightweight readiness probe that does not consume credentials.

For a DataMasque instance hosted on Snowpark Container Services (SPCS)
(a ``*.snowflakecomputing.app`` ``base_url``),
pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``.
See the `usage docs <https://datamasque-python.readthedocs.io/en/latest/usage.html>`_ for details.

Error handling
==============

Expand Down
3 changes: 3 additions & 0 deletions datamasque/client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
DataMasqueTransportError,
)
from datamasque.client.models.dm_instance import DataMasqueInstanceConfig
from datamasque.client.spcs import install_spcs_gateway_auth

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -137,6 +138,8 @@ def __init__(self, connection_config: DataMasqueInstanceConfig) -> None:
self.verify_ssl = connection_config.verify_ssl
self.token_source = connection_config.token_source
self._session = _build_session(self.verify_ssl)
if connection_config.spcs_pat:
install_spcs_gateway_auth(self._session, connection_config.spcs_pat)

@contextmanager
def _maybe_suppress_insecure_warning(self) -> Iterator[None]:
Expand Down
16 changes: 16 additions & 0 deletions datamasque/client/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,22 @@ class IfmAuthError(DataMasqueIfmError):
"""Raised when the IFM client cannot obtain or refresh a JWT (e.g. invalid credentials, missing scope)."""


class SpcsGatewayAuthError(DataMasqueException):
"""
Raised when a Snowflake SPCS app gateway rejects the configured `spcs_pat`.

The message includes the Snowflake-provided detail, request id,
and a hint at the likely cause
(for example an expired token, or a network policy that excludes your IP).

Deliberately a direct subclass of `DataMasqueException` rather than
`DataMasqueApiError`:
the client's 401 re-authenticate-and-retry path keys off `DataMasqueApiError`,
so keeping this outside that subtree
ensures a gateway rejection aborts immediately instead of looping.
"""


class RunNotCancellableError(DataMasqueUserError):
"""
Raised when `cancel_run` is called against a run that is no longer eligible for cancellation.
Expand Down
13 changes: 9 additions & 4 deletions datamasque/client/models/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ class SnowflakeStageLocation(str, Enum):
local = "local" # Not supported for production use
aws_s3 = "aws_s3"
azure_blob_storage = "azure_blob_storage"
spcs = "spcs" # DataMasque running inside Snowflake SPCS; staged on the container's own storage


class SseSelection(Enum):
Expand Down Expand Up @@ -233,10 +234,14 @@ class SnowflakeConnectionConfig(ConnectionConfig):
"""

database: str
user: str
snowflake_account_id: str
snowflake_warehouse: str
snowflake_storage_integration_name: str
# Optional because DataMasque-in-SPCS connections leave these unset: the agent uses the
# container's OAuth token + SNOWFLAKE_HOST/SNOWFLAKE_ACCOUNT env and the app-owned QUERY_WAREHOUSE,
# so user/account/storage-integration/warehouse are null for stage_location=spcs. Mirrors the app's
# canonical model (agent .../schemas/connection/connection.py), which types these `| None = None`.
user: Optional[str] = None
snowflake_account_id: Optional[str] = None
snowflake_warehouse: Optional[str] = None
snowflake_storage_integration_name: Optional[str] = None
host: str = ""
port: Optional[int] = None
db_schema: Optional[str] = Field(default=None, alias="schema")
Expand Down
13 changes: 13 additions & 0 deletions datamasque/client/models/dm_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ class DataMasqueInstanceConfig(BaseModel):
the client prepends it with `Token ` when sending the `Authorization` header.
The client calls `token_source` on each authentication attempt,
so the callable is free to fetch and refresh tokens out-of-band (e.g. from a secrets manager).

`spcs_pat` is an optional Snowflake Programmatic Access Token
for reaching a DataMasque instance hosted on Snowpark Container Services (SPCS).
It layers underneath whichever DataMasque auth method
(`password` or `token_source`) you choose.
"""

model_config = ConfigDict(arbitrary_types_allowed=True)
Expand All @@ -29,6 +34,14 @@ class DataMasqueInstanceConfig(BaseModel):
password: Optional[str] = None
verify_ssl: bool = True
token_source: Optional[Callable[[], str]] = None
spcs_pat: Optional[str] = None
"""Snowflake Programmatic Access Token
for a DataMasque instance hosted on Snowpark Container Services (SPCS),
where `base_url` ends in `.snowflakecomputing.app`.

Create the token in Snowsight (User profile → Programmatic access tokens)
for an account that can reach the SPCS app.
Leave unset for instances that are not hosted on SPCS."""

@model_validator(mode="after")
def _validate_auth_source(self) -> "DataMasqueInstanceConfig":
Expand Down
179 changes: 179 additions & 0 deletions datamasque/client/spcs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
"""
Snowflake SPCS app gateway authentication for `DataMasqueClient`.

When a DataMasque instance is hosted on Snowpark Container Services (SPCS),
its app ingress (`*.snowflakecomputing.app`) fronts every request
with a Snowflake gateway that must be cleared first.
We authenticate to the gateway with a Programmatic Access Token (PAT),
sent on `X-SF-SPCS-Authorization: Snowflake Token="<PAT>"`.
The gateway accepts the PAT on this alternate header
and strips it before forwarding to the container,
so DataMasque's own `Authorization: Token <key>` flow rides through untouched.

`install_spcs_gateway_auth` attaches this behaviour to a client's `requests.Session`:
it sets the header on the session
(so it is sent on every request, including the unauthenticated login)
and registers a response hook
that turns a gateway-originated rejection into a clear `SpcsGatewayAuthError`.
"""

import re
from typing import Any, Optional

import requests

from datamasque.client.exceptions import SpcsGatewayAuthError

SPCS_GATEWAY_AUTH_HEADER = "X-SF-SPCS-Authorization"

# Body-shape discriminators for SPCS gateway error responses.
# The gateway emits JSON with `responseType` (ERROR_<UPPER_SNAKE>), `requestId`
# (canonical UUID), and `detail` (free text). All three must be present and
# match these patterns for the body to count as gateway-originated.
_GATEWAY_RESPONSE_TYPE_RE = re.compile(r"^ERROR_[A-Z][A-Z0-9_]+$")
_UUID_RE = re.compile(
r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
re.IGNORECASE,
)

# Header-shape discriminators for "this response transited a Snowflake SPCS
# gateway". The Server header and the `sfc-ss-` cookie name prefix both appear
# on every gateway-handled response (success and error alike) and aren't
# plausible to spoof by accident.
_SPCS_GATEWAY_SERVER_VALUE = "_"
_SPCS_COOKIE_PREFIX = "sfc-ss-"


def _has_spcs_gateway_header_signature(response: requests.Response) -> bool:
"""
True if at least one header-level Snowflake gateway marker is present.

Looks for either `Server: _` (the gateway's literal Server header value)
or any `Set-Cookie` carrying the `sfc-ss-` cookie name prefix.
Either is sufficient — both indicate the response was emitted by,
or transited, Snowflake's SPCS ingress.
"""
if response.headers.get("server", "").strip() == _SPCS_GATEWAY_SERVER_VALUE:
return True
# `Set-Cookie` may appear multiple times; `requests` flattens duplicates
# via a comma-separated value in `.headers`, but our prefix substring
# check is order- and count-insensitive.
return _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", "")


def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]:
"""
Return the parsed body iff it is a structurally-valid gateway error.

All four conditions must hold:
1. The body parses as JSON and is a dict.
2. Keys `responseType`, `requestId`, `detail` are all present and string-typed.
3. `responseType` matches `^ERROR_<UPPER_SNAKE>$`.
4. `requestId` is a canonical 8-4-4-4-12 UUID.

Returns the parsed dict (truthy) on match, `None` on miss.
"""
try:
data = response.json()
except ValueError:
return None
if not isinstance(data, dict):
return None
response_type = data.get("responseType")
request_id = data.get("requestId")
detail = data.get("detail")
if not (isinstance(response_type, str) and isinstance(request_id, str) and isinstance(detail, str)):
return None
if _GATEWAY_RESPONSE_TYPE_RE.match(response_type) and _UUID_RE.match(request_id):
return data
return None


def _hint_for_gateway_detail(detail: str) -> str:
"""Map common Snowflake gateway `detail` strings to a one-line cause hint."""
d = (detail or "").lower()
if "network policy" in d:
return (
"PAT requires a network policy attached to the user (or account) "
"that permits your current public IP. Run `CREATE NETWORK POLICY "
"... ALLOWED_IP_LIST = ('<your.ip>/32')` and `ALTER USER <pat-user> "
"SET NETWORK_POLICY = <policy>`."
)
if "invalid" in d and "token" in d:
return (
"PAT is malformed, expired, or revoked. Create a new PAT in Snowsight "
"(User profile -> Programmatic access tokens) and update `spcs_pat`."
)
if "expired" in d:
return "PAT has expired. Create a fresh one in Snowsight and update `spcs_pat`."
if "authentication" in d or "unauthorized" in d:
return (
"Generic auth rejection. Verify the PAT was created by a user that "
"has access to this SPCS app, and that any account-level network "
"policy includes your current public IP."
)
return "Unknown gateway rejection — see the Snowflake `detail` string above and the Snowflake PAT docs."


def _check_spcs_gateway_response(response: requests.Response) -> None:
"""
Raise `SpcsGatewayAuthError` iff `response` is a gateway-originated rejection.

Two-layer discriminator — both must hold:
* **Body originated at the gateway**:
strict shape match on the JSON body
(multiple fields, typed, with format constraints)
via `_is_spcs_gateway_error_body`.
* **Response transited an SPCS gateway**:
header signature confirms via `_has_spcs_gateway_header_signature`.

Either layer alone could in principle false-positive on an unrelated upstream
that happened to emit one of those signals;
the conjunction is what makes the check robust.
Legitimate DataMasque 401s (DRF `{"detail": "..."}`)
have the gateway header signature but fail the body shape —
so they correctly flow through
to the client's normal re-auth-and-retry path untouched.
"""
if response.status_code not in (401, 403):
return
if not _has_spcs_gateway_header_signature(response):
return
data = _is_spcs_gateway_error_body(response)
if data is None:
return

response_type = data["responseType"]
request_id = data["requestId"]
detail = data["detail"]
hint = _hint_for_gateway_detail(detail)
raise SpcsGatewayAuthError(
f"SPCS gateway rejected the PAT (HTTP {response.status_code}, "
f"{response_type}). The request never reached DataMasque.\n"
f' Snowflake said: "{detail}"\n'
f" Snowflake reqId: {request_id}\n"
f" Likely cause: {hint}\n"
f" Fix in Snowsight on the account hosting this SPCS app, then retry."
)


def _spcs_gateway_response_hook(response: requests.Response, *args: Any, **kwargs: Any) -> None:
"""`requests` response hook: raise on a gateway-originated auth rejection."""
_check_spcs_gateway_response(response)


def install_spcs_gateway_auth(session: requests.Session, pat: str) -> None:
"""
Configure `session` to authenticate to a Snowflake SPCS app gateway.

Sets the `X-SF-SPCS-Authorization` header on the session
(so it rides on every request, including the unauthenticated login)
and registers a response hook
that raises `SpcsGatewayAuthError` on a gateway rejection.

Scoping is automatic:
the client's session only ever talks to its own `base_url`,
so there is no need to match per-request hosts.
"""
session.headers[SPCS_GATEWAY_AUTH_HEADER] = f'Snowflake Token="{pat}"'
session.hooks["response"].append(_spcs_gateway_response_hook)
8 changes: 8 additions & 0 deletions docs/client.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,14 @@ datamasque.client.files module
:undoc-members:
:show-inheritance:

datamasque.client.spcs module
-----------------------------

.. automodule:: datamasque.client.spcs
:members:
:undoc-members:
:show-inheritance:

datamasque.client.ifm module
----------------------------

Expand Down
28 changes: 28 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,31 @@ To use DataMasque Python in a project:

for connection in client.list_connections():
print(connection.name)

Connecting to an SPCS-hosted instance
=====================================

When DataMasque is hosted on Snowpark Container Services (SPCS),
its `base_url` ends in `.snowflakecomputing.app`
and requests must first clear the Snowflake gateway.
Pass a Snowflake Programmatic Access Token (PAT) as `spcs_pat`
and the client clears the gateway for you,
independently of your DataMasque `username`/`password` (or `token_source`) auth.

.. code-block:: python

config = DataMasqueInstanceConfig(
base_url="https://my-app.snowflakecomputing.app",
username="api_user",
password="api_password",
spcs_pat="<snowflake-programmatic-access-token>",
)
client = DataMasqueClient(config)
client.authenticate()

Create the PAT in Snowsight (User profile → Programmatic access tokens)
for an account that can reach the SPCS app.
If the gateway rejects the PAT
(for example it has expired, or a network policy excludes your IP),
the client raises `SpcsGatewayAuthError`
with the Snowflake-provided detail and a hint at the likely cause.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "datamasque-python"
version = "1.1.3"
version = "1.1.5"
description = "Official Python client for the DataMasque data-masking API."
authors = [
{ name = "DataMasque Ltd" },
Expand Down
30 changes: 30 additions & 0 deletions tests/test_connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,6 +952,36 @@ def test_snowflake_connection_model_validate_with_stage_location():
assert conn.password is None


def test_snowflake_connection_model_validate_with_spcs_stage_location():
"""
A Snowflake connection staged in SPCS must deserialise (regression for ui-testing MR !185).

When DataMasque runs inside Snowflake SPCS it saves connections with
`snowflake_stage_location=spcs`. Listing connections deserialises every
one, so an unknown stage value used to raise `ValidationError` and break
`create_or_update_connection` for unrelated connections on a shared instance.
"""
payload = {
"id": "a1b2c3d4-0000-0000-0000-000000000000",
"name": "snowflake_spcs",
"mask_type": "database",
"db_type": "snowflake",
"user": "snowman",
"database": "icicle",
"snowflake_account_id": "ABCDEF-123456",
"snowflake_warehouse": "warehouse1",
"snowflake_storage_integration_name": "mysi",
"snowflake_stage_location": "spcs",
}

conn = SnowflakeConnectionConfig.model_validate(payload)

assert conn.snowflake_stage_location is SnowflakeStageLocation.spcs
# SPCS staging carries no external-storage fields.
assert conn.s3_bucket_name is None
assert conn.snowflake_azure_container_name is None


def test_snowflake_connection_model_validate_without_stage_location():
payload = {
"id": "id-3",
Expand Down
Loading