diff --git a/HISTORY.rst b/HISTORY.rst index 8b68ea0..65f9f87 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,16 @@ History ======= +1.1.5 (2026-06-29) +------------------ + +* Added support for DataMasque deployments on Snowpark Container Services (SPCS): + + * Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating through the SPCS app gateway. + * Added ``SpcsGatewayAuthError``, raised when the gateway rejects the PAT. + * Added the ``spcs`` option to ``SnowflakeStageLocation``. + * Made several ``SnowflakeConnectionConfig`` fields optional, since SPCS-staged connections leave them unset. + 1.1.3 (2026-06-26) ------------------ diff --git a/README.rst b/README.rst index ace7e0b..c512312 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,11 @@ Authentication is performed on the first request if ``authenticate()`` is not ca and is automatically retried once on a 401 response. ``client.healthcheck()`` is available as a lightweight readiness probe that does not consume credentials. +For a DataMasque instance hosted on Snowpark Container Services (SPCS) +(a ``*.snowflakecomputing.app`` ``base_url``), +pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``. +See the `usage docs `_ for details. + Error handling ============== diff --git a/datamasque/client/base.py b/datamasque/client/base.py index ffd726a..9aa3bea 100644 --- a/datamasque/client/base.py +++ b/datamasque/client/base.py @@ -21,6 +21,7 @@ DataMasqueTransportError, ) from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import install_spcs_gateway_auth logger = logging.getLogger(__name__) @@ -137,6 +138,8 @@ def __init__(self, connection_config: DataMasqueInstanceConfig) -> None: self.verify_ssl = connection_config.verify_ssl self.token_source = connection_config.token_source self._session = _build_session(self.verify_ssl) + if connection_config.spcs_pat: + install_spcs_gateway_auth(self._session, connection_config.spcs_pat) @contextmanager def _maybe_suppress_insecure_warning(self) -> Iterator[None]: diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py index 39c90fa..9aa6db0 100644 --- a/datamasque/client/exceptions.py +++ b/datamasque/client/exceptions.py @@ -84,6 +84,22 @@ class IfmAuthError(DataMasqueIfmError): """Raised when the IFM client cannot obtain or refresh a JWT (e.g. invalid credentials, missing scope).""" +class SpcsGatewayAuthError(DataMasqueException): + """ + Raised when a Snowflake SPCS app gateway rejects the configured `spcs_pat`. + + The message includes the Snowflake-provided detail, request id, + and a hint at the likely cause + (for example an expired token, or a network policy that excludes your IP). + + Deliberately a direct subclass of `DataMasqueException` rather than + `DataMasqueApiError`: + the client's 401 re-authenticate-and-retry path keys off `DataMasqueApiError`, + so keeping this outside that subtree + ensures a gateway rejection aborts immediately instead of looping. + """ + + class RunNotCancellableError(DataMasqueUserError): """ Raised when `cancel_run` is called against a run that is no longer eligible for cancellation. diff --git a/datamasque/client/models/connection.py b/datamasque/client/models/connection.py index a287ac1..eaa9c09 100644 --- a/datamasque/client/models/connection.py +++ b/datamasque/client/models/connection.py @@ -56,6 +56,7 @@ class SnowflakeStageLocation(str, Enum): local = "local" # Not supported for production use aws_s3 = "aws_s3" azure_blob_storage = "azure_blob_storage" + spcs = "spcs" # DataMasque running inside Snowflake SPCS; staged on the container's own storage class SseSelection(Enum): @@ -233,10 +234,14 @@ class SnowflakeConnectionConfig(ConnectionConfig): """ database: str - user: str - snowflake_account_id: str - snowflake_warehouse: str - snowflake_storage_integration_name: str + # Optional because DataMasque-in-SPCS connections leave these unset: the agent uses the + # container's OAuth token + SNOWFLAKE_HOST/SNOWFLAKE_ACCOUNT env and the app-owned QUERY_WAREHOUSE, + # so user/account/storage-integration/warehouse are null for stage_location=spcs. Mirrors the app's + # canonical model (agent .../schemas/connection/connection.py), which types these `| None = None`. + user: Optional[str] = None + snowflake_account_id: Optional[str] = None + snowflake_warehouse: Optional[str] = None + snowflake_storage_integration_name: Optional[str] = None host: str = "" port: Optional[int] = None db_schema: Optional[str] = Field(default=None, alias="schema") diff --git a/datamasque/client/models/dm_instance.py b/datamasque/client/models/dm_instance.py index 5026134..7256981 100644 --- a/datamasque/client/models/dm_instance.py +++ b/datamasque/client/models/dm_instance.py @@ -20,6 +20,11 @@ class DataMasqueInstanceConfig(BaseModel): the client prepends it with `Token ` when sending the `Authorization` header. The client calls `token_source` on each authentication attempt, so the callable is free to fetch and refresh tokens out-of-band (e.g. from a secrets manager). + + `spcs_pat` is an optional Snowflake Programmatic Access Token + for reaching a DataMasque instance hosted on Snowpark Container Services (SPCS). + It layers underneath whichever DataMasque auth method + (`password` or `token_source`) you choose. """ model_config = ConfigDict(arbitrary_types_allowed=True) @@ -29,6 +34,14 @@ class DataMasqueInstanceConfig(BaseModel): password: Optional[str] = None verify_ssl: bool = True token_source: Optional[Callable[[], str]] = None + spcs_pat: Optional[str] = None + """Snowflake Programmatic Access Token + for a DataMasque instance hosted on Snowpark Container Services (SPCS), + where `base_url` ends in `.snowflakecomputing.app`. + + Create the token in Snowsight (User profile → Programmatic access tokens) + for an account that can reach the SPCS app. + Leave unset for instances that are not hosted on SPCS.""" @model_validator(mode="after") def _validate_auth_source(self) -> "DataMasqueInstanceConfig": diff --git a/datamasque/client/spcs.py b/datamasque/client/spcs.py new file mode 100644 index 0000000..c7caf42 --- /dev/null +++ b/datamasque/client/spcs.py @@ -0,0 +1,179 @@ +""" +Snowflake SPCS app gateway authentication for `DataMasqueClient`. + +When a DataMasque instance is hosted on Snowpark Container Services (SPCS), +its app ingress (`*.snowflakecomputing.app`) fronts every request +with a Snowflake gateway that must be cleared first. +We authenticate to the gateway with a Programmatic Access Token (PAT), +sent on `X-SF-SPCS-Authorization: Snowflake Token=""`. +The gateway accepts the PAT on this alternate header +and strips it before forwarding to the container, +so DataMasque's own `Authorization: Token ` flow rides through untouched. + +`install_spcs_gateway_auth` attaches this behaviour to a client's `requests.Session`: +it sets the header on the session +(so it is sent on every request, including the unauthenticated login) +and registers a response hook +that turns a gateway-originated rejection into a clear `SpcsGatewayAuthError`. +""" + +import re +from typing import Any, Optional + +import requests + +from datamasque.client.exceptions import SpcsGatewayAuthError + +SPCS_GATEWAY_AUTH_HEADER = "X-SF-SPCS-Authorization" + +# Body-shape discriminators for SPCS gateway error responses. +# The gateway emits JSON with `responseType` (ERROR_), `requestId` +# (canonical UUID), and `detail` (free text). All three must be present and +# match these patterns for the body to count as gateway-originated. +_GATEWAY_RESPONSE_TYPE_RE = re.compile(r"^ERROR_[A-Z][A-Z0-9_]+$") +_UUID_RE = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", + re.IGNORECASE, +) + +# Header-shape discriminators for "this response transited a Snowflake SPCS +# gateway". The Server header and the `sfc-ss-` cookie name prefix both appear +# on every gateway-handled response (success and error alike) and aren't +# plausible to spoof by accident. +_SPCS_GATEWAY_SERVER_VALUE = "_" +_SPCS_COOKIE_PREFIX = "sfc-ss-" + + +def _has_spcs_gateway_header_signature(response: requests.Response) -> bool: + """ + True if at least one header-level Snowflake gateway marker is present. + + Looks for either `Server: _` (the gateway's literal Server header value) + or any `Set-Cookie` carrying the `sfc-ss-` cookie name prefix. + Either is sufficient — both indicate the response was emitted by, + or transited, Snowflake's SPCS ingress. + """ + if response.headers.get("server", "").strip() == _SPCS_GATEWAY_SERVER_VALUE: + return True + # `Set-Cookie` may appear multiple times; `requests` flattens duplicates + # via a comma-separated value in `.headers`, but our prefix substring + # check is order- and count-insensitive. + return _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", "") + + +def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: + """ + Return the parsed body iff it is a structurally-valid gateway error. + + All four conditions must hold: + 1. The body parses as JSON and is a dict. + 2. Keys `responseType`, `requestId`, `detail` are all present and string-typed. + 3. `responseType` matches `^ERROR_$`. + 4. `requestId` is a canonical 8-4-4-4-12 UUID. + + Returns the parsed dict (truthy) on match, `None` on miss. + """ + try: + data = response.json() + except ValueError: + return None + if not isinstance(data, dict): + return None + response_type = data.get("responseType") + request_id = data.get("requestId") + detail = data.get("detail") + if not (isinstance(response_type, str) and isinstance(request_id, str) and isinstance(detail, str)): + return None + if _GATEWAY_RESPONSE_TYPE_RE.match(response_type) and _UUID_RE.match(request_id): + return data + return None + + +def _hint_for_gateway_detail(detail: str) -> str: + """Map common Snowflake gateway `detail` strings to a one-line cause hint.""" + d = (detail or "").lower() + if "network policy" in d: + return ( + "PAT requires a network policy attached to the user (or account) " + "that permits your current public IP. Run `CREATE NETWORK POLICY " + "... ALLOWED_IP_LIST = ('/32')` and `ALTER USER " + "SET NETWORK_POLICY = `." + ) + if "invalid" in d and "token" in d: + return ( + "PAT is malformed, expired, or revoked. Create a new PAT in Snowsight " + "(User profile -> Programmatic access tokens) and update `spcs_pat`." + ) + if "expired" in d: + return "PAT has expired. Create a fresh one in Snowsight and update `spcs_pat`." + if "authentication" in d or "unauthorized" in d: + return ( + "Generic auth rejection. Verify the PAT was created by a user that " + "has access to this SPCS app, and that any account-level network " + "policy includes your current public IP." + ) + return "Unknown gateway rejection — see the Snowflake `detail` string above and the Snowflake PAT docs." + + +def _check_spcs_gateway_response(response: requests.Response) -> None: + """ + Raise `SpcsGatewayAuthError` iff `response` is a gateway-originated rejection. + + Two-layer discriminator — both must hold: + * **Body originated at the gateway**: + strict shape match on the JSON body + (multiple fields, typed, with format constraints) + via `_is_spcs_gateway_error_body`. + * **Response transited an SPCS gateway**: + header signature confirms via `_has_spcs_gateway_header_signature`. + + Either layer alone could in principle false-positive on an unrelated upstream + that happened to emit one of those signals; + the conjunction is what makes the check robust. + Legitimate DataMasque 401s (DRF `{"detail": "..."}`) + have the gateway header signature but fail the body shape — + so they correctly flow through + to the client's normal re-auth-and-retry path untouched. + """ + if response.status_code not in (401, 403): + return + if not _has_spcs_gateway_header_signature(response): + return + data = _is_spcs_gateway_error_body(response) + if data is None: + return + + response_type = data["responseType"] + request_id = data["requestId"] + detail = data["detail"] + hint = _hint_for_gateway_detail(detail) + raise SpcsGatewayAuthError( + f"SPCS gateway rejected the PAT (HTTP {response.status_code}, " + f"{response_type}). The request never reached DataMasque.\n" + f' Snowflake said: "{detail}"\n' + f" Snowflake reqId: {request_id}\n" + f" Likely cause: {hint}\n" + f" Fix in Snowsight on the account hosting this SPCS app, then retry." + ) + + +def _spcs_gateway_response_hook(response: requests.Response, *args: Any, **kwargs: Any) -> None: + """`requests` response hook: raise on a gateway-originated auth rejection.""" + _check_spcs_gateway_response(response) + + +def install_spcs_gateway_auth(session: requests.Session, pat: str) -> None: + """ + Configure `session` to authenticate to a Snowflake SPCS app gateway. + + Sets the `X-SF-SPCS-Authorization` header on the session + (so it rides on every request, including the unauthenticated login) + and registers a response hook + that raises `SpcsGatewayAuthError` on a gateway rejection. + + Scoping is automatic: + the client's session only ever talks to its own `base_url`, + so there is no need to match per-request hosts. + """ + session.headers[SPCS_GATEWAY_AUTH_HEADER] = f'Snowflake Token="{pat}"' + session.hooks["response"].append(_spcs_gateway_response_hook) diff --git a/docs/client.rst b/docs/client.rst index e8c7764..87180d0 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -60,6 +60,14 @@ datamasque.client.files module :undoc-members: :show-inheritance: +datamasque.client.spcs module +----------------------------- + +.. automodule:: datamasque.client.spcs + :members: + :undoc-members: + :show-inheritance: + datamasque.client.ifm module ---------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 0d92cfe..b43f9c3 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -19,3 +19,31 @@ To use DataMasque Python in a project: for connection in client.list_connections(): print(connection.name) + +Connecting to an SPCS-hosted instance +===================================== + +When DataMasque is hosted on Snowpark Container Services (SPCS), +its `base_url` ends in `.snowflakecomputing.app` +and requests must first clear the Snowflake gateway. +Pass a Snowflake Programmatic Access Token (PAT) as `spcs_pat` +and the client clears the gateway for you, +independently of your DataMasque `username`/`password` (or `token_source`) auth. + +.. code-block:: python + + config = DataMasqueInstanceConfig( + base_url="https://my-app.snowflakecomputing.app", + username="api_user", + password="api_password", + spcs_pat="", + ) + client = DataMasqueClient(config) + client.authenticate() + +Create the PAT in Snowsight (User profile → Programmatic access tokens) +for an account that can reach the SPCS app. +If the gateway rejects the PAT +(for example it has expired, or a network policy excludes your IP), +the client raises `SpcsGatewayAuthError` +with the Snowflake-provided detail and a hint at the likely cause. diff --git a/pyproject.toml b/pyproject.toml index 3ff0444..d892caf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datamasque-python" -version = "1.1.3" +version = "1.1.5" description = "Official Python client for the DataMasque data-masking API." authors = [ { name = "DataMasque Ltd" }, diff --git a/tests/test_connections.py b/tests/test_connections.py index 757ff5f..29386c0 100644 --- a/tests/test_connections.py +++ b/tests/test_connections.py @@ -952,6 +952,36 @@ def test_snowflake_connection_model_validate_with_stage_location(): assert conn.password is None +def test_snowflake_connection_model_validate_with_spcs_stage_location(): + """ + A Snowflake connection staged in SPCS must deserialise (regression for ui-testing MR !185). + + When DataMasque runs inside Snowflake SPCS it saves connections with + `snowflake_stage_location=spcs`. Listing connections deserialises every + one, so an unknown stage value used to raise `ValidationError` and break + `create_or_update_connection` for unrelated connections on a shared instance. + """ + payload = { + "id": "a1b2c3d4-0000-0000-0000-000000000000", + "name": "snowflake_spcs", + "mask_type": "database", + "db_type": "snowflake", + "user": "snowman", + "database": "icicle", + "snowflake_account_id": "ABCDEF-123456", + "snowflake_warehouse": "warehouse1", + "snowflake_storage_integration_name": "mysi", + "snowflake_stage_location": "spcs", + } + + conn = SnowflakeConnectionConfig.model_validate(payload) + + assert conn.snowflake_stage_location is SnowflakeStageLocation.spcs + # SPCS staging carries no external-storage fields. + assert conn.s3_bucket_name is None + assert conn.snowflake_azure_container_name is None + + def test_snowflake_connection_model_validate_without_stage_location(): payload = { "id": "id-3", diff --git a/tests/test_spcs.py b/tests/test_spcs.py new file mode 100644 index 0000000..b57b752 --- /dev/null +++ b/tests/test_spcs.py @@ -0,0 +1,157 @@ +"""Tests for Snowflake SPCS app gateway auth (`spcs_pat` on the client).""" + +from unittest.mock import patch + +import pytest +import requests_mock + +from datamasque.client import DataMasqueClient +from datamasque.client.exceptions import SpcsGatewayAuthError +from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import ( + SPCS_GATEWAY_AUTH_HEADER, + _hint_for_gateway_detail, +) + +BASE_URL = "https://my-app.snowflakecomputing.app" +PAT = "PAT123" +EXPECTED_HEADER = 'Snowflake Token="PAT123"' +VALID_UUID = "12345678-1234-1234-1234-123456789abc" + +# Headers/body that together mark a response as a gateway-originated rejection. +GATEWAY_HEADERS = {"Server": "_"} + + +def _gateway_error_body(detail="Invalid token", response_type="ERROR_INVALID_TOKEN"): + return {"responseType": response_type, "requestId": VALID_UUID, "detail": detail} + + +@pytest.fixture +def spcs_config(): + return DataMasqueInstanceConfig( + base_url=BASE_URL, + username="api_user", + password="api_password", + spcs_pat=PAT, + ) + + +@pytest.fixture +def spcs_client(spcs_config): + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" # pretend we're already authenticated with DM + return client + + +def test_spcs_header_present_on_authenticated_request(spcs_client): + with requests_mock.Mocker() as m: + m.get(f"{BASE_URL}/api/anything/", json={}, status_code=200) + spcs_client.make_request("GET", "/api/anything/") + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # DM's own auth header rides alongside, untouched. + assert m.last_request.headers["Authorization"] == "Token dm-token" + + +def test_spcs_header_present_on_login_request(spcs_config): + """The header must ride on the unauthenticated login POST too (it must clear the gateway).""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + client.authenticate() + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # Login is unauthenticated — no DM Authorization header on this request. + assert "Authorization" not in m.last_request.headers + + +def test_no_spcs_pat_means_no_header_and_no_hook(client): + """The default client (no spcs_pat) is entirely unaffected.""" + assert SPCS_GATEWAY_AUTH_HEADER not in client._session.headers + assert client._session.hooks["response"] == [] + + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + m.get("http://test-server/api/anything/", json={}, status_code=200) + client.make_request("GET", "/api/anything/") + assert SPCS_GATEWAY_AUTH_HEADER not in m.last_request.headers + + +def test_gateway_401_raises_and_does_not_retry(spcs_config): + """A gateway rejection aborts immediately — no re-auth, no retry loop.""" + with patch.object(DataMasqueClient, "authenticate") as mock_auth: + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + # A single response: a second call would 404 and fail the test loudly. + m.get( + f"{BASE_URL}/api/anything/", + json=_gateway_error_body(), + status_code=401, + headers=GATEWAY_HEADERS, + ) + with pytest.raises(SpcsGatewayAuthError) as exc: + client.make_request("GET", "/api/anything/") + + assert m.call_count == 1 + mock_auth.assert_not_called() + # The helpful hint is surfaced. + assert "PAT is malformed, expired, or revoked" in str(exc.value) + assert VALID_UUID in str(exc.value) + + +def test_normal_dm_401_still_retries(spcs_config): + """A genuine DataMasque 401 (no gateway signature) flows to the normal re-auth retry.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + # DRF-shaped 401, no gateway Server header → not a gateway rejection. + {"json": {"detail": "Authentication credentials were not provided."}, "status_code": 401}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + # Should NOT raise SpcsGatewayAuthError; should re-auth and succeed. + response = client.make_request("GET", "/api/anything/") + + assert response.status_code == 200 + assert client.token == "Token k" # re-auth happened + + +def test_gateway_signature_without_body_shape_passes_through(spcs_config): + """Gateway header present but DM-shaped body → treated as a normal DM 401, not a gateway rejection.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + {"json": {"detail": "nope"}, "status_code": 401, "headers": GATEWAY_HEADERS}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + response = client.make_request("GET", "/api/anything/") # must not raise + + assert response.status_code == 200 + + +@pytest.mark.parametrize( + "detail, expected_substring", + [ + ("Request failed network policy check", "network policy"), + ("Invalid token supplied", "malformed, expired, or revoked"), + ("The token has expired", "PAT has expired"), + ("Unauthorized request", "Generic auth rejection"), + ("Something totally unexpected", "Unknown gateway rejection"), + ], +) +def test_hint_for_gateway_detail(detail, expected_substring): + assert expected_substring in _hint_for_gateway_detail(detail) + + +def test_spcs_pat_coexists_with_password_and_token_source(): + """`spcs_pat` is orthogonal to the password/token_source XOR — both combos validate.""" + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", password="p", spcs_pat=PAT) + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", token_source=lambda: "t", spcs_pat=PAT) diff --git a/uv.lock b/uv.lock index 294561e..38ff7b9 100644 --- a/uv.lock +++ b/uv.lock @@ -428,7 +428,7 @@ toml = [ [[package]] name = "datamasque-python" -version = "1.1.3" +version = "1.1.4" source = { editable = "." } dependencies = [ { name = "pydantic" },