From 6474840afa179362fc07823f050daa100678dc1a Mon Sep 17 00:00:00 2001 From: Beau Butler Date: Fri, 26 Jun 2026 11:07:12 +1200 Subject: [PATCH 1/5] feat: add spcs_pat for Snowflake SPCS gateway auth --- HISTORY.rst | 9 ++ README.rst | 10 ++ datamasque/client/base.py | 3 + datamasque/client/exceptions.py | 16 +++ datamasque/client/models/dm_instance.py | 18 +++ datamasque/client/spcs.py | 176 ++++++++++++++++++++++++ docs/client.rst | 8 ++ docs/usage.rst | 28 ++++ pyproject.toml | 2 +- tests/test_spcs.py | 157 +++++++++++++++++++++ uv.lock | 2 +- 11 files changed, 427 insertions(+), 2 deletions(-) create mode 100644 datamasque/client/spcs.py create mode 100644 tests/test_spcs.py diff --git a/HISTORY.rst b/HISTORY.rst index 8b68ea0..ccac819 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,6 +2,15 @@ History ======= +1.1.4 (2026-06-29) +------------------ + +* Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating when + DataMasque is in Snowflake SPCS (Native App). +* Added ``SpcsGatewayAuthError``, raised when the SPCS gateway rejects the PAT + before the request reaches DataMasque (with the Snowflake detail and a hint at + the likely cause). + 1.1.3 (2026-06-26) ------------------ diff --git a/README.rst b/README.rst index ace7e0b..2e6353b 100644 --- a/README.rst +++ b/README.rst @@ -42,6 +42,13 @@ Authentication is performed on the first request if ``authenticate()`` is not ca and is automatically retried once on a 401 response. ``client.healthcheck()`` is available as a lightweight readiness probe that does not consume credentials. +For a DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) app ingress +(a ``*.snowflakecomputing.app`` ``base_url``), +pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``; +the client sends it on the ``X-SF-SPCS-Authorization`` header to clear the Snowflake gateway, +which strips it before forwarding so your DataMasque auth is unaffected. +See the `usage docs `_ for details. + Error handling ============== @@ -60,6 +67,9 @@ All methods raise subclasses of ``DataMasqueException`` on failure: raised by ``start_masking_run`` when the server rejects the run. - ``DataMasqueUserError`` — raised by user-management methods when the input is invalid. +- ``SpcsGatewayAuthError`` — + raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat`` + before the request reaches DataMasque. Documentation ============= diff --git a/datamasque/client/base.py b/datamasque/client/base.py index ffd726a..9aa3bea 100644 --- a/datamasque/client/base.py +++ b/datamasque/client/base.py @@ -21,6 +21,7 @@ DataMasqueTransportError, ) from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import install_spcs_gateway_auth logger = logging.getLogger(__name__) @@ -137,6 +138,8 @@ def __init__(self, connection_config: DataMasqueInstanceConfig) -> None: self.verify_ssl = connection_config.verify_ssl self.token_source = connection_config.token_source self._session = _build_session(self.verify_ssl) + if connection_config.spcs_pat: + install_spcs_gateway_auth(self._session, connection_config.spcs_pat) @contextmanager def _maybe_suppress_insecure_warning(self) -> Iterator[None]: diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py index 39c90fa..a7ed612 100644 --- a/datamasque/client/exceptions.py +++ b/datamasque/client/exceptions.py @@ -84,6 +84,22 @@ class IfmAuthError(DataMasqueIfmError): """Raised when the IFM client cannot obtain or refresh a JWT (e.g. invalid credentials, missing scope).""" +class SpcsGatewayAuthError(DataMasqueException): + """ + Raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat``. + + Only relevant when the client is configured with ``spcs_pat`` for an + instance behind Snowflake SPCS app ingress. The message includes the + Snowflake-provided detail, request id, and a hint at the likely cause + (e.g. an expired PAT or a network policy that excludes your IP). + + Deliberately a direct subclass of `DataMasqueException` rather than + `DataMasqueApiError`: the client's 401 re-authenticate-and-retry path keys + off `DataMasqueApiError`/HTTP status, so keeping this outside that subtree + ensures a gateway rejection aborts immediately instead of looping. + """ + + class RunNotCancellableError(DataMasqueUserError): """ Raised when `cancel_run` is called against a run that is no longer eligible for cancellation. diff --git a/datamasque/client/models/dm_instance.py b/datamasque/client/models/dm_instance.py index 5026134..321adc8 100644 --- a/datamasque/client/models/dm_instance.py +++ b/datamasque/client/models/dm_instance.py @@ -20,6 +20,14 @@ class DataMasqueInstanceConfig(BaseModel): the client prepends it with `Token ` when sending the `Authorization` header. The client calls `token_source` on each authentication attempt, so the callable is free to fetch and refresh tokens out-of-band (e.g. from a secrets manager). + + `spcs_pat` is an optional Snowflake Programmatic Access Token for reaching a + DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) + app ingress, where `base_url` ends in `.snowflakecomputing.app`. It is sent on + every request via the `X-SF-SPCS-Authorization` header to clear the Snowflake + gateway, which strips it before forwarding — so it is independent of, and + layers underneath, whichever DataMasque auth method (`password` or + `token_source`) you choose. """ model_config = ConfigDict(arbitrary_types_allowed=True) @@ -29,6 +37,16 @@ class DataMasqueInstanceConfig(BaseModel): password: Optional[str] = None verify_ssl: bool = True token_source: Optional[Callable[[], str]] = None + spcs_pat: Optional[str] = None + """Snowflake Programmatic Access Token for a DataMasque instance hosted behind + Snowflake SPCS app ingress (a ``*.snowflakecomputing.app`` ``base_url``). + + Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an + account that can reach the SPCS app. The client sends it on the + ``X-SF-SPCS-Authorization`` header so the Snowflake gateway lets the request + through to DataMasque; the gateway strips the header before forwarding, leaving + DataMasque's own ``Authorization`` flow untouched. Leave it unset for + instances that are not behind an SPCS gateway.""" @model_validator(mode="after") def _validate_auth_source(self) -> "DataMasqueInstanceConfig": diff --git a/datamasque/client/spcs.py b/datamasque/client/spcs.py new file mode 100644 index 0000000..900566d --- /dev/null +++ b/datamasque/client/spcs.py @@ -0,0 +1,176 @@ +""" +Snowflake SPCS app gateway authentication for :class:`DataMasqueClient`. + +When a DataMasque instance is hosted behind Snowflake SPCS (Snowpark Container +Services) app ingress (``*.snowflakecomputing.app``), every request must first +clear the Snowflake gateway. We authenticate to the gateway with a Programmatic +Access Token (PAT) sent on ``X-SF-SPCS-Authorization: Snowflake Token=""``. +The gateway accepts the PAT on this alternate header and strips it before +forwarding to the container, so DataMasque's own ``Authorization: Token `` +flow rides through untouched. + +:func:`install_spcs_gateway_auth` attaches this behaviour to a client's +``requests.Session``: it sets the header on the session (so it is sent on every +request, including the unauthenticated login) and registers a response hook that +turns a gateway-originated rejection into a clear :class:`SpcsGatewayAuthError`. +""" + +import re +from typing import Any, Optional + +import requests + +from datamasque.client.exceptions import SpcsGatewayAuthError + +SPCS_GATEWAY_AUTH_HEADER = "X-SF-SPCS-Authorization" + +# Body-shape discriminators for SPCS gateway error responses. +# The gateway emits JSON with `responseType` (ERROR_), `requestId` +# (canonical UUID), and `detail` (free text). All three must be present and +# match these patterns for the body to count as gateway-originated. +_GATEWAY_RESPONSE_TYPE_RE = re.compile(r"^ERROR_[A-Z][A-Z0-9_]+$") +_UUID_RE = re.compile( + r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", + re.IGNORECASE, +) + +# Header-shape discriminators for "this response transited a Snowflake SPCS +# gateway". The Server header and the `sfc-ss-` cookie name prefix both appear +# on every gateway-handled response (success and error alike) and aren't +# plausible to spoof by accident. +_SPCS_GATEWAY_SERVER_VALUE = "_" +_SPCS_COOKIE_PREFIX = "sfc-ss-" + + +def _has_spcs_gateway_header_signature(response: requests.Response) -> bool: + """ + True if at least one header-level Snowflake gateway marker is present. + + Looks for either ``Server: _`` (the gateway's literal Server header value) + or any ``Set-Cookie`` carrying the ``sfc-ss-`` cookie name prefix. Either is + sufficient — both indicate the response was emitted by, or transited, + Snowflake's SPCS ingress. + """ + if response.headers.get("server", "").strip() == _SPCS_GATEWAY_SERVER_VALUE: + return True + # `Set-Cookie` may appear multiple times; `requests` flattens duplicates + # via a comma-separated value in `.headers`, but our prefix substring + # check is order- and count-insensitive. + if _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", ""): + return True + return False + + +def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: + """ + Return the parsed body iff it is a structurally-valid gateway error. + + All four conditions must hold: + 1. The body parses as JSON and is a dict. + 2. Keys ``responseType``, ``requestId``, ``detail`` are all present and string-typed. + 3. ``responseType`` matches ``^ERROR_$``. + 4. ``requestId`` is a canonical 8-4-4-4-12 UUID. + + Returns the parsed dict (truthy) on match, ``None`` on miss. + """ + try: + data = response.json() + except ValueError: + return None + if not isinstance(data, dict): + return None + response_type = data.get("responseType") + request_id = data.get("requestId") + detail = data.get("detail") + if not (isinstance(response_type, str) and isinstance(request_id, str) and isinstance(detail, str)): + return None + if not _GATEWAY_RESPONSE_TYPE_RE.match(response_type): + return None + if not _UUID_RE.match(request_id): + return None + return data + + +def _hint_for_gateway_detail(detail: str) -> str: + """Map common Snowflake gateway ``detail`` strings to a one-line cause hint.""" + d = (detail or "").lower() + if "network policy" in d: + return ( + "PAT requires a network policy attached to the user (or account) " + "that permits your current public IP. Run `CREATE NETWORK POLICY " + "... ALLOWED_IP_LIST = ('/32')` and `ALTER USER " + "SET NETWORK_POLICY = `." + ) + if "invalid" in d and "token" in d: + return ( + "PAT is malformed, expired, or revoked. Re-mint a PAT in Snowsight " + "(User profile -> Programmatic access tokens) and update `spcs_pat`." + ) + if "expired" in d: + return "PAT has expired. Mint a fresh one in Snowsight and update `spcs_pat`." + if "authentication" in d or "unauthorized" in d: + return ( + "Generic auth rejection. Verify the PAT was minted by a user that " + "has access to this SPCS app, and that any account-level network " + "policy includes your current public IP." + ) + return "Unknown gateway rejection — see the Snowflake `detail` string above and the Snowflake PAT docs." + + +def _check_spcs_gateway_response(response: requests.Response) -> None: + """ + Raise :class:`SpcsGatewayAuthError` iff ``response`` is a gateway-originated rejection. + + Two-layer discriminator — both must hold: + * **Body originated at the gateway**: strict shape match on the JSON body + (multiple fields, typed, with format constraints) via + :func:`_is_spcs_gateway_error_body`. + * **Response transited an SPCS gateway**: header signature confirms via + :func:`_has_spcs_gateway_header_signature`. + + Either layer alone could in principle false-positive on an unrelated + upstream that happened to emit one of those signals; the conjunction is what + makes the check robust. Legitimate DataMasque 401s (DRF ``{"detail": "..."}``) + have the gateway header signature but fail the body shape — so they correctly + flow through to the client's normal re-auth-and-retry path untouched. + """ + if response.status_code not in (401, 403): + return + if not _has_spcs_gateway_header_signature(response): + return + data = _is_spcs_gateway_error_body(response) + if data is None: + return + + response_type = data["responseType"] + request_id = data["requestId"] + detail = data["detail"] + hint = _hint_for_gateway_detail(detail) + raise SpcsGatewayAuthError( + f"SPCS gateway rejected the PAT (HTTP {response.status_code}, " + f"{response_type}). The request never reached DataMasque.\n" + f" Snowflake said: {detail!r}\n" + f" Snowflake reqId: {request_id}\n" + f" Likely cause: {hint}\n" + f" Fix in Snowsight on the account hosting this SPCS app, then retry." + ) + + +def _spcs_gateway_response_hook(response: requests.Response, *args: Any, **kwargs: Any) -> None: + """``requests`` response hook: raise on a gateway-originated auth rejection.""" + _check_spcs_gateway_response(response) + + +def install_spcs_gateway_auth(session: requests.Session, pat: str) -> None: + """ + Configure ``session`` to authenticate to a Snowflake SPCS app gateway. + + Sets the ``X-SF-SPCS-Authorization`` header on the session (so it rides on + every request, including the unauthenticated login) and registers a response + hook that raises :class:`SpcsGatewayAuthError` on a gateway rejection. + + Scoping is automatic: the client's session only ever talks to its own + ``base_url``, so there is no need to match per-request hosts. + """ + session.headers[SPCS_GATEWAY_AUTH_HEADER] = f'Snowflake Token="{pat}"' + session.hooks["response"].append(_spcs_gateway_response_hook) diff --git a/docs/client.rst b/docs/client.rst index e8c7764..87180d0 100644 --- a/docs/client.rst +++ b/docs/client.rst @@ -60,6 +60,14 @@ datamasque.client.files module :undoc-members: :show-inheritance: +datamasque.client.spcs module +----------------------------- + +.. automodule:: datamasque.client.spcs + :members: + :undoc-members: + :show-inheritance: + datamasque.client.ifm module ---------------------------- diff --git a/docs/usage.rst b/docs/usage.rst index 0d92cfe..e6d9438 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -19,3 +19,31 @@ To use DataMasque Python in a project: for connection in client.list_connections(): print(connection.name) + +Connecting to an SPCS-hosted instance +===================================== + +When DataMasque is hosted behind Snowflake SPCS (Snowpark Container Services) +app ingress, its ``base_url`` ends in ``.snowflakecomputing.app`` and every +request must first clear the Snowflake gateway. Pass a Snowflake Programmatic +Access Token (PAT) as ``spcs_pat`` and the client sends it on the +``X-SF-SPCS-Authorization`` header automatically; the gateway strips that header +before forwarding, so your DataMasque ``username``/``password`` (or +``token_source``) auth is unaffected. + +.. code-block:: python + + config = DataMasqueInstanceConfig( + base_url="https://my-app.snowflakecomputing.app", + username="api_user", + password="api_password", + spcs_pat="", + ) + client = DataMasqueClient(config) + client.authenticate() + +Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an +account that can reach the SPCS app. If the gateway rejects the PAT (for example +it has expired, or a network policy excludes your IP), the client raises +``SpcsGatewayAuthError`` with the Snowflake-provided detail and a hint at the +likely cause. diff --git a/pyproject.toml b/pyproject.toml index 3ff0444..97636ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datamasque-python" -version = "1.1.3" +version = "1.1.4" description = "Official Python client for the DataMasque data-masking API." authors = [ { name = "DataMasque Ltd" }, diff --git a/tests/test_spcs.py b/tests/test_spcs.py new file mode 100644 index 0000000..b57b752 --- /dev/null +++ b/tests/test_spcs.py @@ -0,0 +1,157 @@ +"""Tests for Snowflake SPCS app gateway auth (`spcs_pat` on the client).""" + +from unittest.mock import patch + +import pytest +import requests_mock + +from datamasque.client import DataMasqueClient +from datamasque.client.exceptions import SpcsGatewayAuthError +from datamasque.client.models.dm_instance import DataMasqueInstanceConfig +from datamasque.client.spcs import ( + SPCS_GATEWAY_AUTH_HEADER, + _hint_for_gateway_detail, +) + +BASE_URL = "https://my-app.snowflakecomputing.app" +PAT = "PAT123" +EXPECTED_HEADER = 'Snowflake Token="PAT123"' +VALID_UUID = "12345678-1234-1234-1234-123456789abc" + +# Headers/body that together mark a response as a gateway-originated rejection. +GATEWAY_HEADERS = {"Server": "_"} + + +def _gateway_error_body(detail="Invalid token", response_type="ERROR_INVALID_TOKEN"): + return {"responseType": response_type, "requestId": VALID_UUID, "detail": detail} + + +@pytest.fixture +def spcs_config(): + return DataMasqueInstanceConfig( + base_url=BASE_URL, + username="api_user", + password="api_password", + spcs_pat=PAT, + ) + + +@pytest.fixture +def spcs_client(spcs_config): + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" # pretend we're already authenticated with DM + return client + + +def test_spcs_header_present_on_authenticated_request(spcs_client): + with requests_mock.Mocker() as m: + m.get(f"{BASE_URL}/api/anything/", json={}, status_code=200) + spcs_client.make_request("GET", "/api/anything/") + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # DM's own auth header rides alongside, untouched. + assert m.last_request.headers["Authorization"] == "Token dm-token" + + +def test_spcs_header_present_on_login_request(spcs_config): + """The header must ride on the unauthenticated login POST too (it must clear the gateway).""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + client.authenticate() + + assert m.last_request.headers[SPCS_GATEWAY_AUTH_HEADER] == EXPECTED_HEADER + # Login is unauthenticated — no DM Authorization header on this request. + assert "Authorization" not in m.last_request.headers + + +def test_no_spcs_pat_means_no_header_and_no_hook(client): + """The default client (no spcs_pat) is entirely unaffected.""" + assert SPCS_GATEWAY_AUTH_HEADER not in client._session.headers + assert client._session.hooks["response"] == [] + + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + m.get("http://test-server/api/anything/", json={}, status_code=200) + client.make_request("GET", "/api/anything/") + assert SPCS_GATEWAY_AUTH_HEADER not in m.last_request.headers + + +def test_gateway_401_raises_and_does_not_retry(spcs_config): + """A gateway rejection aborts immediately — no re-auth, no retry loop.""" + with patch.object(DataMasqueClient, "authenticate") as mock_auth: + client = DataMasqueClient(spcs_config) + client.token = "Token dm-token" + with requests_mock.Mocker() as m: + # A single response: a second call would 404 and fail the test loudly. + m.get( + f"{BASE_URL}/api/anything/", + json=_gateway_error_body(), + status_code=401, + headers=GATEWAY_HEADERS, + ) + with pytest.raises(SpcsGatewayAuthError) as exc: + client.make_request("GET", "/api/anything/") + + assert m.call_count == 1 + mock_auth.assert_not_called() + # The helpful hint is surfaced. + assert "PAT is malformed, expired, or revoked" in str(exc.value) + assert VALID_UUID in str(exc.value) + + +def test_normal_dm_401_still_retries(spcs_config): + """A genuine DataMasque 401 (no gateway signature) flows to the normal re-auth retry.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + # DRF-shaped 401, no gateway Server header → not a gateway rejection. + {"json": {"detail": "Authentication credentials were not provided."}, "status_code": 401}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + # Should NOT raise SpcsGatewayAuthError; should re-auth and succeed. + response = client.make_request("GET", "/api/anything/") + + assert response.status_code == 200 + assert client.token == "Token k" # re-auth happened + + +def test_gateway_signature_without_body_shape_passes_through(spcs_config): + """Gateway header present but DM-shaped body → treated as a normal DM 401, not a gateway rejection.""" + client = DataMasqueClient(spcs_config) + with requests_mock.Mocker() as m: + m.post(f"{BASE_URL}/api/auth/token/login/", json={"key": "k"}, status_code=200) + m.get( + f"{BASE_URL}/api/anything/", + [ + {"json": {"detail": "nope"}, "status_code": 401, "headers": GATEWAY_HEADERS}, + {"json": {"ok": True}, "status_code": 200}, + ], + ) + response = client.make_request("GET", "/api/anything/") # must not raise + + assert response.status_code == 200 + + +@pytest.mark.parametrize( + "detail, expected_substring", + [ + ("Request failed network policy check", "network policy"), + ("Invalid token supplied", "malformed, expired, or revoked"), + ("The token has expired", "PAT has expired"), + ("Unauthorized request", "Generic auth rejection"), + ("Something totally unexpected", "Unknown gateway rejection"), + ], +) +def test_hint_for_gateway_detail(detail, expected_substring): + assert expected_substring in _hint_for_gateway_detail(detail) + + +def test_spcs_pat_coexists_with_password_and_token_source(): + """`spcs_pat` is orthogonal to the password/token_source XOR — both combos validate.""" + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", password="p", spcs_pat=PAT) + DataMasqueInstanceConfig(base_url=BASE_URL, username="u", token_source=lambda: "t", spcs_pat=PAT) diff --git a/uv.lock b/uv.lock index 294561e..38ff7b9 100644 --- a/uv.lock +++ b/uv.lock @@ -428,7 +428,7 @@ toml = [ [[package]] name = "datamasque-python" -version = "1.1.3" +version = "1.1.4" source = { editable = "." } dependencies = [ { name = "pydantic" }, From ca61d124752e5dacc3b2ca4ae69233da0e8151d2 Mon Sep 17 00:00:00 2001 From: Beau Butler Date: Fri, 26 Jun 2026 12:02:11 +1200 Subject: [PATCH 2/5] feat: add spcs to SnowflakeStageLocation DataMasque running inside Snowflake SPCS saves connections with `snowflake_stage_location=spcs`, a value the client's enum did not model. Because `create_or_update_connection` lists and deserialises every connection to find a match, a single SPCS-staged Snowflake connection on a shared instance made the client raise `ValidationError` during setup of any other connection. Add `spcs` to `SnowflakeStageLocation` (no associated storage fields; the container stages on its own storage). Server-side validation already covers the per-stage required fields, so no client-side validator change is needed. Lets ui-testing retract the create_connection workaround in MR !185 (DM-3656). --- HISTORY.rst | 4 ++++ datamasque/client/models/connection.py | 1 + tests/test_connections.py | 30 ++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index ccac819..f22eac3 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -10,6 +10,10 @@ History * Added ``SpcsGatewayAuthError``, raised when the SPCS gateway rejects the PAT before the request reaches DataMasque (with the Snowflake detail and a hint at the likely cause). +* Added ``spcs`` to ``SnowflakeStageLocation`` so connections staged inside + Snowflake SPCS deserialise correctly. Previously, listing connections on an + instance that held an SPCS-staged Snowflake connection raised a + ``ValidationError`` on the unknown stage value. 1.1.3 (2026-06-26) ------------------ diff --git a/datamasque/client/models/connection.py b/datamasque/client/models/connection.py index a287ac1..8cb6279 100644 --- a/datamasque/client/models/connection.py +++ b/datamasque/client/models/connection.py @@ -56,6 +56,7 @@ class SnowflakeStageLocation(str, Enum): local = "local" # Not supported for production use aws_s3 = "aws_s3" azure_blob_storage = "azure_blob_storage" + spcs = "spcs" # DataMasque running inside Snowflake SPCS; staged on the container's own storage class SseSelection(Enum): diff --git a/tests/test_connections.py b/tests/test_connections.py index 757ff5f..29386c0 100644 --- a/tests/test_connections.py +++ b/tests/test_connections.py @@ -952,6 +952,36 @@ def test_snowflake_connection_model_validate_with_stage_location(): assert conn.password is None +def test_snowflake_connection_model_validate_with_spcs_stage_location(): + """ + A Snowflake connection staged in SPCS must deserialise (regression for ui-testing MR !185). + + When DataMasque runs inside Snowflake SPCS it saves connections with + `snowflake_stage_location=spcs`. Listing connections deserialises every + one, so an unknown stage value used to raise `ValidationError` and break + `create_or_update_connection` for unrelated connections on a shared instance. + """ + payload = { + "id": "a1b2c3d4-0000-0000-0000-000000000000", + "name": "snowflake_spcs", + "mask_type": "database", + "db_type": "snowflake", + "user": "snowman", + "database": "icicle", + "snowflake_account_id": "ABCDEF-123456", + "snowflake_warehouse": "warehouse1", + "snowflake_storage_integration_name": "mysi", + "snowflake_stage_location": "spcs", + } + + conn = SnowflakeConnectionConfig.model_validate(payload) + + assert conn.snowflake_stage_location is SnowflakeStageLocation.spcs + # SPCS staging carries no external-storage fields. + assert conn.s3_bucket_name is None + assert conn.snowflake_azure_container_name is None + + def test_snowflake_connection_model_validate_without_stage_location(): payload = { "id": "id-3", From 841f62a06784aeb04cf00f3e3186c49fa951f72f Mon Sep 17 00:00:00 2001 From: Beau Butler Date: Mon, 29 Jun 2026 14:04:04 +1200 Subject: [PATCH 3/5] feat: make Snowflake connection fields optional for SPCS staging DataMasque-in-SPCS connections leave user, snowflake_account_id, snowflake_warehouse, and snowflake_storage_integration_name unset: the agent uses the container's OAuth token plus the SNOWFLAKE_HOST/ SNOWFLAKE_ACCOUNT env and the app-owned QUERY_WAREHOUSE. Listing such a connection previously raised a ValidationError on the missing required fields. Mirror the app's canonical model, which types these as optional. --- HISTORY.rst | 5 +++++ datamasque/client/models/connection.py | 12 ++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index f22eac3..972d7ec 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -14,6 +14,11 @@ History Snowflake SPCS deserialise correctly. Previously, listing connections on an instance that held an SPCS-staged Snowflake connection raised a ``ValidationError`` on the unknown stage value. +* Made ``user``, ``snowflake_account_id``, ``snowflake_warehouse``, and + ``snowflake_storage_integration_name`` optional on ``SnowflakeConnectionConfig``. + DataMasque hosted inside Snowflake SPCS leaves these unset (it uses the + container's own OAuth token, host/account environment, and app-owned + warehouse), so requiring them made such connections fail to deserialise. 1.1.3 (2026-06-26) ------------------ diff --git a/datamasque/client/models/connection.py b/datamasque/client/models/connection.py index 8cb6279..eaa9c09 100644 --- a/datamasque/client/models/connection.py +++ b/datamasque/client/models/connection.py @@ -234,10 +234,14 @@ class SnowflakeConnectionConfig(ConnectionConfig): """ database: str - user: str - snowflake_account_id: str - snowflake_warehouse: str - snowflake_storage_integration_name: str + # Optional because DataMasque-in-SPCS connections leave these unset: the agent uses the + # container's OAuth token + SNOWFLAKE_HOST/SNOWFLAKE_ACCOUNT env and the app-owned QUERY_WAREHOUSE, + # so user/account/storage-integration/warehouse are null for stage_location=spcs. Mirrors the app's + # canonical model (agent .../schemas/connection/connection.py), which types these `| None = None`. + user: Optional[str] = None + snowflake_account_id: Optional[str] = None + snowflake_warehouse: Optional[str] = None + snowflake_storage_integration_name: Optional[str] = None host: str = "" port: Optional[int] = None db_schema: Optional[str] = Field(default=None, alias="schema") From 851990f69428bc0f115c646cec7edccc095f3fac Mon Sep 17 00:00:00 2001 From: Beau Butler Date: Mon, 29 Jun 2026 18:05:21 +1200 Subject: [PATCH 4/5] docs: clean up docs and comments for readability --- HISTORY.rst | 20 ++--- README.rst | 9 +- datamasque/client/exceptions.py | 14 +-- datamasque/client/models/dm_instance.py | 27 +++--- datamasque/client/spcs.py | 111 ++++++++++++------------ docs/usage.rst | 24 ++--- 6 files changed, 95 insertions(+), 110 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 972d7ec..b767549 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,20 +5,12 @@ History 1.1.4 (2026-06-29) ------------------ -* Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating when - DataMasque is in Snowflake SPCS (Native App). -* Added ``SpcsGatewayAuthError``, raised when the SPCS gateway rejects the PAT - before the request reaches DataMasque (with the Snowflake detail and a hint at - the likely cause). -* Added ``spcs`` to ``SnowflakeStageLocation`` so connections staged inside - Snowflake SPCS deserialise correctly. Previously, listing connections on an - instance that held an SPCS-staged Snowflake connection raised a - ``ValidationError`` on the unknown stage value. -* Made ``user``, ``snowflake_account_id``, ``snowflake_warehouse``, and - ``snowflake_storage_integration_name`` optional on ``SnowflakeConnectionConfig``. - DataMasque hosted inside Snowflake SPCS leaves these unset (it uses the - container's own OAuth token, host/account environment, and app-owned - warehouse), so requiring them made such connections fail to deserialise. +* Added support for DataMasque deployments on Snowpark Container Services (SPCS): + + * Added ``spcs_pat`` to ``DataMasqueInstanceConfig`` for authenticating through the SPCS app gateway. + * Added ``SpcsGatewayAuthError``, raised when the gateway rejects the PAT. + * Added the ``spcs`` option to ``SnowflakeStageLocation``. + * Made several ``SnowflakeConnectionConfig`` fields optional, since SPCS-staged connections leave them unset. 1.1.3 (2026-06-26) ------------------ diff --git a/README.rst b/README.rst index 2e6353b..c512312 100644 --- a/README.rst +++ b/README.rst @@ -42,11 +42,9 @@ Authentication is performed on the first request if ``authenticate()`` is not ca and is automatically retried once on a 401 response. ``client.healthcheck()`` is available as a lightweight readiness probe that does not consume credentials. -For a DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) app ingress +For a DataMasque instance hosted on Snowpark Container Services (SPCS) (a ``*.snowflakecomputing.app`` ``base_url``), -pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``; -the client sends it on the ``X-SF-SPCS-Authorization`` header to clear the Snowflake gateway, -which strips it before forwarding so your DataMasque auth is unaffected. +pass a Snowflake Programmatic Access Token as ``spcs_pat`` on ``DataMasqueInstanceConfig``. See the `usage docs `_ for details. Error handling @@ -67,9 +65,6 @@ All methods raise subclasses of ``DataMasqueException`` on failure: raised by ``start_masking_run`` when the server rejects the run. - ``DataMasqueUserError`` — raised by user-management methods when the input is invalid. -- ``SpcsGatewayAuthError`` — - raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat`` - before the request reaches DataMasque. Documentation ============= diff --git a/datamasque/client/exceptions.py b/datamasque/client/exceptions.py index a7ed612..9aa6db0 100644 --- a/datamasque/client/exceptions.py +++ b/datamasque/client/exceptions.py @@ -86,16 +86,16 @@ class IfmAuthError(DataMasqueIfmError): class SpcsGatewayAuthError(DataMasqueException): """ - Raised when a Snowflake SPCS app gateway rejects the configured ``spcs_pat``. + Raised when a Snowflake SPCS app gateway rejects the configured `spcs_pat`. - Only relevant when the client is configured with ``spcs_pat`` for an - instance behind Snowflake SPCS app ingress. The message includes the - Snowflake-provided detail, request id, and a hint at the likely cause - (e.g. an expired PAT or a network policy that excludes your IP). + The message includes the Snowflake-provided detail, request id, + and a hint at the likely cause + (for example an expired token, or a network policy that excludes your IP). Deliberately a direct subclass of `DataMasqueException` rather than - `DataMasqueApiError`: the client's 401 re-authenticate-and-retry path keys - off `DataMasqueApiError`/HTTP status, so keeping this outside that subtree + `DataMasqueApiError`: + the client's 401 re-authenticate-and-retry path keys off `DataMasqueApiError`, + so keeping this outside that subtree ensures a gateway rejection aborts immediately instead of looping. """ diff --git a/datamasque/client/models/dm_instance.py b/datamasque/client/models/dm_instance.py index 321adc8..7256981 100644 --- a/datamasque/client/models/dm_instance.py +++ b/datamasque/client/models/dm_instance.py @@ -21,13 +21,10 @@ class DataMasqueInstanceConfig(BaseModel): The client calls `token_source` on each authentication attempt, so the callable is free to fetch and refresh tokens out-of-band (e.g. from a secrets manager). - `spcs_pat` is an optional Snowflake Programmatic Access Token for reaching a - DataMasque instance hosted behind Snowflake SPCS (Snowpark Container Services) - app ingress, where `base_url` ends in `.snowflakecomputing.app`. It is sent on - every request via the `X-SF-SPCS-Authorization` header to clear the Snowflake - gateway, which strips it before forwarding — so it is independent of, and - layers underneath, whichever DataMasque auth method (`password` or - `token_source`) you choose. + `spcs_pat` is an optional Snowflake Programmatic Access Token + for reaching a DataMasque instance hosted on Snowpark Container Services (SPCS). + It layers underneath whichever DataMasque auth method + (`password` or `token_source`) you choose. """ model_config = ConfigDict(arbitrary_types_allowed=True) @@ -38,15 +35,13 @@ class DataMasqueInstanceConfig(BaseModel): verify_ssl: bool = True token_source: Optional[Callable[[], str]] = None spcs_pat: Optional[str] = None - """Snowflake Programmatic Access Token for a DataMasque instance hosted behind - Snowflake SPCS app ingress (a ``*.snowflakecomputing.app`` ``base_url``). - - Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an - account that can reach the SPCS app. The client sends it on the - ``X-SF-SPCS-Authorization`` header so the Snowflake gateway lets the request - through to DataMasque; the gateway strips the header before forwarding, leaving - DataMasque's own ``Authorization`` flow untouched. Leave it unset for - instances that are not behind an SPCS gateway.""" + """Snowflake Programmatic Access Token + for a DataMasque instance hosted on Snowpark Container Services (SPCS), + where `base_url` ends in `.snowflakecomputing.app`. + + Create the token in Snowsight (User profile → Programmatic access tokens) + for an account that can reach the SPCS app. + Leave unset for instances that are not hosted on SPCS.""" @model_validator(mode="after") def _validate_auth_source(self) -> "DataMasqueInstanceConfig": diff --git a/datamasque/client/spcs.py b/datamasque/client/spcs.py index 900566d..c7caf42 100644 --- a/datamasque/client/spcs.py +++ b/datamasque/client/spcs.py @@ -1,18 +1,20 @@ """ -Snowflake SPCS app gateway authentication for :class:`DataMasqueClient`. - -When a DataMasque instance is hosted behind Snowflake SPCS (Snowpark Container -Services) app ingress (``*.snowflakecomputing.app``), every request must first -clear the Snowflake gateway. We authenticate to the gateway with a Programmatic -Access Token (PAT) sent on ``X-SF-SPCS-Authorization: Snowflake Token=""``. -The gateway accepts the PAT on this alternate header and strips it before -forwarding to the container, so DataMasque's own ``Authorization: Token `` -flow rides through untouched. - -:func:`install_spcs_gateway_auth` attaches this behaviour to a client's -``requests.Session``: it sets the header on the session (so it is sent on every -request, including the unauthenticated login) and registers a response hook that -turns a gateway-originated rejection into a clear :class:`SpcsGatewayAuthError`. +Snowflake SPCS app gateway authentication for `DataMasqueClient`. + +When a DataMasque instance is hosted on Snowpark Container Services (SPCS), +its app ingress (`*.snowflakecomputing.app`) fronts every request +with a Snowflake gateway that must be cleared first. +We authenticate to the gateway with a Programmatic Access Token (PAT), +sent on `X-SF-SPCS-Authorization: Snowflake Token=""`. +The gateway accepts the PAT on this alternate header +and strips it before forwarding to the container, +so DataMasque's own `Authorization: Token ` flow rides through untouched. + +`install_spcs_gateway_auth` attaches this behaviour to a client's `requests.Session`: +it sets the header on the session +(so it is sent on every request, including the unauthenticated login) +and registers a response hook +that turns a gateway-originated rejection into a clear `SpcsGatewayAuthError`. """ import re @@ -46,19 +48,17 @@ def _has_spcs_gateway_header_signature(response: requests.Response) -> bool: """ True if at least one header-level Snowflake gateway marker is present. - Looks for either ``Server: _`` (the gateway's literal Server header value) - or any ``Set-Cookie`` carrying the ``sfc-ss-`` cookie name prefix. Either is - sufficient — both indicate the response was emitted by, or transited, - Snowflake's SPCS ingress. + Looks for either `Server: _` (the gateway's literal Server header value) + or any `Set-Cookie` carrying the `sfc-ss-` cookie name prefix. + Either is sufficient — both indicate the response was emitted by, + or transited, Snowflake's SPCS ingress. """ if response.headers.get("server", "").strip() == _SPCS_GATEWAY_SERVER_VALUE: return True # `Set-Cookie` may appear multiple times; `requests` flattens duplicates # via a comma-separated value in `.headers`, but our prefix substring # check is order- and count-insensitive. - if _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", ""): - return True - return False + return _SPCS_COOKIE_PREFIX in response.headers.get("set-cookie", "") def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: @@ -67,11 +67,11 @@ def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: All four conditions must hold: 1. The body parses as JSON and is a dict. - 2. Keys ``responseType``, ``requestId``, ``detail`` are all present and string-typed. - 3. ``responseType`` matches ``^ERROR_$``. - 4. ``requestId`` is a canonical 8-4-4-4-12 UUID. + 2. Keys `responseType`, `requestId`, `detail` are all present and string-typed. + 3. `responseType` matches `^ERROR_$`. + 4. `requestId` is a canonical 8-4-4-4-12 UUID. - Returns the parsed dict (truthy) on match, ``None`` on miss. + Returns the parsed dict (truthy) on match, `None` on miss. """ try: data = response.json() @@ -84,15 +84,13 @@ def _is_spcs_gateway_error_body(response: requests.Response) -> Optional[dict]: detail = data.get("detail") if not (isinstance(response_type, str) and isinstance(request_id, str) and isinstance(detail, str)): return None - if not _GATEWAY_RESPONSE_TYPE_RE.match(response_type): - return None - if not _UUID_RE.match(request_id): - return None - return data + if _GATEWAY_RESPONSE_TYPE_RE.match(response_type) and _UUID_RE.match(request_id): + return data + return None def _hint_for_gateway_detail(detail: str) -> str: - """Map common Snowflake gateway ``detail`` strings to a one-line cause hint.""" + """Map common Snowflake gateway `detail` strings to a one-line cause hint.""" d = (detail or "").lower() if "network policy" in d: return ( @@ -103,14 +101,14 @@ def _hint_for_gateway_detail(detail: str) -> str: ) if "invalid" in d and "token" in d: return ( - "PAT is malformed, expired, or revoked. Re-mint a PAT in Snowsight " + "PAT is malformed, expired, or revoked. Create a new PAT in Snowsight " "(User profile -> Programmatic access tokens) and update `spcs_pat`." ) if "expired" in d: - return "PAT has expired. Mint a fresh one in Snowsight and update `spcs_pat`." + return "PAT has expired. Create a fresh one in Snowsight and update `spcs_pat`." if "authentication" in d or "unauthorized" in d: return ( - "Generic auth rejection. Verify the PAT was minted by a user that " + "Generic auth rejection. Verify the PAT was created by a user that " "has access to this SPCS app, and that any account-level network " "policy includes your current public IP." ) @@ -119,20 +117,23 @@ def _hint_for_gateway_detail(detail: str) -> str: def _check_spcs_gateway_response(response: requests.Response) -> None: """ - Raise :class:`SpcsGatewayAuthError` iff ``response`` is a gateway-originated rejection. + Raise `SpcsGatewayAuthError` iff `response` is a gateway-originated rejection. Two-layer discriminator — both must hold: - * **Body originated at the gateway**: strict shape match on the JSON body - (multiple fields, typed, with format constraints) via - :func:`_is_spcs_gateway_error_body`. - * **Response transited an SPCS gateway**: header signature confirms via - :func:`_has_spcs_gateway_header_signature`. - - Either layer alone could in principle false-positive on an unrelated - upstream that happened to emit one of those signals; the conjunction is what - makes the check robust. Legitimate DataMasque 401s (DRF ``{"detail": "..."}``) - have the gateway header signature but fail the body shape — so they correctly - flow through to the client's normal re-auth-and-retry path untouched. + * **Body originated at the gateway**: + strict shape match on the JSON body + (multiple fields, typed, with format constraints) + via `_is_spcs_gateway_error_body`. + * **Response transited an SPCS gateway**: + header signature confirms via `_has_spcs_gateway_header_signature`. + + Either layer alone could in principle false-positive on an unrelated upstream + that happened to emit one of those signals; + the conjunction is what makes the check robust. + Legitimate DataMasque 401s (DRF `{"detail": "..."}`) + have the gateway header signature but fail the body shape — + so they correctly flow through + to the client's normal re-auth-and-retry path untouched. """ if response.status_code not in (401, 403): return @@ -149,7 +150,7 @@ def _check_spcs_gateway_response(response: requests.Response) -> None: raise SpcsGatewayAuthError( f"SPCS gateway rejected the PAT (HTTP {response.status_code}, " f"{response_type}). The request never reached DataMasque.\n" - f" Snowflake said: {detail!r}\n" + f' Snowflake said: "{detail}"\n' f" Snowflake reqId: {request_id}\n" f" Likely cause: {hint}\n" f" Fix in Snowsight on the account hosting this SPCS app, then retry." @@ -157,20 +158,22 @@ def _check_spcs_gateway_response(response: requests.Response) -> None: def _spcs_gateway_response_hook(response: requests.Response, *args: Any, **kwargs: Any) -> None: - """``requests`` response hook: raise on a gateway-originated auth rejection.""" + """`requests` response hook: raise on a gateway-originated auth rejection.""" _check_spcs_gateway_response(response) def install_spcs_gateway_auth(session: requests.Session, pat: str) -> None: """ - Configure ``session`` to authenticate to a Snowflake SPCS app gateway. + Configure `session` to authenticate to a Snowflake SPCS app gateway. - Sets the ``X-SF-SPCS-Authorization`` header on the session (so it rides on - every request, including the unauthenticated login) and registers a response - hook that raises :class:`SpcsGatewayAuthError` on a gateway rejection. + Sets the `X-SF-SPCS-Authorization` header on the session + (so it rides on every request, including the unauthenticated login) + and registers a response hook + that raises `SpcsGatewayAuthError` on a gateway rejection. - Scoping is automatic: the client's session only ever talks to its own - ``base_url``, so there is no need to match per-request hosts. + Scoping is automatic: + the client's session only ever talks to its own `base_url`, + so there is no need to match per-request hosts. """ session.headers[SPCS_GATEWAY_AUTH_HEADER] = f'Snowflake Token="{pat}"' session.hooks["response"].append(_spcs_gateway_response_hook) diff --git a/docs/usage.rst b/docs/usage.rst index e6d9438..b43f9c3 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -23,13 +23,12 @@ To use DataMasque Python in a project: Connecting to an SPCS-hosted instance ===================================== -When DataMasque is hosted behind Snowflake SPCS (Snowpark Container Services) -app ingress, its ``base_url`` ends in ``.snowflakecomputing.app`` and every -request must first clear the Snowflake gateway. Pass a Snowflake Programmatic -Access Token (PAT) as ``spcs_pat`` and the client sends it on the -``X-SF-SPCS-Authorization`` header automatically; the gateway strips that header -before forwarding, so your DataMasque ``username``/``password`` (or -``token_source``) auth is unaffected. +When DataMasque is hosted on Snowpark Container Services (SPCS), +its `base_url` ends in `.snowflakecomputing.app` +and requests must first clear the Snowflake gateway. +Pass a Snowflake Programmatic Access Token (PAT) as `spcs_pat` +and the client clears the gateway for you, +independently of your DataMasque `username`/`password` (or `token_source`) auth. .. code-block:: python @@ -42,8 +41,9 @@ before forwarding, so your DataMasque ``username``/``password`` (or client = DataMasqueClient(config) client.authenticate() -Mint the PAT in Snowsight (User profile → Programmatic access tokens) for an -account that can reach the SPCS app. If the gateway rejects the PAT (for example -it has expired, or a network policy excludes your IP), the client raises -``SpcsGatewayAuthError`` with the Snowflake-provided detail and a hint at the -likely cause. +Create the PAT in Snowsight (User profile → Programmatic access tokens) +for an account that can reach the SPCS app. +If the gateway rejects the PAT +(for example it has expired, or a network policy excludes your IP), +the client raises `SpcsGatewayAuthError` +with the Snowflake-provided detail and a hint at the likely cause. From 4cde2e55daa05cb3a2497754551e5c93e6434f0b Mon Sep 17 00:00:00 2001 From: Beau Butler Date: Mon, 29 Jun 2026 23:01:06 +1200 Subject: [PATCH 5/5] bump 1.1.4 to 1.1.5 to leapfrog sap hana change --- HISTORY.rst | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index b767549..65f9f87 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,7 +2,7 @@ History ======= -1.1.4 (2026-06-29) +1.1.5 (2026-06-29) ------------------ * Added support for DataMasque deployments on Snowpark Container Services (SPCS): diff --git a/pyproject.toml b/pyproject.toml index 97636ab..d892caf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "datamasque-python" -version = "1.1.4" +version = "1.1.5" description = "Official Python client for the DataMasque data-masking API." authors = [ { name = "DataMasque Ltd" },