From 116dbaf29760886e7f1e3c23f9a8f397c2686256 Mon Sep 17 00:00:00 2001
From: jiatolentino <tolentino.joseireneo@gmail.com>
Date: Wed, 24 Jun 2026 12:53:01 +0800
Subject: [PATCH 1/4] fix: make DiscoveryMatch.label optional for
 non-sensitive/ignore matches

A non-sensitive or ignore discovery match carries no sensitivity label, but
DiscoveryMatch required `label: str`, so parsing schema-discovery results that
contained such a match (e.g. a scope.non_sensitive column, hit via the
generate_ruleset path) raised a pydantic ValidationError. The schema_discovery
CSV path never parsed the model, so this was latent until MongoDB document
discovery exercised it.
---
 datamasque/client/models/discovery.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 7bccefa..4c35ae0 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -252,7 +252,7 @@ class DiscoveryMatch(BaseModel):
 
     model_config = ConfigDict(extra="allow")
 
-    label: str
+    label: Optional[str] = None
     categories: list[str]
     flagged_by: str
     description: str

From 7be03cd9982d7ae70b5c4c8436e95ff3b46289c3 Mon Sep 17 00:00:00 2001
From: Colin Haywood <colin.haywood@datamasque.com>
Date: Thu, 25 Jun 2026 09:56:36 +1200
Subject: [PATCH 2/4] feat: Add finished_with_warnings ruleset-generation status

Also fix the comments about ignored matches: the server never returns ignored matches at all.
---
 datamasque/client/models/discovery.py | 4 ++--
 datamasque/client/models/status.py    | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/datamasque/client/models/discovery.py b/datamasque/client/models/discovery.py
index 4c35ae0..23a5a88 100644
--- a/datamasque/client/models/discovery.py
+++ b/datamasque/client/models/discovery.py
@@ -343,8 +343,8 @@ class FileDiscoveryMatch(BaseModel):
 
     flagged_by: str
     description: str
-    label: Optional[str] = None  # Omitted for non-sensitive and ignored matches.
-    categories: Optional[list[str]] = None  # Omitted for ignored matches.
+    label: Optional[str] = None  # Omitted for non-sensitive matches.
+    categories: Optional[list[str]] = None
     hit_ratio: Optional[int] = None  # None for metadata matches, percentage 0-100 for IDD matches.
 
 
diff --git a/datamasque/client/models/status.py b/datamasque/client/models/status.py
index f470fd0..93871e8 100644
--- a/datamasque/client/models/status.py
+++ b/datamasque/client/models/status.py
@@ -60,6 +60,7 @@ class AsyncRulesetGenerationTaskStatus(enum.Enum):
     """List of statuses of async ruleset generation tasks."""
 
     finished = "finished"
+    finished_with_warnings = "finished_with_warnings"
     failed = "failed"
     running = "running"
     queued = "queued"

From ed6458e6aed1dc011a2c4c8f3f069e6e6377e501 Mon Sep 17 00:00:00 2001
From: jiatolentino <tolentino.joseireneo@gmail.com>
Date: Tue, 16 Jun 2026 12:09:54 +0800
Subject: [PATCH 3/4] feat: return split db-discovery reports as zip bytes

---
 datamasque/client/discovery.py | 16 +++++++++++++--
 datamasque/client/runs.py      | 11 +++++++++--
 tests/test_discovery.py        | 36 ++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/datamasque/client/discovery.py b/datamasque/client/discovery.py
index f6f3850..69862eb 100644
--- a/datamasque/client/discovery.py
+++ b/datamasque/client/discovery.py
@@ -98,6 +98,10 @@ def start_async_ruleset_generation_from_csv(
         - A text file handle (e.g. `open(path)`)
         - A binary file handle (e.g. `open(path, 'rb')`)
 
+        If the content is a zip (for example a split report from `get_db_discovery_result_report()`),
+        it is detected by its magic bytes and uploaded as a zip;
+        otherwise (including any non-seekable stream, which cannot be inspected) it is uploaded as CSV.
+
         Generation runs asynchronously on the server.
         Poll `get_async_ruleset_generation_task_status` until it returns
         `AsyncRulesetGenerationTaskStatus.finished`,
@@ -114,14 +118,22 @@ def start_async_ruleset_generation_from_csv(
         else:
             content = csv_content
 
+        is_zip = False
+        if content.seekable():
+            is_zip = content.read(4) == b"PK\x03\x04"
+            content.seek(0)
+        filename = "ruleset.zip" if is_zip else "ruleset.csv"
+        content_type = "application/zip" if is_zip else "text/csv"
+
         files = [
             UploadFile(
                 field_name="csv_or_zip_file",
-                filename="ruleset.csv",
+                filename=filename,
                 content=content,
-                content_type="text/csv",
+                content_type=content_type,
             ),
         ]
+
         self.make_request(
             method="POST",
             path=f"/api/async-generate-ruleset/{connection_id}/from-csv/",
diff --git a/datamasque/client/runs.py b/datamasque/client/runs.py
index 6b9a827..9f4c87c 100644
--- a/datamasque/client/runs.py
+++ b/datamasque/client/runs.py
@@ -1,5 +1,6 @@
 import logging
 import re
+from typing import Union
 
 from datamasque.client.base import BaseClient
 from datamasque.client.exceptions import (
@@ -43,9 +44,12 @@ def get_run_report(self, run_id: RunId) -> str:
         response = self.make_request("GET", f"api/runs/{run_id}/run-report/")
         return response.text
 
-    def get_db_discovery_result_report(self, run_id: RunId, include_selection_column: bool = True) -> str:
+    def get_db_discovery_result_report(self, run_id: RunId, include_selection_column: bool = True) -> Union[str, bytes]:
         """
-        Returns the database-discovery result report for the specified run as CSV.
+        Returns the database-discovery result report for the specified run.
+
+        Returns CSV text (`str`),
+        or a zip of numbered CSV parts as `bytes` when the server splits a large report.
 
         When `include_selection_column` is true (the default),
         the CSV includes a `selected` column suitable for feeding back into ruleset generation.
@@ -54,6 +58,9 @@ def get_db_discovery_result_report(self, run_id: RunId, include_selection_column
         url = f"api/runs/{run_id}/db-discovery-results/report/"
         params = None if include_selection_column else {"include_selection_column": "false"}
         response = self.make_request("GET", url, params=params)
+
+        if response.headers.get("Content-Type", "").startswith("application/zip"):
+            return response.content
         return response.text
 
     def get_unfinished_runs(self) -> dict[str, UnfinishedRun]:
diff --git a/tests/test_discovery.py b/tests/test_discovery.py
index 42debfd..d954045 100644
--- a/tests/test_discovery.py
+++ b/tests/test_discovery.py
@@ -108,6 +108,17 @@ def test_get_db_discovery_result_report(client):
         assert result == "db discovery report without selection column"
 
 
+def test_get_db_discovery_result_report_returns_zip_bytes_when_split(client):
+    run_id = RunId(1)
+    zip_bytes = b"PK\x03\x04 split report zip bytes"
+    with requests_mock.Mocker() as m:
+        url = f"http://test-server/api/runs/{run_id}/db-discovery-results/report/"
+        m.get(url, content=zip_bytes, headers={"Content-Type": "application/zip"}, status_code=200)
+        result = client.get_db_discovery_result_report(run_id)
+        assert result == zip_bytes
+        assert isinstance(result, bytes)
+
+
 def test_poll_async_ruleset_generation(client):
     connection_id = ConnectionId("1")
     with requests_mock.Mocker() as m:
@@ -463,6 +474,31 @@ def test_start_async_ruleset_generation_from_csv_success(client, csv_content):
         assert form_data["csv_or_zip_file"]["content"] == b"schema,table,column,selected\npublic,users,email,true"
 
 
+@pytest.mark.parametrize(
+    "zip_content",
+    [
+        b"PK\x03\x04 zipped discovery report",
+        BytesIO(b"PK\x03\x04 zipped discovery report"),
+    ],
+    ids=["bytes", "BytesIO"],
+)
+def test_start_async_ruleset_generation_from_csv_uploads_zip_as_zip(client, zip_content):
+    """A split report is uploaded with a .zip filename and zip content-type, whether passed as bytes or a binary stream."""
+    connection_id = ConnectionId("1")
+
+    with requests_mock.Mocker() as m:
+        m.post(
+            f"http://test-server/api/async-generate-ruleset/{connection_id}/from-csv/",
+            status_code=201,
+        )
+        client.start_async_ruleset_generation_from_csv(connection_id, zip_content)
+
+        form_data = parse_multipart_form(m.last_request)
+        assert form_data["csv_or_zip_file"]["filename"] == "ruleset.zip"
+        assert form_data["csv_or_zip_file"]["content_type"] == "application/zip"
+        assert form_data["csv_or_zip_file"]["content"] == b"PK\x03\x04 zipped discovery report"
+
+
 def test_start_async_ruleset_generation_from_csv_with_target_size(client):
     """Test async ruleset generation from CSV with target_size_bytes parameter."""
     connection_id = ConnectionId("1")

From 1d65dca1556c5dfbf87ba64a2453db174d6cd1cd Mon Sep 17 00:00:00 2001
From: jiatolentino <tolentino.joseireneo@gmail.com>
Date: Thu, 25 Jun 2026 06:01:25 +0800
Subject: [PATCH 4/4] chore: release 1.1.1

---
 HISTORY.rst    | 9 +++++++++
 pyproject.toml | 2 +-
 setup.cfg      | 2 +-
 uv.lock        | 2 +-
 4 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index dc72f9a..cbfc4d4 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,6 +2,15 @@
 History
 =======
 
+1.1.1 (2026-06-25)
+------------------
+
+* Made ``DiscoveryMatch.label`` optional (it is absent for non-sensitive/ignore matches).
+* Added the ``finished_with_warnings`` status to ``AsyncRulesetGenerationTaskStatus``.
+* ``get_db_discovery_result_report`` may now return ``bytes`` (a zip)
+  when the server splits a large DB-discovery report,
+  and ruleset generation from CSV now detects and forwards zip uploads.
+
 1.1.0 (2026-06-24)
 ------------------
 
diff --git a/pyproject.toml b/pyproject.toml
index 1d226e4..eca07af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "datamasque-python"
-version = "1.1.0"
+version = "1.1.1"
 description = "Official Python client for the DataMasque data-masking API."
 authors = [
     { name = "DataMasque Ltd" },
diff --git a/setup.cfg b/setup.cfg
index 26b71a7..1022f00 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.1.0
+current_version = 1.1.1
 commit = True
 tag = True
 
diff --git a/uv.lock b/uv.lock
index 31e21ef..777b461 100644
--- a/uv.lock
+++ b/uv.lock
@@ -428,7 +428,7 @@ toml = [
 
 [[package]]
 name = "datamasque-python"
-version = "1.1.0.dev0"
+version = "1.1.1"
 source = { editable = "." }
 dependencies = [
     { name = "pydantic" },