From 9e742bc59adee81310d0f570275a7c73829965a3 Mon Sep 17 00:00:00 2001
From: Matt Fisher <m@ttfisher.com>
Date: Thu, 18 Jun 2026 12:41:04 +1000
Subject: [PATCH] Add Markdown footnote support to from_markdown

Parse standard Markdown footnotes (`text[^label]` references and
`[^label]: definition` lines) into Substack's footnoteAnchor inline nodes
and footnote blocks. Footnotes are numbered by order of first reference and
labels may be numeric or named. Also adds Post.footnote_anchor() and
Post.footnote() helpers for building footnotes manually, plus tests.
---
 README.md                        |  15 +++
 substack/post.py                 | 151 ++++++++++++++++++++++++
 tests/substack/test_footnotes.py | 191 +++++++++++++++++++++++++++++++
 3 files changed, 357 insertions(+)
 create mode 100644 tests/substack/test_footnotes.py

diff --git a/README.md b/README.md
index c4f57fa..cfe0611 100644
--- a/README.md
+++ b/README.md
@@ -156,6 +156,21 @@ This is a paragraph with **bold** and *italic* text.
 """
 post.from_markdown(markdown_content, api=api)
 
+# Markdown footnotes are supported too. References become inline anchors and
+# definitions become footnote blocks, numbered by order of first reference.
+# Labels can be numbers or names (e.g. [^1] or [^source]).
+footnote_markdown = """
+A claim that needs support.[^1] Another, with a named label.[^source]
+
+[^1]: The supporting detail, with a [link](https://example.com).
+[^source]: Author, *Title* (2025).
+"""
+post.from_markdown(footnote_markdown, api=api)
+
+# Or build footnotes manually (numbers are not checked, so avoid ones already in use):
+post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(3)
+post.footnote(3, "The note text, with **formatting** allowed.")
+
 draft = api.post_draft(post.get_draft())
 
 # set section (can only be done after first posting the draft)
diff --git a/substack/post.py b/substack/post.py
index 8a9d55d..8d190f3 100644
--- a/substack/post.py
+++ b/substack/post.py
@@ -12,6 +12,10 @@
 
 from substack.exceptions import SectionNotExistsException
 
+# Markdown footnotes: ``text[^label]`` references and ``[^label]: definition`` lines.
+FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]")
+FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$")
+
 
 def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]:
     """Convert parse_inline() tokens to ProseMirror text nodes.
@@ -543,6 +547,135 @@ def code_block(self, content, attrs=None):
 
         return self
 
+    def footnote_anchor(self, number: int):
+        """
+
+        Add an inline footnote reference (the superscript marker) to the last block.
+
+        Args:
+            number: The footnote number this anchor points to.
+
+        Returns:
+            Self for method chaining.
+
+        """
+        content = self.draft_body["content"][-1].get("content", [])
+        content += [{"type": "footnoteAnchor", "attrs": {"number": number}}]
+        self.draft_body["content"][-1]["content"] = content
+        return self
+
+    def footnote(self, number: int, content=None):
+        """
+
+        Append a footnote block (the note shown at the foot of the post).
+
+        Args:
+            number: The footnote number, matching a footnote_anchor.
+            content: Text string or list of inline token dicts. A plain string is
+                parsed for inline Markdown; a parse_inline() token list or a list
+                of ready text nodes is also accepted.
+
+        Returns:
+            Self for method chaining.
+
+        """
+        if isinstance(content, str):
+            text_nodes = tokens_to_text_nodes(parse_inline(content))
+        elif isinstance(content, list):
+            # Accept either parse_inline tokens ({"content": ...}) or text nodes.
+            if content and content[0].get("type") == "text":
+                text_nodes = content
+            else:
+                text_nodes = tokens_to_text_nodes(content)
+        else:
+            text_nodes = []
+
+        node: Dict = {
+            "type": "footnote",
+            "attrs": {"number": number},
+            "content": [{"type": "paragraph", "content": text_nodes}],
+        }
+        self.draft_body["content"] = self.draft_body.get("content", []) + [node]
+        return self
+
+    @staticmethod
+    def _extract_footnote_definitions(markdown_content: str):
+        """
+
+        Pull ``[^label]: definition`` lines out of the Markdown.
+
+        Definitions may wrap onto indented continuation lines; a repeated label keeps
+        its last definition. Returns the body with definitions removed plus a
+        {label: definition_text} mapping. NOTE(review): fenced code is scanned too.
+        """
+        lines = markdown_content.split("\n")
+        body_lines: List[str] = []
+        definitions: Dict[str, str] = {}
+        i = 0
+        while i < len(lines):
+            match = FOOTNOTE_DEFINITION_PATTERN.match(lines[i])
+            if match:
+                label, first = match.group(1), match.group(2)
+                parts = [first]
+                i += 1
+                # Continuation lines are indented and neither blank nor a new def.
+                while i < len(lines) and lines[i].strip() and lines[i][:1] in (" ", "\t"):
+                    parts.append(lines[i].strip())
+                    i += 1
+                definitions[label] = " ".join(p for p in parts if p).strip()
+            else:
+                body_lines.append(lines[i])
+                i += 1
+        return "\n".join(body_lines), definitions
+
+    @staticmethod
+    def _number_footnotes(markdown_content: str, definitions: Dict[str, str]):
+        """Number footnotes by order of first inline reference in the body."""
+        order: List[str] = []
+        for match in FOOTNOTE_REFERENCE_PATTERN.finditer(markdown_content):
+            label = match.group(1)
+            if label in definitions and label not in order:
+                order.append(label)
+        # Defined-but-unreferenced footnotes go last, in definition order.
+        for label in definitions:
+            if label not in order:
+                order.append(label)
+        return {label: index + 1 for index, label in enumerate(order)}
+
+    def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]):
+        """Recursively replace ``[^label]`` in text nodes with footnoteAnchor nodes."""
+        content = node.get("content")
+        if not isinstance(content, list):
+            return
+        new_content: List[Dict] = []
+        for child in content:
+            text = child.get("text", "")
+            if child.get("type") == "text" and FOOTNOTE_REFERENCE_PATTERN.search(text):
+                marks = child.get("marks")
+                last = 0
+                for match in FOOTNOTE_REFERENCE_PATTERN.finditer(text):
+                    label = match.group(1)
+                    if label not in numbers_by_label:
+                        continue  # Unknown label: leave the literal text in place.
+                    if match.start() > last:
+                        segment = {"type": "text", "text": text[last:match.start()]}
+                        if marks:
+                            segment["marks"] = marks
+                        new_content.append(segment)
+                    new_content.append(
+                        {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}}
+                    )
+                    last = match.end()
+                if last < len(text):
+                    segment = {"type": "text", "text": text[last:]}
+                    if marks:
+                        segment["marks"] = marks
+                    new_content.append(segment)
+            else:
+                self._inject_footnote_anchors(child, numbers_by_label)
+                new_content.append(child)
+        node["content"] = new_content
+
     def from_markdown(self, markdown_content: str, api=None):
         """
         Parse Markdown content and add it to the post.
@@ -559,6 +692,10 @@ def from_markdown(self, markdown_content: str, api=None):
           - Ordered lists: Lines starting with '1.', '2.', etc.
           - Horizontal rules: Lines with ---, ***, or ___
           - Inline formatting: **bold**, *italic*, ***bold+italic***, `code`, ~~strikethrough~~
+          - Footnotes: ``text[^label]`` references plus ``[^label]: definition``
+            lines. References become inline anchors and definitions become
+            footnote blocks, numbered by order of first reference (unreferenced
+            ones last). Labels may be numbers or names (e.g. ``[^1]`` or ``[^agi-book]``).
 
         Args:
             markdown_content: Markdown string to parse and add to the post.
@@ -572,6 +709,13 @@ def from_markdown(self, markdown_content: str, api=None):
             >>> post = Post("Title", "Subtitle", user_id)
             >>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).")
         """
+        # Footnotes: extract ``[^label]: ...`` definitions and number them by order of
+        # first reference before parsing the body. Numbering restarts at 1 on every call.
+        markdown_content, footnote_definitions = self._extract_footnote_definitions(
+            markdown_content
+        )
+        footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions)
+
         lines = markdown_content.split("\n")
         blocks = []
         current_block: List[str] = []
@@ -844,4 +988,11 @@ def flush_ordered():
                             tokens = parse_inline(text_content)
                             self.add({"type": "paragraph", "content": tokens})
 
+        # Footnotes: replace ``[^label]`` references with inline anchors across the whole
+        # draft body (not only blocks added by this call), then append footnote blocks by number.
+        if footnote_numbers:
+            self._inject_footnote_anchors(self.draft_body, footnote_numbers)
+            for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]):
+                self.footnote(number, footnote_definitions[label])
+
         return self
diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py
new file mode 100644
index 0000000..f9db725
--- /dev/null
+++ b/tests/substack/test_footnotes.py
@@ -0,0 +1,191 @@
+"""Tests for Markdown footnote support in post.py."""
+
+from substack.post import Post
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def make_post():
+    """Create a fresh Post instance for testing."""
+    return Post(title="Test", subtitle="Sub", user_id=1)
+
+
+def body_content(post):
+    """Return the content list from the post's draft body."""
+    return post.draft_body["content"]
+
+
+def find_nodes(node, node_type, acc=None):
+    """Recursively collect every node of a given type from a doc tree."""
+    if acc is None:
+        acc = []
+    if isinstance(node, dict):
+        if node.get("type") == node_type:
+            acc.append(node)
+        for value in node.values():
+            find_nodes(value, node_type, acc)
+    elif isinstance(node, list):
+        for value in node:
+            find_nodes(value, node_type, acc)
+    return acc
+
+
+def anchors(post):
+    return find_nodes(post.draft_body, "footnoteAnchor")
+
+
+def footnotes(post):
+    return find_nodes(post.draft_body, "footnote")
+
+
+# ---------------------------------------------------------------------------
+# TestFootnoteHelpers
+# ---------------------------------------------------------------------------
+
+class TestFootnoteHelpers:
+    def test_footnote_anchor_added_inline(self):
+        post = make_post()
+        post.paragraph(content=[{"content": "See here."}])
+        post.footnote_anchor(1)
+        para = body_content(post)[0]
+        assert para["content"][-1] == {"type": "footnoteAnchor", "attrs": {"number": 1}}
+
+    def test_footnote_block_from_string(self):
+        post = make_post()
+        post.footnote(1, "A simple note.")
+        block = body_content(post)[-1]
+        assert block["type"] == "footnote"
+        assert block["attrs"] == {"number": 1}
+        assert block["content"][0]["type"] == "paragraph"
+        assert block["content"][0]["content"][0]["text"] == "A simple note."
+
+    def test_footnote_block_parses_inline_markdown(self):
+        post = make_post()
+        post.footnote(2, "See [the source](https://example.com).")
+        block = footnotes(post)[0]
+        text_nodes = block["content"][0]["content"]
+        link_node = next(n for n in text_nodes if n.get("marks"))
+        assert link_node["text"] == "the source"
+        assert link_node["marks"] == [{"type": "link", "attrs": {"href": "https://example.com"}}]
+
+
+# ---------------------------------------------------------------------------
+# TestFromMarkdownFootnotes
+# ---------------------------------------------------------------------------
+
+class TestFromMarkdownFootnotes:
+    def test_basic_reference_and_definition(self):
+        post = make_post()
+        post.from_markdown("A claim.[^1]\n\n[^1]: The supporting detail.")
+        assert len(anchors(post)) == 1
+        assert anchors(post)[0]["attrs"]["number"] == 1
+        blocks = footnotes(post)
+        assert len(blocks) == 1
+        assert blocks[0]["attrs"]["number"] == 1
+        assert blocks[0]["content"][0]["content"][0]["text"] == "The supporting detail."
+
+    def test_definition_removed_from_body(self):
+        post = make_post()
+        post.from_markdown("A claim.[^1]\n\n[^1]: The note.")
+        # The definition line must not leak into a paragraph.
+        paragraphs = find_nodes(post.draft_body, "paragraph")
+        body_text = " ".join(
+            n.get("text", "")
+            for p in paragraphs
+            for n in p.get("content", [])
+        )
+        assert "[^1]:" not in body_text
+
+    def test_anchor_injected_mid_sentence(self):
+        post = make_post()
+        post.from_markdown("Before[^1] and after.\n\n[^1]: Note.")
+        para = find_nodes(post.draft_body, "paragraph")[0]
+        types = [c["type"] for c in para["content"]]
+        assert types == ["text", "footnoteAnchor", "text"]
+        assert para["content"][0]["text"] == "Before"
+        assert para["content"][2]["text"] == " and after."
+
+    def test_named_labels_numbered_by_first_appearance(self):
+        post = make_post()
+        md = (
+            "First[^book] then second[^study].\n\n"
+            "[^study]: Second definition.\n"
+            "[^book]: First definition.\n"
+        )
+        post.from_markdown(md)
+        nums = [a["attrs"]["number"] for a in anchors(post)]
+        assert nums == [1, 2]  # order of reference, not of definition
+        blocks = sorted(footnotes(post), key=lambda b: b["attrs"]["number"])
+        assert blocks[0]["content"][0]["content"][0]["text"] == "First definition."
+        assert blocks[1]["content"][0]["content"][0]["text"] == "Second definition."
+
+    def test_repeated_reference_reuses_number(self):
+        post = make_post()
+        post.from_markdown("One[^a] two[^a].\n\n[^a]: Note.")
+        nums = [a["attrs"]["number"] for a in anchors(post)]
+        assert nums == [1, 1]
+        assert len(footnotes(post)) == 1
+
+    def test_link_inside_definition_preserved(self):
+        post = make_post()
+        post.from_markdown("Claim.[^1]\n\n[^1]: See [docs](https://example.com).")
+        block = footnotes(post)[0]
+        link_node = next(
+            n for n in block["content"][0]["content"] if n.get("marks")
+        )
+        assert link_node["marks"][0]["attrs"]["href"] == "https://example.com"
+
+    def test_multiline_definition(self):
+        post = make_post()
+        md = "Claim.[^1]\n\n[^1]: First line\n    continued on the next line."
+        post.from_markdown(md)
+        text = footnotes(post)[0]["content"][0]["content"][0]["text"]
+        assert text == "First line continued on the next line."
+
+    def test_unreferenced_definition_still_appended(self):
+        post = make_post()
+        post.from_markdown("No references here.\n\n[^1]: Orphan note.")
+        assert len(anchors(post)) == 0
+        assert len(footnotes(post)) == 1
+
+    def test_reference_without_definition_left_as_text(self):
+        post = make_post()
+        post.from_markdown("A dangling[^missing] reference.")
+        assert len(anchors(post)) == 0
+        assert len(footnotes(post)) == 0
+        para = find_nodes(post.draft_body, "paragraph")[0]
+        assert "[^missing]" in para["content"][0]["text"]
+
+    def test_definition_in_middle_moves_to_end(self):
+        post = make_post()
+        md = (
+            "First paragraph.[^1]\n\n"
+            "[^1]: First footnote.\n\n"
+            "Second paragraph."
+        )
+        post.from_markdown(md)
+
+        types = [node["type"] for node in body_content(post)]
+        # Both paragraphs come first; the footnote block is last regardless of
+        # where the definition appeared in the source.
+        assert types == ["paragraph", "paragraph", "footnote"]
+
+        paragraphs = find_nodes(post.draft_body, "paragraph")
+        assert paragraphs[0]["content"][0]["text"] == "First paragraph."
+        # The definition line did not become a paragraph in the body.
+        assert paragraphs[1]["content"][0]["text"] == "Second paragraph."
+
+        assert len(anchors(post)) == 1
+        block = footnotes(post)[0]
+        assert block["content"][0]["content"][0]["text"] == "First footnote."
+
+    def test_no_footnotes_is_unchanged(self):
+        post = make_post()
+        post.from_markdown("Just a plain paragraph.")
+        assert len(anchors(post)) == 0
+        assert len(footnotes(post)) == 0
+        assert find_nodes(post.draft_body, "paragraph")[0]["content"][0]["text"] == (
+            "Just a plain paragraph."
+        )