From 9e742bc59adee81310d0f570275a7c73829965a3 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Thu, 18 Jun 2026 12:41:04 +1000 Subject: [PATCH] Add Markdown footnote support to from_markdown Parse standard Markdown footnotes (`text[^label]` references and `[^label]: definition` lines) into Substack's footnoteAnchor inline nodes and footnote blocks. Footnotes are numbered by order of first reference and labels may be numeric or named. Also adds Post.footnote_anchor() and Post.footnote() helpers for building footnotes manually, plus tests. --- README.md | 15 +++ substack/post.py | 151 ++++++++++++++++++++++++ tests/substack/test_footnotes.py | 191 +++++++++++++++++++++++++++++++ 3 files changed, 357 insertions(+) create mode 100644 tests/substack/test_footnotes.py diff --git a/README.md b/README.md index c4f57fa..cfe0611 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,21 @@ This is a paragraph with **bold** and *italic* text. """ post.from_markdown(markdown_content, api=api) +# Markdown footnotes are supported too. References become inline anchors and +# definitions become footnote blocks, numbered by order of first appearance. +# Labels can be numbers or names (e.g. [^1] or [^source]). +footnote_markdown = """ +A claim that needs support.[^1] Another, with a named label.[^source] + +[^1]: The supporting detail, with a [link](https://example.com). +[^source]: Author, *Title* (2025). +""" +post.from_markdown(footnote_markdown, api=api) + +# Or build footnotes manually: +post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(1) +post.footnote(1, "The note text, with **formatting** allowed.") + draft = api.post_draft(post.get_draft()) # set section (can only be done after first posting the draft) diff --git a/substack/post.py b/substack/post.py index 8a9d55d..8d190f3 100644 --- a/substack/post.py +++ b/substack/post.py @@ -12,6 +12,10 @@ from substack.exceptions import SectionNotExistsException +# Markdown footnotes: ``text.[^label]`` references and ``[^label]: definition`` lines. +FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]") +FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$") + def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]: """Convert parse_inline() tokens to ProseMirror text nodes. @@ -543,6 +547,135 @@ def code_block(self, content, attrs=None): return self + def footnote_anchor(self, number: int): + """ + + Add an inline footnote reference (the superscript marker) to the last block. + + Args: + number: The footnote number this anchor points to. + + Returns: + Self for method chaining. + + """ + content = self.draft_body["content"][-1].get("content", []) + content += [{"type": "footnoteAnchor", "attrs": {"number": number}}] + self.draft_body["content"][-1]["content"] = content + return self + + def footnote(self, number: int, content=None): + """ + + Append a footnote block (the note shown at the foot of the post). + + Args: + number: The footnote number, matching a footnote_anchor. + content: Text string or list of inline token dicts. A plain string is + parsed for inline Markdown; a parse_inline() token list or a list + of ready text nodes is also accepted. + + Returns: + Self for method chaining. + + """ + if isinstance(content, str): + text_nodes = tokens_to_text_nodes(parse_inline(content)) + elif isinstance(content, list): + # Accept either parse_inline tokens ({"content": ...}) or text nodes. + if content and content[0].get("type") == "text": + text_nodes = content + else: + text_nodes = tokens_to_text_nodes(content) + else: + text_nodes = [] + + node: Dict = { + "type": "footnote", + "attrs": {"number": number}, + "content": [{"type": "paragraph", "content": text_nodes}], + } + self.draft_body["content"] = self.draft_body.get("content", []) + [node] + return self + + @staticmethod + def _extract_footnote_definitions(markdown_content: str): + """ + + Pull ``[^label]: definition`` lines out of the Markdown. + + Definitions may wrap onto indented continuation lines. Returns the body + with definitions removed plus a {label: definition_text} mapping. + + """ + lines = markdown_content.split("\n") + body_lines: List[str] = [] + definitions: Dict[str, str] = {} + i = 0 + while i < len(lines): + match = FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) + if match: + label, first = match.group(1), match.group(2) + parts = [first] + i += 1 + # Continuation lines are indented and neither blank nor a new def. + while i < len(lines) and lines[i].strip() and lines[i][:1] in (" ", "\t"): + parts.append(lines[i].strip()) + i += 1 + definitions[label] = " ".join(p for p in parts if p).strip() + else: + body_lines.append(lines[i]) + i += 1 + return "\n".join(body_lines), definitions + + @staticmethod + def _number_footnotes(markdown_content: str, definitions: Dict[str, str]): + """Number footnotes by order of first inline reference in the body.""" + order: List[str] = [] + for match in FOOTNOTE_REFERENCE_PATTERN.finditer(markdown_content): + label = match.group(1) + if label in definitions and label not in order: + order.append(label) + # Defined-but-unreferenced footnotes go last, in definition order. + for label in definitions: + if label not in order: + order.append(label) + return {label: index + 1 for index, label in enumerate(order)} + + def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]): + """Recursively replace ``[^label]`` in text nodes with footnoteAnchor nodes.""" + content = node.get("content") + if not isinstance(content, list): + return + new_content: List[Dict] = [] + for child in content: + text = child.get("text", "") + if child.get("type") == "text" and FOOTNOTE_REFERENCE_PATTERN.search(text): + marks = child.get("marks") + last = 0 + for match in FOOTNOTE_REFERENCE_PATTERN.finditer(text): + label = match.group(1) + if label not in numbers_by_label: + continue # Unknown label: leave the literal text in place. + if match.start() > last: + segment = {"type": "text", "text": text[last:match.start()]} + if marks: + segment["marks"] = marks + new_content.append(segment) + new_content.append( + {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}} + ) + last = match.end() + if last < len(text): + segment = {"type": "text", "text": text[last:]} + if marks: + segment["marks"] = marks + new_content.append(segment) + else: + self._inject_footnote_anchors(child, numbers_by_label) + new_content.append(child) + node["content"] = new_content + def from_markdown(self, markdown_content: str, api=None): """ Parse Markdown content and add it to the post. @@ -559,6 +692,10 @@ def from_markdown(self, markdown_content: str, api=None): - Ordered lists: Lines starting with '1.', '2.', etc. - Horizontal rules: Lines with ---, ***, or ___ - Inline formatting: **bold**, *italic*, ***bold+italic***, `code`, ~~strikethrough~~ + - Footnotes: ``text.[^label]`` references plus ``[^label]: definition`` + lines. References become inline anchors and definitions become + footnote blocks, numbered by order of first appearance. Labels may be + numbers or names (e.g. ``[^1]`` or ``[^agi-book]``). Args: markdown_content: Markdown string to parse and add to the post. @@ -572,6 +709,13 @@ def from_markdown(self, markdown_content: str, api=None): >>> post = Post("Title", "Subtitle", user_id) >>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).") """ + # Footnotes: extract ``[^label]: ...`` definitions and number them by + # order of first reference before parsing the rest of the body. + markdown_content, footnote_definitions = self._extract_footnote_definitions( + markdown_content + ) + footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions) + lines = markdown_content.split("\n") blocks = [] current_block: List[str] = [] @@ -844,4 +988,11 @@ def flush_ordered(): tokens = parse_inline(text_content) self.add({"type": "paragraph", "content": tokens}) + # Footnotes: turn ``[^label]`` references into inline anchors, then append + # the footnote blocks in numbered order. + if footnote_numbers: + self._inject_footnote_anchors(self.draft_body, footnote_numbers) + for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]): + self.footnote(number, footnote_definitions[label]) + return self diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py new file mode 100644 index 0000000..f9db725 --- /dev/null +++ b/tests/substack/test_footnotes.py @@ -0,0 +1,191 @@ +"""Tests for Markdown footnote support in post.py.""" + +from substack.post import Post + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def make_post(): + """Create a fresh Post instance for testing.""" + return Post(title="Test", subtitle="Sub", user_id=1) + + +def body_content(post): + """Return the content list from the post's draft body.""" + return post.draft_body["content"] + + +def find_nodes(node, node_type, acc=None): + """Recursively collect every node of a given type from a doc tree.""" + if acc is None: + acc = [] + if isinstance(node, dict): + if node.get("type") == node_type: + acc.append(node) + for value in node.values(): + find_nodes(value, node_type, acc) + elif isinstance(node, list): + for value in node: + find_nodes(value, node_type, acc) + return acc + + +def anchors(post): + return find_nodes(post.draft_body, "footnoteAnchor") + + +def footnotes(post): + return find_nodes(post.draft_body, "footnote") + + +# --------------------------------------------------------------------------- +# TestFootnoteHelpers +# --------------------------------------------------------------------------- + +class TestFootnoteHelpers: + def test_footnote_anchor_added_inline(self): + post = make_post() + post.paragraph(content=[{"content": "See here."}]) + post.footnote_anchor(1) + para = body_content(post)[0] + assert para["content"][-1] == {"type": "footnoteAnchor", "attrs": {"number": 1}} + + def test_footnote_block_from_string(self): + post = make_post() + post.footnote(1, "A simple note.") + block = body_content(post)[-1] + assert block["type"] == "footnote" + assert block["attrs"] == {"number": 1} + assert block["content"][0]["type"] == "paragraph" + assert block["content"][0]["content"][0]["text"] == "A simple note." + + def test_footnote_block_parses_inline_markdown(self): + post = make_post() + post.footnote(2, "See [the source](https://example.com).") + block = footnotes(post)[0] + text_nodes = block["content"][0]["content"] + link_node = next(n for n in text_nodes if n.get("marks")) + assert link_node["text"] == "the source" + assert link_node["marks"] == [{"type": "link", "attrs": {"href": "https://example.com"}}] + + +# --------------------------------------------------------------------------- +# TestFromMarkdownFootnotes +# --------------------------------------------------------------------------- + +class TestFromMarkdownFootnotes: + def test_basic_reference_and_definition(self): + post = make_post() + post.from_markdown("A claim.[^1]\n\n[^1]: The supporting detail.") + assert len(anchors(post)) == 1 + assert anchors(post)[0]["attrs"]["number"] == 1 + blocks = footnotes(post) + assert len(blocks) == 1 + assert blocks[0]["attrs"]["number"] == 1 + assert blocks[0]["content"][0]["content"][0]["text"] == "The supporting detail." + + def test_definition_removed_from_body(self): + post = make_post() + post.from_markdown("A claim.[^1]\n\n[^1]: The note.") + # The definition line must not leak into a paragraph. + paragraphs = find_nodes(post.draft_body, "paragraph") + body_text = " ".join( + n.get("text", "") + for p in paragraphs + for n in p.get("content", []) + ) + assert "[^1]:" not in body_text + + def test_anchor_injected_mid_sentence(self): + post = make_post() + post.from_markdown("Before[^1] and after.\n\n[^1]: Note.") + para = find_nodes(post.draft_body, "paragraph")[0] + types = [c["type"] for c in para["content"]] + assert types == ["text", "footnoteAnchor", "text"] + assert para["content"][0]["text"] == "Before" + assert para["content"][2]["text"] == " and after." + + def test_named_labels_numbered_by_first_appearance(self): + post = make_post() + md = ( + "First[^book] then second[^study].\n\n" + "[^study]: Second definition.\n" + "[^book]: First definition.\n" + ) + post.from_markdown(md) + nums = [a["attrs"]["number"] for a in anchors(post)] + assert nums == [1, 2] # order of reference, not of definition + blocks = sorted(footnotes(post), key=lambda b: b["attrs"]["number"]) + assert blocks[0]["content"][0]["content"][0]["text"] == "First definition." + assert blocks[1]["content"][0]["content"][0]["text"] == "Second definition." + + def test_repeated_reference_reuses_number(self): + post = make_post() + post.from_markdown("One[^a] two[^a].\n\n[^a]: Note.") + nums = [a["attrs"]["number"] for a in anchors(post)] + assert nums == [1, 1] + assert len(footnotes(post)) == 1 + + def test_link_inside_definition_preserved(self): + post = make_post() + post.from_markdown("Claim.[^1]\n\n[^1]: See [docs](https://example.com).") + block = footnotes(post)[0] + link_node = next( + n for n in block["content"][0]["content"] if n.get("marks") + ) + assert link_node["marks"][0]["attrs"]["href"] == "https://example.com" + + def test_multiline_definition(self): + post = make_post() + md = "Claim.[^1]\n\n[^1]: First line\n continued on the next line." + post.from_markdown(md) + text = footnotes(post)[0]["content"][0]["content"][0]["text"] + assert text == "First line continued on the next line." + + def test_unreferenced_definition_still_appended(self): + post = make_post() + post.from_markdown("No references here.\n\n[^1]: Orphan note.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 1 + + def test_reference_without_definition_left_as_text(self): + post = make_post() + post.from_markdown("A dangling[^missing] reference.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 0 + para = find_nodes(post.draft_body, "paragraph")[0] + assert "[^missing]" in para["content"][0]["text"] + + def test_definition_in_middle_moves_to_end(self): + post = make_post() + md = ( + "First paragraph.[^1]\n\n" + "[^1]: First footnote.\n\n" + "Second paragraph." + ) + post.from_markdown(md) + + types = [node["type"] for node in body_content(post)] + # Both paragraphs come first; the footnote block is last regardless of + # where the definition appeared in the source. + assert types == ["paragraph", "paragraph", "footnote"] + + paragraphs = find_nodes(post.draft_body, "paragraph") + assert paragraphs[0]["content"][0]["text"] == "First paragraph." + # The definition line did not become a paragraph in the body. + assert paragraphs[1]["content"][0]["text"] == "Second paragraph." + + assert len(anchors(post)) == 1 + block = footnotes(post)[0] + assert block["content"][0]["content"][0]["text"] == "First footnote." + + def test_no_footnotes_is_unchanged(self): + post = make_post() + post.from_markdown("Just a plain paragraph.") + assert len(anchors(post)) == 0 + assert len(footnotes(post)) == 0 + assert find_nodes(post.draft_body, "paragraph")[0]["content"][0]["text"] == ( + "Just a plain paragraph." + )