Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,21 @@ This is a paragraph with **bold** and *italic* text.
"""
post.from_markdown(markdown_content, api=api)

# Markdown footnotes are supported too. References become inline anchors and
# definitions become footnote blocks, numbered by order of first appearance.
# Labels can be numbers or names (e.g. [^1] or [^source]).
footnote_markdown = """
A claim that needs support.[^1] Another, with a named label.[^source]

[^1]: The supporting detail, with a [link](https://example.com).
[^source]: Author, *Title* (2025).
"""
post.from_markdown(footnote_markdown, api=api)

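# Or build footnotes manually. Use a number that is not already taken in this
# post (the Markdown above produced footnotes 1 and 2):
post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(3)
post.footnote(3, "The note text, with **formatting** allowed.")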

draft = api.post_draft(post.get_draft())

# set section (can only be done after first posting the draft)
Expand Down
151 changes: 151 additions & 0 deletions substack/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

from substack.exceptions import SectionNotExistsException

# Markdown footnotes: ``text.[^label]`` references and ``[^label]: definition`` lines.
# Reference marker, found anywhere in a string. Group 1 is the label: one or
# more characters other than ``]``. Note that it also matches the ``[^label]``
# prefix of a definition line.
FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]")
# Definition line; the marker must sit at the very start of the string it is
# matched against. Group 1 is the label, group 2 is everything after the colon
# and at most one following whitespace character.
FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$")


def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]:
"""Convert parse_inline() tokens to ProseMirror text nodes.
Expand Down Expand Up @@ -543,6 +547,135 @@ def code_block(self, content, attrs=None):

return self

def footnote_anchor(self, number: int):
    """
    Attach a footnote reference marker to the most recently added block.

    The marker is a ``footnoteAnchor`` inline node placed after whatever
    inline content the last block already holds.

    Args:
        number: Number of the footnote this marker refers to.

    Returns:
        Self for method chaining.
    """
    last_block = self.draft_body["content"][-1]
    marker = {"type": "footnoteAnchor", "attrs": {"number": number}}
    # setdefault creates the inline list when the block does not have one yet.
    last_block.setdefault("content", []).append(marker)
    return self

def footnote(self, number: int, content=None):
    """
    Add a footnote block, i.e. the note body placed after the existing content.

    Args:
        number: Footnote number; should match the footnote_anchor that
            points at this note.
        content: What the note says. A string is run through the inline
            Markdown parser. A list is used as-is when its first item is a
            ``{"type": "text"}`` node; otherwise it is treated as
            parse_inline() tokens and converted. Any other value (including
            the default None) produces an empty note.

    Returns:
        Self for method chaining.
    """
    text_nodes = []
    if isinstance(content, str):
        text_nodes = tokens_to_text_nodes(parse_inline(content))
    elif isinstance(content, list):
        # Only the first item is inspected to tell ready-made text nodes
        # apart from parse_inline tokens ({"content": ...}).
        looks_like_nodes = bool(content) and content[0].get("type") == "text"
        text_nodes = content if looks_like_nodes else tokens_to_text_nodes(content)

    paragraph = {"type": "paragraph", "content": text_nodes}
    block: Dict = {
        "type": "footnote",
        "attrs": {"number": number},
        "content": [paragraph],
    }
    existing = self.draft_body.get("content", [])
    self.draft_body["content"] = existing + [block]
    return self

@staticmethod
def _extract_footnote_definitions(markdown_content: str):
    """
    Split Markdown into body text and footnote definitions.

    A definition is a line that starts with ``[^label]:``. It may wrap onto
    following lines that are indented and non-blank; the pieces are joined
    with single spaces. Lines inside a ```-fenced code block are never
    treated as definitions, so code samples that happen to contain
    ``[^label]: ...`` stay intact in the body.

    Args:
        markdown_content: Raw Markdown source.

    Returns:
        A ``(body, definitions)`` tuple: the Markdown with definition lines
        removed, and a ``{label: definition_text}`` dict. If a label is
        defined more than once, the last definition wins.
    """
    lines = markdown_content.split("\n")
    body_lines: List[str] = []
    definitions: Dict[str, str] = {}
    in_fence = False
    i = 0
    while i < len(lines):
        line = lines[i]

        # A fence delimiter toggles code-block state and always stays in the body.
        # NOTE(review): assumes from_markdown recognises ``` fences the same
        # way (on the stripped line) - confirm against the block parser.
        if line.lstrip().startswith("```"):
            in_fence = not in_fence
            body_lines.append(line)
            i += 1
            continue

        match = None if in_fence else FOOTNOTE_DEFINITION_PATTERN.match(line)
        if match is None:
            body_lines.append(line)
            i += 1
            continue

        label = match.group(1)
        # Strip each piece on its own so a trailing "\r" from CRLF input does
        # not end up in the middle of a wrapped definition.
        parts = [match.group(2).strip()]
        i += 1
        # Continuation lines are indented and neither blank nor a new def
        # (a new def starts at column 0, so it can never be indented).
        while i < len(lines) and lines[i].strip() and lines[i][:1] in (" ", "\t"):
            parts.append(lines[i].strip())
            i += 1
        definitions[label] = " ".join(part for part in parts if part)

    return "\n".join(body_lines), definitions

@staticmethod
def _number_footnotes(markdown_content: str, definitions: Dict[str, str]):
    """
    Assign 1-based numbers to footnote labels.

    Labels are ranked by where they are first referenced in the body.
    References to labels without a definition are ignored. Labels that are
    defined but never referenced are numbered last, in definition order.

    Args:
        markdown_content: Markdown body to scan for ``[^label]`` references.
        definitions: Mapping of label to definition text.

    Returns:
        A ``{label: number}`` dict.
    """
    referenced = (
        found.group(1)
        for found in FOOTNOTE_REFERENCE_PATTERN.finditer(markdown_content)
    )
    # A dict doubles as an insertion-ordered set: repeated labels keep the
    # position of their first reference.
    ranked = dict.fromkeys(label for label in referenced if label in definitions)
    for label in definitions:
        ranked.setdefault(label, None)
    return {label: position for position, label in enumerate(ranked, start=1)}

def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]):
    """
    Swap ``[^label]`` markers inside text nodes for footnoteAnchor nodes.

    Walks ``node["content"]`` depth-first and rewrites it in place. A text
    node containing known labels is split into the surrounding text pieces
    (each keeping the original marks) with an anchor node between them.
    Markers whose label is not in ``numbers_by_label`` stay as literal text.

    Args:
        node: Document node whose ``content`` list is rewritten.
        numbers_by_label: Mapping of footnote label to footnote number.
    """
    children = node.get("content")
    if not isinstance(children, list):
        return

    def text_piece(value, marks):
        # Rebuild a text node, carrying marks over only when there are any.
        piece = {"type": "text", "text": value}
        if marks:
            piece["marks"] = marks
        return piece

    rebuilt: List[Dict] = []
    for child in children:
        raw = child.get("text", "")
        has_marker = child.get("type") == "text" and FOOTNOTE_REFERENCE_PATTERN.search(raw)
        if not has_marker:
            # Not a text node with a marker: descend and keep the node itself.
            self._inject_footnote_anchors(child, numbers_by_label)
            rebuilt.append(child)
            continue

        marks = child.get("marks")
        cursor = 0
        for ref in FOOTNOTE_REFERENCE_PATTERN.finditer(raw):
            label = ref.group(1)
            if label not in numbers_by_label:
                continue  # Unknown label: leave the literal text in place.
            if ref.start() > cursor:
                rebuilt.append(text_piece(raw[cursor:ref.start()], marks))
            rebuilt.append(
                {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}}
            )
            cursor = ref.end()
        # Whatever follows the last replaced marker (or the whole text when
        # nothing was replaced) is emitted as a final text piece.
        if cursor < len(raw):
            rebuilt.append(text_piece(raw[cursor:], marks))

    node["content"] = rebuilt

def from_markdown(self, markdown_content: str, api=None):
"""
Parse Markdown content and add it to the post.
Expand All @@ -559,6 +692,10 @@ def from_markdown(self, markdown_content: str, api=None):
- Ordered lists: Lines starting with '1.', '2.', etc.
- Horizontal rules: Lines with ---, ***, or ___
- Inline formatting: **bold**, *italic*, ***bold+italic***, `code`, ~~strikethrough~~
- Footnotes: ``text.[^label]`` references plus ``[^label]: definition``
lines. References become inline anchors and definitions become
footnote blocks, numbered by order of first appearance. Labels may be
numbers or names (e.g. ``[^1]`` or ``[^agi-book]``).

Args:
markdown_content: Markdown string to parse and add to the post.
Expand All @@ -572,6 +709,13 @@ def from_markdown(self, markdown_content: str, api=None):
>>> post = Post("Title", "Subtitle", user_id)
>>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).")
"""
# Footnotes: extract ``[^label]: ...`` definitions and number them by
# order of first reference before parsing the rest of the body.
markdown_content, footnote_definitions = self._extract_footnote_definitions(
markdown_content
)
footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions)

lines = markdown_content.split("\n")
blocks = []
current_block: List[str] = []
Expand Down Expand Up @@ -844,4 +988,11 @@ def flush_ordered():
tokens = parse_inline(text_content)
self.add({"type": "paragraph", "content": tokens})

# Footnotes: turn ``[^label]`` references into inline anchors, then append
# the footnote blocks in numbered order.
if footnote_numbers:
self._inject_footnote_anchors(self.draft_body, footnote_numbers)
for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]):
self.footnote(number, footnote_definitions[label])

return self
191 changes: 191 additions & 0 deletions tests/substack/test_footnotes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Tests for Markdown footnote support in post.py."""

from substack.post import Post


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def make_post():
    """Build a new Post with placeholder metadata for use in a test."""
    post = Post(title="Test", subtitle="Sub", user_id=1)
    return post


def body_content(post):
    """Give back the top-level ``content`` list of the post's draft body."""
    draft = post.draft_body
    return draft["content"]


def find_nodes(node, node_type, acc=None):
    """
    Gather, in document order, every dict in a tree whose "type" is node_type.

    Dicts are searched through all of their values and lists through all of
    their items; any other value is a leaf. Matches are appended to ``acc``
    (a new list when omitted), which is also the return value.
    """
    found = [] if acc is None else acc
    if isinstance(node, dict):
        if node.get("type") == node_type:
            found.append(node)
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return found
    for child in children:
        find_nodes(child, node_type, found)
    return found


def anchors(post):
    """Collect every footnoteAnchor node found anywhere in the post's draft body."""
    return find_nodes(post.draft_body, node_type="footnoteAnchor")


def footnotes(post):
    """Collect every footnote block found anywhere in the post's draft body."""
    return find_nodes(post.draft_body, node_type="footnote")


# ---------------------------------------------------------------------------
# TestFootnoteHelpers
# ---------------------------------------------------------------------------

class TestFootnoteHelpers:
    """Direct tests of the manual footnote-building methods on Post."""

    def test_footnote_anchor_added_inline(self):
        """footnote_anchor puts an anchor node at the end of the last block."""
        expected_anchor = {"type": "footnoteAnchor", "attrs": {"number": 1}}
        post = make_post()
        post.paragraph(content=[{"content": "See here."}])
        post.footnote_anchor(1)
        first_block = body_content(post)[0]
        assert first_block["content"][-1] == expected_anchor

    def test_footnote_block_from_string(self):
        """A plain string becomes a footnote block wrapping one paragraph."""
        post = make_post()
        post.footnote(1, "A simple note.")
        note = body_content(post)[-1]
        assert note["type"] == "footnote"
        assert note["attrs"] == {"number": 1}
        inner = note["content"][0]
        assert inner["type"] == "paragraph"
        assert inner["content"][0]["text"] == "A simple note."

    def test_footnote_block_parses_inline_markdown(self):
        """Inline Markdown in the note text is parsed, e.g. links get a link mark."""
        expected_marks = [{"type": "link", "attrs": {"href": "https://example.com"}}]
        post = make_post()
        post.footnote(2, "See [the source](https://example.com).")
        note = footnotes(post)[0]
        marked = [n for n in note["content"][0]["content"] if n.get("marks")]
        link_node = marked[0]
        assert link_node["text"] == "the source"
        assert link_node["marks"] == expected_marks


# ---------------------------------------------------------------------------
# TestFromMarkdownFootnotes
# ---------------------------------------------------------------------------

class TestFromMarkdownFootnotes:
    """End-to-end checks of footnote handling in Post.from_markdown."""

    @staticmethod
    def _render(markdown):
        """Parse *markdown* into a fresh post and hand the post back."""
        post = make_post()
        post.from_markdown(markdown)
        return post

    @staticmethod
    def _first_text(block):
        """Text of the first inline node inside the first child of *block*."""
        return block["content"][0]["content"][0]["text"]

    def test_basic_reference_and_definition(self):
        """One reference plus one definition gives one anchor and one block."""
        post = self._render("A claim.[^1]\n\n[^1]: The supporting detail.")
        found_anchors = anchors(post)
        assert len(found_anchors) == 1
        assert found_anchors[0]["attrs"]["number"] == 1
        blocks = footnotes(post)
        assert len(blocks) == 1
        assert blocks[0]["attrs"]["number"] == 1
        assert self._first_text(blocks[0]) == "The supporting detail."

    def test_definition_removed_from_body(self):
        """The definition line must not leak into any paragraph text."""
        post = self._render("A claim.[^1]\n\n[^1]: The note.")
        pieces = []
        for paragraph in find_nodes(post.draft_body, "paragraph"):
            for inline in paragraph.get("content", []):
                pieces.append(inline.get("text", ""))
        body_text = " ".join(pieces)
        assert "[^1]:" not in body_text

    def test_anchor_injected_mid_sentence(self):
        """A reference in the middle of a sentence splits the text around it."""
        post = self._render("Before[^1] and after.\n\n[^1]: Note.")
        inline = find_nodes(post.draft_body, "paragraph")[0]["content"]
        assert [piece["type"] for piece in inline] == ["text", "footnoteAnchor", "text"]
        assert inline[0]["text"] == "Before"
        assert inline[2]["text"] == " and after."

    def test_named_labels_numbered_by_first_appearance(self):
        """Numbers follow the order of reference, not the order of definition."""
        md = (
            "First[^book] then second[^study].\n\n"
            "[^study]: Second definition.\n"
            "[^book]: First definition.\n"
        )
        post = self._render(md)
        assert [a["attrs"]["number"] for a in anchors(post)] == [1, 2]
        by_number = sorted(footnotes(post), key=lambda b: b["attrs"]["number"])
        assert self._first_text(by_number[0]) == "First definition."
        assert self._first_text(by_number[1]) == "Second definition."

    def test_repeated_reference_reuses_number(self):
        """Two references to one label share a number and a single block."""
        post = self._render("One[^a] two[^a].\n\n[^a]: Note.")
        assert [a["attrs"]["number"] for a in anchors(post)] == [1, 1]
        assert len(footnotes(post)) == 1

    def test_link_inside_definition_preserved(self):
        """A Markdown link inside the definition keeps its href."""
        post = self._render("Claim.[^1]\n\n[^1]: See [docs](https://example.com).")
        note = footnotes(post)[0]
        marked = [n for n in note["content"][0]["content"] if n.get("marks")]
        link_node = marked[0]
        assert link_node["marks"][0]["attrs"]["href"] == "https://example.com"

    def test_multiline_definition(self):
        """An indented continuation line is folded into the definition."""
        md = "Claim.[^1]\n\n[^1]: First line\n continued on the next line."
        post = self._render(md)
        note = footnotes(post)[0]
        assert self._first_text(note) == "First line continued on the next line."

    def test_unreferenced_definition_still_appended(self):
        """A definition nobody references still yields a footnote block."""
        post = self._render("No references here.\n\n[^1]: Orphan note.")
        assert len(anchors(post)) == 0
        assert len(footnotes(post)) == 1

    def test_reference_without_definition_left_as_text(self):
        """A reference with no definition stays in the text verbatim."""
        post = self._render("A dangling[^missing] reference.")
        assert len(anchors(post)) == 0
        assert len(footnotes(post)) == 0
        first_paragraph = find_nodes(post.draft_body, "paragraph")[0]
        assert "[^missing]" in first_paragraph["content"][0]["text"]

    def test_definition_in_middle_moves_to_end(self):
        """A definition placed between paragraphs is emitted after both of them."""
        md = (
            "First paragraph.[^1]\n\n"
            "[^1]: First footnote.\n\n"
            "Second paragraph."
        )
        post = self._render(md)

        # Both paragraphs come first; the footnote block is last regardless of
        # where the definition appeared in the source.
        top_level_types = [block["type"] for block in body_content(post)]
        assert top_level_types == ["paragraph", "paragraph", "footnote"]

        paragraphs = find_nodes(post.draft_body, "paragraph")
        assert paragraphs[0]["content"][0]["text"] == "First paragraph."
        # The definition line did not become a paragraph in the body.
        assert paragraphs[1]["content"][0]["text"] == "Second paragraph."

        assert len(anchors(post)) == 1
        assert self._first_text(footnotes(post)[0]) == "First footnote."

    def test_no_footnotes_is_unchanged(self):
        """Markdown without footnote syntax produces no anchors or blocks."""
        post = self._render("Just a plain paragraph.")
        assert len(anchors(post)) == 0
        assert len(footnotes(post)) == 0
        first_paragraph = find_nodes(post.draft_body, "paragraph")[0]
        assert first_paragraph["content"][0]["text"] == (
            "Just a plain paragraph."
        )