Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 98 additions & 1 deletion src/cfengine_cli/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,101 @@ def text(node: Node) -> str:
return node.text.decode("utf-8")


def _scan_quotes(inner: str) -> tuple:
"""Report which quote characters the inner text's logical content holds.

Returns (has_double, has_single, has_backtick). An escaped double or single
quote counts as that quote character; an escaped backslash, a line
continuation, and other backslash sequences contribute no quote character.
"""
has_double = has_single = has_backtick = False
i = 0
n = len(inner)
while i < n:
c = inner[i]
if c == "\\" and i + 1 < n:
nxt = inner[i + 1]
if nxt == '"':
has_double = True
elif nxt == "'":
has_single = True
i += 2
continue
if c == '"':
has_double = True
elif c == "'":
has_single = True
elif c == "`":
has_backtick = True
i += 1
return has_double, has_single, has_backtick


def _requote(inner: str, target: str) -> str:
"""Re-emit a quoted_string's inner text using ``target`` as the quote char.

Escape sequences that mean the same thing in every quote style - an escaped
backslash, a line continuation, or any other backslash sequence - pass
through unchanged. A literal double or single quote is escaped only when it
is the target delimiter, otherwise it is emitted bare.
"""
out = []
i = 0
n = len(inner)
while i < n:
c = inner[i]
if c == "\\" and i + 1 < n:
nxt = inner[i + 1]
if nxt == '"':
out.append('\\"' if target == '"' else '"')
elif nxt == "'":
out.append("\\'" if target == "'" else "'")
else:
out.append(c + nxt)
i += 2
continue
if c == '"':
out.append('\\"' if target == '"' else '"')
elif c == "'":
out.append("\\'" if target == "'" else "'")
else:
out.append(c)
i += 1
return "".join(out)


def _normalize_quotes(literal: str) -> str:
    """Rewrite a CFEngine string literal in the preferred quote style.

    ``literal`` is the full literal including its delimiters. The preference
    order is double quotes, single quotes, backticks, and the style is picked
    from the content so that the quote characters inside need no escaping:

    * content without a double quote -> double quotes
    * content with a double quote but no single quote -> single quotes
    * content with both -> backticks

    The literal is returned untouched when it already uses the chosen style,
    or when the content holds a double quote, a single quote and a backtick.
    """
    assert (
        len(literal) >= 2
        and literal[0] == literal[-1]
        and literal[0] in ("'", '"', "`")
    ), f"expected a quoted string literal, got {literal!r}"
    current, body = literal[0], literal[1:-1]
    saw_double, saw_single, saw_backtick = _scan_quotes(body)
    if saw_double and saw_single:
        if saw_backtick:
            # All three quote characters are present; no style can hold them
            # all without escaping, and a backtick can't be escaped, so keep
            # the literal exactly as the author wrote it.
            return literal
        wanted = "`"
    elif saw_double:
        wanted = "'"
    else:
        wanted = '"'
    if wanted == current:
        return literal
    return f"{wanted}{_requote(body, wanted)}{wanted}"


class Formatter:
"""Accumulates formatted output line-by-line into a string buffer."""

Expand Down Expand Up @@ -202,6 +297,8 @@ def stringify_single_line_nodes(nodes: list[Node]) -> str:

def stringify_single_line_node(node: Node) -> str:
    """Flatten ``node`` and everything beneath it into one single-line string.

    A ``quoted_string`` node is rendered from its own text with its quote
    style normalized. Any other node with children is rendered by stringifying
    those children; a node without children is rendered as its own text.
    """
    if node.type == "quoted_string":
        return _normalize_quotes(text(node))
    if node.children:
        return stringify_single_line_nodes(node.children)
    return text(node)
Expand Down Expand Up @@ -462,7 +559,7 @@ def _promiser_text(children: list[Node]) -> str | None:
promiser_node = next((c for c in children if c.type == "promiser"), None)
if not promiser_node:
return None
return text(promiser_node)
return _normalize_quotes(text(promiser_node))


def _promiser_line_with_stakeholder(children: list[Node]) -> str | None:
Expand Down
29 changes: 29 additions & 0 deletions tests/format/012_quotes.expected.cf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Quote-style normalization: double quotes are preferred, then single
# quotes, then backticks - whichever needs no escaping for the quote
# characters the string contains.
bundle agent main
{
vars:
"a" string => "hello";
"b" string => "world";
"c" string => 'say "hi"';
"d" string => "it's here";
"e" slist => { "one", "two" };
"f" string => "hello";
"g" string => 'say "hi"';
"h" string => "it's here";
"i" string => `it's "quoted"`;
"j" string => 'say "hi"';
"k" string => `he said "hi" it's`;
"l" string => "a\\b";
"m" string => "c\\\\d";
"n" string => "e\\f";
"o" string => 'mix "q" it\'s `tick`';
"p" string => `a\\b "c" it's`;
Comment thread
olehermanse marked this conversation as resolved.
"q" string => "foo\
bar";

reports:
"a single-quoted promiser";
"a backtick promiser";
}
29 changes: 29 additions & 0 deletions tests/format/012_quotes.input.cf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Quote-style normalization: double quotes are preferred, then single
# quotes, then backticks - whichever needs no escaping for the quote
# characters the string contains.
bundle agent main
{
vars:
"a" string => 'hello';
"b" string => "world";
"c" string => 'say "hi"';
"d" string => 'it\'s here';
"e" slist => { 'one', 'two' };
"f" string => `hello`;
"g" string => `say "hi"`;
"h" string => `it's here`;
"i" string => `it's "quoted"`;
"j" string => "say \"hi\"";
"k" string => 'he said "hi" it\'s';
"l" string => 'a\\b';
"m" string => "c\\\\d";
"n" string => `e\\f`;
"o" string => 'mix "q" it\'s `tick`';
"p" string => 'a\\b "c" it\'s';
Comment thread
olehermanse marked this conversation as resolved.
"q" string => 'foo\
bar';

reports:
'a single-quoted promiser';
`a backtick promiser`;
}
Loading