From e2d6aa257a9dc38fb4afb049dda3f1ee9615c9c1 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 18:48:01 +0000 Subject: [PATCH 1/4] format: normalize string-literal quote style Convert string literals to a preferred quote style: double quotes by default, single quotes when the content contains a double quote, and backticks when it contains both a double and a single quote. Applies to rval strings (via the quoted_string choke point) and promisers. --- src/cfengine_cli/format.py | 74 ++++++++++++++++++++++++++++- tests/format/012_quotes.expected.cf | 22 +++++++++ tests/format/012_quotes.input.cf | 22 +++++++++ 3 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tests/format/012_quotes.expected.cf create mode 100644 tests/format/012_quotes.input.cf diff --git a/src/cfengine_cli/format.py b/src/cfengine_cli/format.py index 5fb46ca..16622f4 100644 --- a/src/cfengine_cli/format.py +++ b/src/cfengine_cli/format.py @@ -95,6 +95,76 @@ def text(node: Node) -> str: return node.text.decode("utf-8") +def _decode_literal(inner: str, delim: str) -> str: + """Return the logical character content of a quoted_string's inner text. + + Backtick strings are taken literally - CFEngine processes no escapes + inside them. Single- and double-quoted strings recognize only the escapes + for a backslash, a double quote and a single quote (plus a backslash- + newline line continuation); any other backslash is kept literally, matching + CFEngine's lexer. + """ + if delim == "`": + return inner + out = [] + i = 0 + while i < len(inner): + c = inner[i] + if c == "\\" and i + 1 < len(inner): + nxt = inner[i + 1] + if nxt in ("\\", '"', "'"): + out.append(nxt) + i += 2 + continue + if nxt == "\n": # line continuation: drop both characters + i += 2 + continue + out.append(c) + i += 1 + return "".join(out) + + +def _encode_literal(content: str, delim: str) -> str: + """Wrap logical content in delim, escaping as that quote style requires.""" + if delim == "`": + return "`" + content + "`" + escaped = content.replace("\\", "\\\\").replace(delim, "\\" + delim) + return delim + escaped + delim + + +def _normalize_quotes(literal: str) -> str: + """Normalize a CFEngine string literal to the preferred quote style. + + Double quotes are preferred, then single quotes, then backticks. The style + is chosen from the string's content so the quote characters it contains + need no escaping: no double quote -> double quotes; a double quote but no + single quote -> single quotes; both -> backticks. + """ + if ( + len(literal) < 2 + or literal[0] != literal[-1] + or literal[0] not in ("'", '"', "`") + ): + return literal + delim = literal[0] + content = _decode_literal(literal[1:-1], delim) + has_double = '"' in content + has_single = "'" in content + if not has_double: + target = '"' + elif not has_single: + target = "'" + else: + target = "`" + if target == "`" and "`" in content: + # Needs all three quote styles at once; a backtick string cannot + # contain a backtick, so leave the literal as written. + return literal + if target == delim: + return literal + return _encode_literal(content, target) + + class Formatter: """Accumulates formatted output line-by-line into a string buffer.""" @@ -202,6 +272,8 @@ def stringify_single_line_nodes(nodes: list[Node]) -> str: def stringify_single_line_node(node: Node) -> str: """Recursively flatten a node and its children into a single-line string.""" + if node.type == "quoted_string": + return _normalize_quotes(text(node)) if not node.children: return text(node) return stringify_single_line_nodes(node.children) @@ -462,7 +534,7 @@ def _promiser_text(children: list[Node]) -> str | None: promiser_node = next((c for c in children if c.type == "promiser"), None) if not promiser_node: return None - return text(promiser_node) + return _normalize_quotes(text(promiser_node)) def _promiser_line_with_stakeholder(children: list[Node]) -> str | None: diff --git a/tests/format/012_quotes.expected.cf b/tests/format/012_quotes.expected.cf new file mode 100644 index 0000000..3a221e8 --- /dev/null +++ b/tests/format/012_quotes.expected.cf @@ -0,0 +1,22 @@ +# Quote-style normalization: double quotes are preferred, then single +# quotes, then backticks - whichever needs no escaping for the quote +# characters the string contains. +bundle agent main +{ + vars: + "a" string => "hello"; + "b" string => "world"; + "c" string => 'say "hi"'; + "d" string => "it's here"; + "e" slist => { "one", "two" }; + "f" string => "hello"; + "g" string => 'say "hi"'; + "h" string => "it's here"; + "i" string => `it's "quoted"`; + "j" string => 'say "hi"'; + "k" string => `he said "hi" it's`; + + reports: + "a single-quoted promiser"; + "a backtick promiser"; +} diff --git a/tests/format/012_quotes.input.cf b/tests/format/012_quotes.input.cf new file mode 100644 index 0000000..d8f8f24 --- /dev/null +++ b/tests/format/012_quotes.input.cf @@ -0,0 +1,22 @@ +# Quote-style normalization: double quotes are preferred, then single +# quotes, then backticks - whichever needs no escaping for the quote +# characters the string contains. +bundle agent main +{ +vars: +"a" string => 'hello'; +"b" string => "world"; +"c" string => 'say "hi"'; +"d" string => 'it\'s here'; +"e" slist => { 'one', 'two' }; +"f" string => `hello`; +"g" string => `say "hi"`; +"h" string => `it's here`; +"i" string => `it's "quoted"`; +"j" string => "say \"hi\""; +"k" string => 'he said "hi" it\'s'; + +reports: +'a single-quoted promiser'; +`a backtick promiser`; +} From f179d82d28d2995f830b66974a9100cb119cbc3a Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 19:40:53 +0000 Subject: [PATCH 2/4] format: treat backtick escapes like other quotes; expand quote tests Address review on PR #164: - Backtick strings process the SAME escapes as single/double quotes (\\, \", \'); a backtick string is not literal. Its only special property is that the delimiter itself cannot be escaped, so a literal backtick can never appear inside one. Unified _decode_literal (drops its delim arg) and _encode_literal (now escapes backslashes for backticks too) accordingly. - Removed the unrequested backslash-newline line-continuation handling from _decode_literal. - Turned the literal-shape guard in _normalize_quotes into an assert, since callers always pass a real quoted_string literal. - Expanded the 012_quotes golden fixture with backslash cases (l/m/n/p) and an all-three-quotes case (o) to cover the unified escape behavior and the backtick encode branch. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01U4hEZuqiuEFH2zy8wWGwyD --- src/cfengine_cli/format.py | 57 ++++++++++++++--------------- tests/format/012_quotes.expected.cf | 5 +++ tests/format/012_quotes.input.cf | 5 +++ 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/cfengine_cli/format.py b/src/cfengine_cli/format.py index 16622f4..4f5081f 100644 --- a/src/cfengine_cli/format.py +++ b/src/cfengine_cli/format.py @@ -95,40 +95,37 @@ def text(node: Node) -> str: return node.text.decode("utf-8") -def _decode_literal(inner: str, delim: str) -> str: +def _decode_literal(inner: str) -> str: """Return the logical character content of a quoted_string's inner text. - Backtick strings are taken literally - CFEngine processes no escapes - inside them. Single- and double-quoted strings recognize only the escapes - for a backslash, a double quote and a single quote (plus a backslash- - newline line continuation); any other backslash is kept literally, matching - CFEngine's lexer. + CFEngine processes the same escapes for all three quote styles: an escaped + backslash, double quote, or single quote is unescaped, and any other + backslash is kept as-is. (A backtick string still cannot contain a literal + backtick, since the delimiter itself can't be escaped.) """ - if delim == "`": - return inner out = [] i = 0 while i < len(inner): c = inner[i] - if c == "\\" and i + 1 < len(inner): - nxt = inner[i + 1] - if nxt in ("\\", '"', "'"): - out.append(nxt) - i += 2 - continue - if nxt == "\n": # line continuation: drop both characters - i += 2 - continue + if c == "\\" and i + 1 < len(inner) and inner[i + 1] in ("\\", '"', "'"): + out.append(inner[i + 1]) + i += 2 + continue out.append(c) i += 1 return "".join(out) def _encode_literal(content: str, delim: str) -> str: - """Wrap logical content in delim, escaping as that quote style requires.""" - if delim == "`": - return "`" + content + "`" - escaped = content.replace("\\", "\\\\").replace(delim, "\\" + delim) + """Wrap content in delim, escaping as that quote style requires. + + Backslashes are always escaped. Double- and single-quoted strings also + escape their own delimiter; a backtick can't be escaped, so the caller must + only choose backticks when the content contains no backtick. + """ + escaped = content.replace("\\", "\\\\") + if delim != "`": + escaped = escaped.replace(delim, "\\" + delim) return delim + escaped + delim @@ -140,14 +137,13 @@ def _normalize_quotes(literal: str) -> str: need no escaping: no double quote -> double quotes; a double quote but no single quote -> single quotes; both -> backticks. """ - if ( - len(literal) < 2 - or literal[0] != literal[-1] - or literal[0] not in ("'", '"', "`") - ): - return literal + assert ( + len(literal) >= 2 + and literal[0] == literal[-1] + and literal[0] in ("'", '"', "`") + ), f"expected a quoted string literal, got {literal!r}" delim = literal[0] - content = _decode_literal(literal[1:-1], delim) + content = _decode_literal(literal[1:-1]) has_double = '"' in content has_single = "'" in content if not has_double: @@ -157,8 +153,9 @@ def _normalize_quotes(literal: str) -> str: else: target = "`" if target == "`" and "`" in content: - # Needs all three quote styles at once; a backtick string cannot - # contain a backtick, so leave the literal as written. + # A string containing a double quote, a single quote, and a backtick + # can't use any style without escaping, and a backtick can't be + # escaped, so leave the literal as the author wrote it. return literal if target == delim: return literal diff --git a/tests/format/012_quotes.expected.cf b/tests/format/012_quotes.expected.cf index 3a221e8..955fb0e 100644 --- a/tests/format/012_quotes.expected.cf +++ b/tests/format/012_quotes.expected.cf @@ -15,6 +15,11 @@ bundle agent main "i" string => `it's "quoted"`; "j" string => 'say "hi"'; "k" string => `he said "hi" it's`; + "l" string => "a\\b"; + "m" string => "c\\\\d"; + "n" string => "e\\f"; + "o" string => 'mix "q" it\'s `tick`'; + "p" string => `a\\b "c" it's`; reports: "a single-quoted promiser"; diff --git a/tests/format/012_quotes.input.cf b/tests/format/012_quotes.input.cf index d8f8f24..878bb7a 100644 --- a/tests/format/012_quotes.input.cf +++ b/tests/format/012_quotes.input.cf @@ -15,6 +15,11 @@ vars: "i" string => `it's "quoted"`; "j" string => "say \"hi\""; "k" string => 'he said "hi" it\'s'; +"l" string => 'a\\b'; +"m" string => "c\\\\d"; +"n" string => `e\\f`; +"o" string => 'mix "q" it\'s `tick`'; +"p" string => 'a\\b "c" it\'s'; reports: 'a single-quoted promiser'; From 98d1b5bb71c307bf4788f2a779938f6e2c456682 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 19:53:57 +0000 Subject: [PATCH 3/4] format: add line-continuation fixture case --- tests/format/012_quotes.expected.cf | 2 ++ tests/format/012_quotes.input.cf | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/format/012_quotes.expected.cf b/tests/format/012_quotes.expected.cf index 955fb0e..954f711 100644 --- a/tests/format/012_quotes.expected.cf +++ b/tests/format/012_quotes.expected.cf @@ -20,6 +20,8 @@ bundle agent main "n" string => "e\\f"; "o" string => 'mix "q" it\'s `tick`'; "p" string => `a\\b "c" it's`; + "q" string => "foo\ +bar"; reports: "a single-quoted promiser"; diff --git a/tests/format/012_quotes.input.cf b/tests/format/012_quotes.input.cf index 878bb7a..a4792fb 100644 --- a/tests/format/012_quotes.input.cf +++ b/tests/format/012_quotes.input.cf @@ -20,6 +20,8 @@ vars: "n" string => `e\\f`; "o" string => 'mix "q" it\'s `tick`'; "p" string => 'a\\b "c" it\'s'; +"q" string => "foo\ +bar"; reports: 'a single-quoted promiser'; From 9a302e609967c6ee1a74f8619728bacedbfa6ebf Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 24 Jun 2026 19:58:16 +0000 Subject: [PATCH 4/4] format: preserve line continuations when re-quoting Replaced the _decode_literal/_encode_literal helpers with a single-pass, escape-aware re-quote (_scan_quotes + _requote) so converting between quote styles passes through escaped backslashes, line continuations, and other backslash sequences unchanged, and only re-escapes the target delimiter quote character. The old decode/encode round-trip would mangle a single-quoted line-continuation string into a doubled backslash. Fixed fixture case q to convert a single-quoted line-continuation string to double quotes with the continuation preserved verbatim. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01U4hEZuqiuEFH2zy8wWGwyD --- src/cfengine_cli/format.py | 86 +++++++++++++++++++++----------- tests/format/012_quotes.input.cf | 4 +- 2 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/cfengine_cli/format.py b/src/cfengine_cli/format.py index 4f5081f..4be89d8 100644 --- a/src/cfengine_cli/format.py +++ b/src/cfengine_cli/format.py @@ -95,38 +95,67 @@ def text(node: Node) -> str: return node.text.decode("utf-8") -def _decode_literal(inner: str) -> str: - """Return the logical character content of a quoted_string's inner text. +def _scan_quotes(inner: str) -> tuple: + """Report which quote characters the inner text's logical content holds. - CFEngine processes the same escapes for all three quote styles: an escaped - backslash, double quote, or single quote is unescaped, and any other - backslash is kept as-is. (A backtick string still cannot contain a literal - backtick, since the delimiter itself can't be escaped.) + Returns (has_double, has_single, has_backtick). An escaped double or single + quote counts as that quote character; an escaped backslash, a line + continuation, and other backslash sequences contribute no quote character. """ - out = [] + has_double = has_single = has_backtick = False i = 0 - while i < len(inner): + n = len(inner) + while i < n: c = inner[i] - if c == "\\" and i + 1 < len(inner) and inner[i + 1] in ("\\", '"', "'"): - out.append(inner[i + 1]) + if c == "\\" and i + 1 < n: + nxt = inner[i + 1] + if nxt == '"': + has_double = True + elif nxt == "'": + has_single = True i += 2 continue - out.append(c) + if c == '"': + has_double = True + elif c == "'": + has_single = True + elif c == "`": + has_backtick = True i += 1 - return "".join(out) + return has_double, has_single, has_backtick -def _encode_literal(content: str, delim: str) -> str: - """Wrap content in delim, escaping as that quote style requires. +def _requote(inner: str, target: str) -> str: + """Re-emit a quoted_string's inner text using ``target`` as the quote char. - Backslashes are always escaped. Double- and single-quoted strings also - escape their own delimiter; a backtick can't be escaped, so the caller must - only choose backticks when the content contains no backtick. + Escape sequences that mean the same thing in every quote style - an escaped + backslash, a line continuation, or any other backslash sequence - pass + through unchanged. A literal double or single quote is escaped only when it + is the target delimiter, otherwise it is emitted bare. """ - escaped = content.replace("\\", "\\\\") - if delim != "`": - escaped = escaped.replace(delim, "\\" + delim) - return delim + escaped + delim + out = [] + i = 0 + n = len(inner) + while i < n: + c = inner[i] + if c == "\\" and i + 1 < n: + nxt = inner[i + 1] + if nxt == '"': + out.append('\\"' if target == '"' else '"') + elif nxt == "'": + out.append("\\'" if target == "'" else "'") + else: + out.append(c + nxt) + i += 2 + continue + if c == '"': + out.append('\\"' if target == '"' else '"') + elif c == "'": + out.append("\\'" if target == "'" else "'") + else: + out.append(c) + i += 1 + return "".join(out) def _normalize_quotes(literal: str) -> str: @@ -143,23 +172,22 @@ def _normalize_quotes(literal: str) -> str: and literal[0] in ("'", '"', "`") ), f"expected a quoted string literal, got {literal!r}" delim = literal[0] - content = _decode_literal(literal[1:-1]) - has_double = '"' in content - has_single = "'" in content + inner = literal[1:-1] + has_double, has_single, has_backtick = _scan_quotes(inner) if not has_double: target = '"' elif not has_single: target = "'" else: target = "`" - if target == "`" and "`" in content: - # A string containing a double quote, a single quote, and a backtick - # can't use any style without escaping, and a backtick can't be - # escaped, so leave the literal as the author wrote it. + if target == "`" and has_backtick: + # Contains a double quote, a single quote, and a backtick; no style can + # hold all three without escaping, and a backtick can't be escaped, so + # leave the literal as the author wrote it. return literal if target == delim: return literal - return _encode_literal(content, target) + return target + _requote(inner, target) + target class Formatter: diff --git a/tests/format/012_quotes.input.cf b/tests/format/012_quotes.input.cf index a4792fb..eed6fae 100644 --- a/tests/format/012_quotes.input.cf +++ b/tests/format/012_quotes.input.cf @@ -20,8 +20,8 @@ vars: "n" string => `e\\f`; "o" string => 'mix "q" it\'s `tick`'; "p" string => 'a\\b "c" it\'s'; -"q" string => "foo\ -bar"; +"q" string => 'foo\ +bar'; reports: 'a single-quoted promiser';