diff --git a/internal-api/src/main/java/datadog/trace/util/SubSequence.java b/internal-api/src/main/java/datadog/trace/util/SubSequence.java index 37e18327d8e..bd911163261 100644 --- a/internal-api/src/main/java/datadog/trace/util/SubSequence.java +++ b/internal-api/src/main/java/datadog/trace/util/SubSequence.java @@ -66,8 +66,10 @@ public int length() { @Override public SubSequence subSequence(int start, int end) { + // start/end are offsets in THIS view's coordinates (CharSequence contract), so the absolute + // end is beginIndex + end -- NOT beginIndex + start + end (which overshoots by `start`). int newBeginIndex = this.beginIndex + start; - int newEndIndex = this.beginIndex + start + end; + int newEndIndex = this.beginIndex + end; return new SubSequence(this.str, newBeginIndex, newEndIndex); } @@ -81,63 +83,147 @@ public void appendTo(StringBuilder builder) { if (beginIndex != endIndex) builder.append(this.str, beginIndex, endIndex); } - /** Returns the hash code as backingStr.substr(beginIndex, endIndex).hashCode() */ + /** + * The same value as {@code toString().hashCode()} -- the {@link String} hash polynomial over this + * window -- but computed directly over the backing characters so hashing a view does not + * materialize a substring. Stays consistent with {@link #equals}: a view, its content-equal + * {@code String}, and an equal-content view all share this hash. + */ @Override public int hashCode() { - return this.toString().hashCode(); + int h = 0; + for (int i = this.beginIndex; i < this.endIndex; ++i) { + h = 31 * h + this.str.charAt(i); + } + return h; } /** - * Also handles String comparisons this.equals(backingStr.substr(beginIndex, endIndex)) is true + * Dispatches on the argument's runtime type: a {@code String} takes the {@link #equals(String)} + * region-compare fast path; any other {@code CharSequence} is compared via {@link + * #contentEquals(CharSequence)}. So {@code this.equals(backingStr.substring(beginIndex, + * endIndex))} is true, and two views with equal content are equal. */ @Override public boolean equals(Object obj) { - if (!(obj instanceof CharSequence)) return false; + if (obj instanceof String) return this.equals((String) obj); + if (obj instanceof CharSequence) return this.contentEquals((CharSequence) obj); - return this.equals((CharSequence) obj); + return false; + } + + /** + * Equivalent to {@code toString().equals(other)} -- compares this window against the whole {@code + * other} String with no substring materialized. Delegates to {@link String#regionMatches} so it + * reuses the JDK's backing-array compare rather than a per-char loop. + */ + public final boolean equals(String other) { + return other != null + && other.length() == this.length() + && this.str.regionMatches(this.beginIndex, other, 0, other.length()); } - public final boolean equals(CharSequence that) { - int thisLen = this.length(); - int thatLen = that.length(); + /** + * Equivalent to {@code toString().contentEquals(that)}: true when {@code that} has the same + * length and characters as this window. The general char-by-char comparison for any {@code + * CharSequence}; prefer {@link #equals(String)} when the argument is known to be a {@code + * String}. + */ + public final boolean contentEquals(CharSequence that) { + if (that == null) return false; - if (thisLen != thatLen) return false; + int len = this.length(); + if (len != that.length()) return false; - for (int i = 0; i < Math.min(this.length(), that.length()); ++i) { + for (int i = 0; i < len; ++i) { if (this.charAt(i) != that.charAt(i)) return false; } return true; } - /** Case-insensitive content comparison; mirrors {@link String#equalsIgnoreCase(String)}. */ - public final boolean equalsIgnoreCase(CharSequence that) { - int len = this.length(); - if (that == null || len != that.length()) return false; + /** + * Case-insensitive counterpart of {@link #equals(String)}. Like {@link + * String#equalsIgnoreCase(String)}, a {@code null} argument is {@code false} rather than an + * error. + */ + public final boolean equalsIgnoreCase(String other) { + return other != null + && other.length() == this.length() + && this.str.regionMatches(true, this.beginIndex, other, 0, other.length()); + } - for (int i = 0; i < len; ++i) { - char a = this.charAt(i); - char b = that.charAt(i); - if (a != b) { - // Same two-way fold String.regionMatches(ignoreCase) uses (handles locale edge cases). - char au = Character.toUpperCase(a); - char bu = Character.toUpperCase(b); - if (au != bu && Character.toLowerCase(au) != Character.toLowerCase(bu)) { - return false; - } - } - } - return true; + /** + * Equivalent to {@code toString().startsWith(prefix)}. The window guard ({@code prefix.length() + * <= length()}) keeps the delegated read inside {@code [beginIndex, endIndex)}. + */ + public final boolean startsWith(String prefix) { + return prefix.length() <= this.length() && this.str.startsWith(prefix, this.beginIndex); } - /** True if this sub-sequence begins with {@code prefix} (content comparison, no allocation). */ - public final boolean startsWith(CharSequence prefix) { - int prefixLen = prefix.length(); - if (prefixLen > this.length()) return false; + /** + * Equivalent to {@code length() > 0 && charAt(0) == c}, the single-character {@link + * #startsWith(String)}. + */ + public final boolean startsWith(char c) { + return this.beginIndex < this.endIndex && this.str.charAt(this.beginIndex) == c; + } - for (int i = 0; i < prefixLen; ++i) { - if (this.charAt(i) != prefix.charAt(i)) return false; - } - return true; + /** + * Equivalent to {@code toString().endsWith(suffix)}. Implemented as a prefix match anchored at + * {@code endIndex - suffix.length()} so the read stays inside this window. + */ + public final boolean endsWith(String suffix) { + int suffixLen = suffix.length(); + return suffixLen <= this.length() && this.str.startsWith(suffix, this.endIndex - suffixLen); + } + + /** + * Equivalent to {@code length() > 0 && charAt(length() - 1) == c}, the single-character {@link + * #endsWith(String)}. + */ + public final boolean endsWith(char c) { + return this.beginIndex < this.endIndex && this.str.charAt(this.endIndex - 1) == c; + } + + /** + * Equivalent to {@code toString().indexOf(needle)}: the offset of the first full occurrence of + * {@code needle} within this window relative to the window start, or {@code -1} if it does not + * occur fully in range. {@link String#indexOf(String, int)} returns the earliest occurrence at or + * after {@code beginIndex}, so a single bound check against {@code endIndex} is exact. + */ + public final int indexOf(String needle) { + int idx = this.str.indexOf(needle, this.beginIndex); + return (idx >= 0 && idx + needle.length() <= this.endIndex) ? idx - this.beginIndex : -1; + } + + /** + * Equivalent to {@code toString().indexOf(c)}: the offset of the first {@code c} within this + * window relative to the window start, or {@code -1} if it does not occur in range. + */ + public final int indexOf(char c) { + int idx = this.str.indexOf(c, this.beginIndex); + return (idx >= 0 && idx < this.endIndex) ? idx - this.beginIndex : -1; + } + + /** + * Equivalent to {@code toString().lastIndexOf(needle)}: the offset of the last full occurrence of + * {@code needle} within this window relative to the window start, or {@code -1} if it does not + * occur fully in range. Searches back from {@code endIndex - needle.length()} -- the latest start + * whose end still fits the window -- so the lower bound is a single check against {@code + * beginIndex}. + */ + public final int lastIndexOf(String needle) { + int idx = this.str.lastIndexOf(needle, this.endIndex - needle.length()); + return (idx >= this.beginIndex) ? idx - this.beginIndex : -1; + } + + /** + * Equivalent to {@code toString().lastIndexOf(c)}: the offset of the last {@code c} within this + * window relative to the window start, or {@code -1} if it does not occur in range. + */ + public final int lastIndexOf(char c) { + int idx = this.str.lastIndexOf(c, this.endIndex - 1); + return (idx >= this.beginIndex) ? idx - this.beginIndex : -1; } @Override diff --git a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java index 86e27039de6..1019a61a537 100644 --- a/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java +++ b/internal-api/src/test/java/datadog/trace/util/SubSequenceTest.java @@ -111,27 +111,136 @@ public void appendToBuilder() { } @Test - public void equalsIgnoreCase() { - SubSequence call = SubSequence.of("xx CALL yy", 3, 7); // "CALL" + public void subSequenceOfView() { + // Instance subSequence(start, end): start/end are in THIS view's coordinates (CharSequence + // contract), regardless of where the view sits in the backing string. + SubSequence view = SubSequence.of("abcdefghij", 2, 8); // "cdefgh" + SubSequence mid = view.subSequence(1, 4); // chars [1, 4) of "cdefgh" -> "def" + assertEquals("def", mid.toString()); + assertEquals(3, mid.beginIndex()); // absolute begin = 2 + 1 + assertEquals(6, mid.endIndex()); // absolute end = 2 + 4 (NOT 2 + 1 + 4) + + // full window and empty are exact + assertEquals("cdefgh", view.subSequence(0, view.length()).toString()); + assertEquals("", view.subSequence(2, 2).toString()); + + // nested: subSequence of a non-zero-start view stays correct (the case the old bug broke worst) + assertEquals("ef", mid.subSequence(1, 3).toString()); // chars [1, 3) of "def" -> "ef" + } + + @Test + public void equalsString() { + // "call" sits at [6, 10) inside the backing string, flanked by other text. + SubSequence call = SubSequence.of("xxxxx call yyyyy", 6, 10); + assertTrue(call.equals("call")); + assertFalse(call.equals("CALL")); // case-sensitive + assertFalse(call.equals("cal")); // shorter + assertFalse(call.equals("calls")); // longer (would overshoot endIndex) + assertFalse(call.equals((Object) null)); + + // equals(Object) routes a String through the region-compare fast path... + assertTrue(call.equals((Object) "call")); + // ...and any other CharSequence (incl. another SubSequence) through contentEquals. + assertTrue(call.equals((Object) new StringBuilder("call"))); + assertTrue(call.equals((Object) SubSequence.of("xxxxx call yyyyy", 6, 10))); + assertFalse(call.equals((Object) Integer.valueOf(4))); + } + + @Test + public void contentEqualsCharSequence() { + SubSequence call = SubSequence.of("xxxxx call yyyyy", 6, 10); // "call" + assertTrue(call.contentEquals("call")); // String is a CharSequence + assertTrue(call.contentEquals(new StringBuilder("call"))); + assertTrue(call.contentEquals(SubSequence.of("a call b", 2, 6))); + assertFalse(call.contentEquals("CALL")); // case-sensitive + assertFalse(call.contentEquals("cal")); // length mismatch + assertFalse(call.contentEquals(null)); + } + + @Test + public void equalsIgnoreCaseString() { + SubSequence call = SubSequence.of("xxxxx CaLl yyyyy", 6, 10); assertTrue(call.equalsIgnoreCase("call")); assertTrue(call.equalsIgnoreCase("CALL")); - assertTrue(call.equalsIgnoreCase("CaLl")); - assertFalse(call.equalsIgnoreCase("calls")); // length differs - assertFalse(call.equalsIgnoreCase("cant")); // same length, content differs + assertFalse(call.equalsIgnoreCase("cal")); + assertFalse(call.equalsIgnoreCase("calls")); + assertFalse(call.equalsIgnoreCase(null)); // matches String.equalsIgnoreCase(null) + } - // case-sensitive equals stays case-sensitive - assertFalse(call.equals("call")); - assertTrue(call.equals("CALL")); + @Test + public void startsWithString() { + SubSequence view = SubSequence.of("xx{call}xx", 2, 8); // "{call}" + assertTrue(view.startsWith("{")); + assertTrue(view.startsWith("{call")); + assertTrue(view.startsWith("{call}")); + assertFalse(view.startsWith("call")); + assertFalse(view.startsWith("{call}x")); // overshoots endIndex even though backing has 'x' + assertTrue(view.startsWith("")); // empty prefix + } + + @Test + public void endsWithString() { + SubSequence view = SubSequence.of("xx{call}xx", 2, 8); // "{call}" + assertTrue(view.endsWith("}")); + assertTrue(view.endsWith("call}")); + assertTrue(view.endsWith("{call}")); + assertFalse(view.endsWith("call")); + assertFalse(view.endsWith("x{call}")); // undershoots beginIndex even though backing has 'x' + assertTrue(view.endsWith("")); // empty suffix + } + + @Test + public void indexOfString() { + SubSequence view = SubSequence.of("aa-bc-bc-aa", 3, 8); // "bc-bc" + assertEquals(0, view.indexOf("bc")); // window-relative offset of the first occurrence + assertEquals(2, view.indexOf("-bc")); // non-zero relative offset + assertEquals(2, view.indexOf("-")); + assertEquals(-1, view.indexOf("aa")); // present in backing string but outside the window + assertEquals(-1, view.indexOf("bc-bc-")); // overshoots endIndex + } + + @Test + public void lastIndexOfString() { + SubSequence view = SubSequence.of("aa-bc-bc-aa", 3, 8); // "bc-bc" + assertEquals(3, view.lastIndexOf("bc")); // last "bc" -> relative 3 + assertEquals(2, view.lastIndexOf("-")); + assertEquals(-1, view.lastIndexOf("aa")); // outside the window on both ends + assertEquals(-1, view.lastIndexOf("bc-bc-")); // overshoots endIndex + } + + @Test + public void startsWithChar() { + SubSequence view = SubSequence.of("xx{call}xx", 2, 8); // "{call}" + assertTrue(view.startsWith('{')); + assertFalse(view.startsWith('c')); // 'c' is at offset 1, not the start + assertFalse(view.startsWith('x')); // backing char before beginIndex, outside the window + assertFalse(SubSequence.EMPTY.startsWith('x')); // empty window + } + + @Test + public void endsWithChar() { + SubSequence view = SubSequence.of("xx{call}xx", 2, 8); // "{call}" + assertTrue(view.endsWith('}')); + assertFalse(view.endsWith('l')); // 'l' is one before the end + assertFalse(view.endsWith('x')); // backing char at endIndex, outside the window + assertFalse(SubSequence.EMPTY.endsWith('x')); // empty window + } + + @Test + public void indexOfChar() { + SubSequence view = SubSequence.of("aa-bc-bc-aa", 3, 8); // "bc-bc" + assertEquals(0, view.indexOf('b')); // window-relative offset of the first occurrence + assertEquals(1, view.indexOf('c')); + assertEquals(2, view.indexOf('-')); + assertEquals(-1, view.indexOf('a')); // present in backing string but outside the window } @Test - public void startsWith() { - SubSequence braceCall = SubSequence.of("xx{call} yy", 2, 7); // "{call" - assertTrue(braceCall.startsWith("")); - assertTrue(braceCall.startsWith("{")); - assertTrue(braceCall.startsWith("{ca")); - assertTrue(braceCall.startsWith("{call")); - assertFalse(braceCall.startsWith("call")); // not the prefix - assertFalse(braceCall.startsWith("{calls and more")); // prefix longer than sequence + public void lastIndexOfChar() { + SubSequence view = SubSequence.of("aa-bc-bc-aa", 3, 8); // "bc-bc" + assertEquals(3, view.lastIndexOf('b')); // last 'b' -> relative 3 + assertEquals(4, view.lastIndexOf('c')); + assertEquals(2, view.lastIndexOf('-')); + assertEquals(-1, view.lastIndexOf('a')); // outside the window on both ends } }