diff --git a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java index 28f9e39c710..d62ad037cf4 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/CoreTracer.java @@ -37,6 +37,7 @@ import datadog.trace.api.EndpointTracker; import datadog.trace.api.IdGenerationStrategy; import datadog.trace.api.InstrumenterConfig; +import datadog.trace.api.KnownTagIds; import datadog.trace.api.Pair; import datadog.trace.api.TagMap; import datadog.trace.api.TraceConfig; @@ -653,6 +654,14 @@ private CoreTracer( // preload this enum to avoid triggering classloading on the hot path TraceCollector.PublishState.values(); + // Dense known-tag store (experimental, OFF by default): registering the KnownTags resolver + // flips the dense store live so known tags store without a per-tag Entry. Gated by a system + // property for A/B benchmarking; when off, keyOf stays a no-op and tag storage is byte-identical + // to today. Promote to a Config flag if this becomes a permanent rollout. + if (Boolean.getBoolean("dd.trace.dense.tags.enabled")) { + KnownTagIds.init(); + } + if (reportInTracerFlare) { TracerFlare.addReporter(this); } @@ -2195,13 +2204,27 @@ protected static final DDSpanContext buildSpanContext( // By setting the tags on the context we apply decorators to any tags that have been set via // the builder. This is the order that the tags were added previously, but maybe the `tags` // set in the builder should come last, so that they override other tags. - context.setAllTags(mergedTracerTags, mergedTracerTagsNeedsIntercept); + // + // mergedTracerTags is trace-level shared state and the precedence floor (everything below + // overrides it). When it carries no interceptable tags, attach it as a read-through PARENT + // (shared by reference, no per-span copy) instead of copying its entries into the span. 
When + // it does need interception, fall back to copying (the interceptor's per-span side-effects + // can't be shared by reference). + if (mergedTracerTagsNeedsIntercept) { + context.setAllTags(mergedTracerTags, true); + } else { + context.parentTags(mergedTracerTags); + } context.setAllTags(tagLedger); context.setAllTags(coreTags, coreTagsNeedsIntercept); context.setAllTags(rootSpanTags, rootSpanTagsNeedsIntercept); context.setAllTags(contextualTags); - // remove version here since will be done later on the postProcessor. - // it will allow knowing if it will be set manually or not + // Version is added later by the postProcessor (InternalTagsAdder), only if not already set + // during the request. Config version is kept out of the trace-level bundle (see + // withTracerTags), so this removal now only wipes a version set via the span builder — + // keeping + // the existing semantics where a builder-set version is replaced by the config version. Under + // read-through this is a cheap local removal (version isn't in the parent, so no tombstone). context.removeTag(Tags.VERSION); return context; } @@ -2432,6 +2455,13 @@ static TagMap withTracerTags( Map userSpanTags, Config config, TraceConfig traceConfig) { final TagMap result = TagMap.create(userSpanTags.size() + 5); result.putAll(userSpanTags); + // Version is conditionally managed by InternalTagsAdder (added only when service == DD_SERVICE + // and not set during the request), so keep it OUT of the trace-level bundle. This matters under + // read-through: the bundle becomes a shared parent, and a per-span removeTag(VERSION) on a key + // that lived in the parent would mint a per-span tombstone. With version excluded here, the + // per-span removeTag (retained, to wipe a builder-set version) is a cheap local op, never a + // tombstone. Behavior is unchanged: version was applied-then-removed at build today. 
+ result.remove(Tags.VERSION); if (null != config) { // static if (!config.getEnv().isEmpty()) { result.set("env", config.getEnv()); diff --git a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java index 520311a20c1..e9822b9c155 100644 --- a/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java +++ b/dd-trace-core/src/main/java/datadog/trace/core/DDSpanContext.java @@ -1045,6 +1045,21 @@ void setAllTags(final TagMap map, boolean needsIntercept) { } } + /** + * Attaches {@code parent} as a read-through parent of this span's tags instead of copying its + * entries in (level-split phase 1). The parent must be frozen and free of interceptable tags — + * the caller gates on {@code !needsIntercept}, since read-through bypasses the per-span + * interceptor side-effects that {@link #setAllTags(TagMap, boolean)} applies. + */ + void parentTags(final TagMap parent) { + if (parent == null || parent.isEmpty()) { + return; + } + synchronized (unsafeTags) { + unsafeTags.withParent(parent); + } + } + void setAllTags(final TagMap.Ledger ledger) { if (ledger == null) { return; diff --git a/internal-api/build.gradle.kts b/internal-api/build.gradle.kts index 6bce24ae27c..45b1ef6faa5 100644 --- a/internal-api/build.gradle.kts +++ b/internal-api/build.gradle.kts @@ -270,6 +270,7 @@ dependencies { api("com.datadoghq:dd-javac-plugin-client:0.2.2") testImplementation("org.snakeyaml:snakeyaml-engine:2.9") + testImplementation("org.openjdk.jol:jol-core:0.17") // StringIndexFootprintTest object-layout measurement testImplementation(project(":utils:test-utils")) testImplementation(libs.bundles.junit5) testImplementation("org.junit.vintage:junit-vintage-engine:${libs.versions.junit5.get()}") diff --git a/internal-api/src/jmh/java/datadog/trace/api/DenseStoreAllocBenchmark.java b/internal-api/src/jmh/java/datadog/trace/api/DenseStoreAllocBenchmark.java new file mode 100644 index 
00000000000..df0f6f3e1dd --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/api/DenseStoreAllocBenchmark.java @@ -0,0 +1,122 @@ +package datadog.trace.api; + +import datadog.trace.bootstrap.instrumentation.api.Tags; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** + * Deterministic allocation A/B for the dense known-tag store, using the REAL {@link KnownTagIds} + * resolver (a {@code StringIndex} probe + a constant-returning {@code switch} — allocation-free, + * exactly like production). An earlier synthetic prefix resolver allocated in {@code keyOf} + * (substring) and {@code nameOf} (concat), contaminating the dense arm; this measures the store, + * not the resolver. + * + *

Models how a real span's tags route: {@code today} = all custom (what ships now — every tag + * buckets, since nothing is registered as known), {@code dense} = the same tag count with a + * realistic fraction routed to the dense store (real known tag names) and the rest custom. Run with + * {@code -prof gc}; the {@code gc.alloc.rate.norm} (B/op) delta at the same {@code tagCount} is + * what enabling the dense store does to a real span's per-build allocation. + */ +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Warmup(iterations = 2, time = 2) +@Measurement(iterations = 3, time = 2) +@Fork(1) +@Threads(1) +public class DenseStoreAllocBenchmark { + + // Real stored (dense-routed) tag names — a realistic web/db span's known set. + static final String[] KNOWN = + new String[] { + DDTags.BASE_SERVICE, + Tags.VERSION, + Tags.COMPONENT, + Tags.SPAN_KIND, + Tags.HTTP_METHOD, + Tags.HTTP_ROUTE, + Tags.DB_TYPE, + Tags.DB_INSTANCE, + Tags.PEER_HOSTNAME, + Tags.DB_USER, + DDTags.LANGUAGE_TAG_KEY, + Tags.PEER_PORT, + }; + + // today = all custom (all bucket, what ships now); dense = ~70% known + custom (a real span); + // allKnown = 100% known (the trace-tier read-through parent's shape — exercises lazy buckets). + @Param({"today", "dense", "allKnown"}) + String scenario; + + @Param({"7", "12"}) + int tagCount; + + private String[] keys; + private String[] values; + + @Setup(Level.Trial) + public void setup() { + KnownTagIds.init(); // registers the real (allocation-free) resolver + int knownCount; + if ("allKnown".equals(scenario)) { + knownCount = tagCount; // 100% known (<= KNOWN.length) + } else if ("dense".equals(scenario)) { + knownCount = (tagCount * 7) / 10; // ~70% known + custom + } else { + knownCount = 0; // today: all custom (all bucket) + } + this.keys = new String[tagCount]; + this.values = new String[tagCount]; + for (int i = 0; i < tagCount; i++) { + this.keys[i] = i < knownCount ? KNOWN[i] : "custom.tag." 
+ i; + this.values[i] = "value-" + i; + } + } + + @Benchmark + public TagMap buildMap() { + TagMap m = TagMap.create(16); + for (int i = 0; i < tagCount; i++) { + m.set(keys[i], values[i]); + } + return m; + } + + @Benchmark + public void buildAndSerialize(Blackhole bh) { + TagMap m = TagMap.create(16); + for (int i = 0; i < tagCount; i++) { + m.set(keys[i], values[i]); + } + // forEach: the alloc-free flyweight emit for dense + m.forEach(reader -> bh.consume(reader.objectValue())); + bh.consume(m); + } + + @Benchmark + public void buildAndSerializeViaIterator(Blackhole bh) { + TagMap m = TagMap.create(16); + for (int i = 0; i < tagCount; i++) { + m.set(keys[i], values[i]); + } + // models the REAL serializer's count pre-pass (TraceMapperV0_4:95): the EntryReader iterator + // materializes an Entry per dense tag -> should erase the dense alloc win. + for (TagMap.EntryReader reader : m) { + bh.consume(reader.objectValue()); + } + bh.consume(m); + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/StringIndexSwitchBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/StringIndexSwitchBenchmark.java new file mode 100644 index 00000000000..916cfe70059 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/StringIndexSwitchBenchmark.java @@ -0,0 +1,299 @@ +package datadog.trace.util; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.CompilerControl; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * The third {@link StringIndex} use case: replacing a {@code switch} over interned {@code String} + * literals that maps a key to a small {@code int} id (exactly what {@code TagInterceptor} does to + * decide whether/how to intercept a tag). 
Both forms resolve a key to an id, 0 == "not found". + * + *

Compared: + * + *

+ * + *

What this measures: two axes. A prior investigation found the {@code TagInterceptor} + * switch wasn't being inlined / specialized into its hot caller. So each form is measured across + * (a) inlining — {@code _inlined} vs {@code _noinline} (a real call, {@code TagInterceptor}'s + * actual regime) via {@link CompilerControl} — and (b) key shape — a constant key vs a runtime, + * varied key. The results (below) land the teaching point: the dominant axis is + * key-constancy, not inlining. At steady state the inline-vs-not gap is small for both + * forms; what sinks the switch is a runtime, varied key (it can't specialize), while the + * StringIndex {@code Support} path stays flat across both axes — so the win is largest exactly + * where {@code TagInterceptor} lives. + * + *

The {@code _inlined} and {@code _noinline} helpers carry duplicate bodies on purpose: that's + * the only way to pin each form's inlining decision independently. + * + *

{@code @Threads(8)}; read-only, so no store dilutes the signal. Hit keys are the interned + * literals (the {@code ==} fast path StringIndex and the switch both get); misses are distinct and + * never present. Run via {@code -Pjmh.includes=StringIndexSwitchBenchmark} (add {@code -prof gc} — + * should be ~0 B/op both ways; this proves throughput, not allocation). + * + *

JDK 17 results (Apple M1, quiet machine, {@code @Fork(5)}, {@code @Threads(8)}; M ops/s, + * ±1–5%): + * + *

{@code
+ * key             switch (inl / noinl)   stringIndex (inl / noinl)
+ * const            2778 / 2769            2047 / 2035
+ * hit  (runtime)   1161 / 1166            2147 / 2152
+ * miss             2083 / 2050            2546 / 2539
+ * }
+ * + *

Two takeaways: + * + *

+ * + *

So the {@code const} arm is the control: it exposes the switch's "fast" as a single-key + * specialization artifact — drop the constant and the switch is ~half StringIndex's throughput. + */ +@Fork(5) // matches the documented @Fork(5) numbers; the switch's const-key arm is profile-bimodal +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@Threads(8) +@State(Scope.Benchmark) +public class StringIndexSwitchBenchmark { + static final String[] KEYS = { + "alpha", "bravo", "charlie", "delta", "echo", "foxtrot", "golf", "hotel", + "india", "juliet", "kilo", "lima", "mike", "november", "oscar", "papa" + }; + + // A compile-time-constant hit key: the switch's best case. The const_* arms pair it with INLINE + // vs DONT_INLINE; the class-level results show both near the same rate (2778 vs 2769 M ops/s), + // so that ceiling does not depend on the call inlining. The class Javadoc attributes it to + // single-key specialization. NOTE(review): whether constant folding through the inlined call also + // contributes is not shown by those numbers -- confirm. TagInterceptor's real regime is a + // runtime, varied tag through a non-inlined call, which is where StringIndex wins. + static final String CONST_KEY = "mike"; + + /** Distinct String instances that are never present, for the miss path. */ + static final String[] MISSES = newMisses(); + + static String[] newMisses() { + String[] misses = new String[KEYS.length * 2]; + for (int i = 0; i < misses.length; ++i) { + misses[i] = "dne-" + i; + } + return misses; + } + + // StringIndex placed arrays + slot-aligned ids, pulled into static final fields so the JIT folds + // the refs to constants (the hot path StringIndex recommends). IDS[slot] is the 1-based id; + // empty slots stay 0, which doubles as the "not found" sentinel. 
+ static final int[] HASHES; + static final String[] NAMES; + static final int[] IDS; + + static { + StringIndex.Data data = StringIndex.Support.create(KEYS); + HASHES = data.hashes; + NAMES = data.names; + IDS = new int[HASHES.length]; + for (int i = 0; i < KEYS.length; ++i) { + IDS[StringIndex.Support.indexOf(HASHES, NAMES, KEYS[i])] = i + 1; // 1-based; 0 = not found + } + } + + /** Per-thread cursors so threads don't contend on a shared index under {@code @Threads(8)}. */ + @State(Scope.Thread) + public static class Cursor { + int hit = 0; + int miss = 0; + + String nextHit() { + int i = hit + 1; + if (i >= KEYS.length) { + i = 0; + } + hit = i; + return KEYS[i]; + } + + String nextMiss() { + int i = miss + 1; + if (i >= MISSES.length) { + i = 0; + } + miss = i; + return MISSES[i]; + } + } + + @CompilerControl(CompilerControl.Mode.INLINE) + static int switchInline(String key) { + switch (key) { + case "alpha": + return 1; + case "bravo": + return 2; + case "charlie": + return 3; + case "delta": + return 4; + case "echo": + return 5; + case "foxtrot": + return 6; + case "golf": + return 7; + case "hotel": + return 8; + case "india": + return 9; + case "juliet": + return 10; + case "kilo": + return 11; + case "lima": + return 12; + case "mike": + return 13; + case "november": + return 14; + case "oscar": + return 15; + case "papa": + return 16; + default: + return 0; + } + } + + // Duplicate body, pinned non-inlinable -- TagInterceptor's actual call regime. 
+ @CompilerControl(CompilerControl.Mode.DONT_INLINE) + static int switchNoInline(String key) { + switch (key) { + case "alpha": + return 1; + case "bravo": + return 2; + case "charlie": + return 3; + case "delta": + return 4; + case "echo": + return 5; + case "foxtrot": + return 6; + case "golf": + return 7; + case "hotel": + return 8; + case "india": + return 9; + case "juliet": + return 10; + case "kilo": + return 11; + case "lima": + return 12; + case "mike": + return 13; + case "november": + return 14; + case "oscar": + return 15; + case "papa": + return 16; + default: + return 0; + } + } + + @CompilerControl(CompilerControl.Mode.INLINE) + static int indexInline(String key) { + int slot = StringIndex.Support.indexOf(HASHES, NAMES, key); + return slot >= 0 ? IDS[slot] : 0; + } + + @CompilerControl(CompilerControl.Mode.DONT_INLINE) + static int indexNoInline(String key) { + int slot = StringIndex.Support.indexOf(HASHES, NAMES, key); + return slot >= 0 ? IDS[slot] : 0; + } + + @Benchmark + public int switch_hit_inlined(Cursor cursor) { + return switchInline(cursor.nextHit()); + } + + @Benchmark + public int switch_miss_inlined(Cursor cursor) { + return switchInline(cursor.nextMiss()); + } + + @Benchmark + public int switch_hit_noinline(Cursor cursor) { + return switchNoInline(cursor.nextHit()); + } + + @Benchmark + public int switch_miss_noinline(Cursor cursor) { + return switchNoInline(cursor.nextMiss()); + } + + @Benchmark + public int stringIndex_hit_inlined(Cursor cursor) { + return indexInline(cursor.nextHit()); + } + + @Benchmark + public int stringIndex_miss_inlined(Cursor cursor) { + return indexInline(cursor.nextMiss()); + } + + @Benchmark + public int stringIndex_hit_noinline(Cursor cursor) { + return indexNoInline(cursor.nextHit()); + } + + @Benchmark + public int stringIndex_miss_noinline(Cursor cursor) { + return indexNoInline(cursor.nextMiss()); + } + + // --- constant key: the switch's best case (const-propagated). 
Inlined -> folds away; not-inlined + // -> the constant can't cross the boundary, so the switch runs in full. --- + + @Benchmark + public int switch_const_inlined() { + return switchInline(CONST_KEY); + } + + @Benchmark + public int switch_const_noinline() { + return switchNoInline(CONST_KEY); + } + + @Benchmark + public int stringIndex_const_inlined() { + return indexInline(CONST_KEY); + } + + @Benchmark + public int stringIndex_const_noinline() { + return indexNoInline(CONST_KEY); + } +} diff --git a/internal-api/src/jmh/java/datadog/trace/util/TagMapReadThroughBenchmark.java b/internal-api/src/jmh/java/datadog/trace/util/TagMapReadThroughBenchmark.java new file mode 100644 index 00000000000..4181e99f146 --- /dev/null +++ b/internal-api/src/jmh/java/datadog/trace/util/TagMapReadThroughBenchmark.java @@ -0,0 +1,84 @@ +package datadog.trace.util; + +import datadog.trace.api.TagMap; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; + +/** + * Models span-build tag assembly with vs without read-through of the shared trace-level bundle. + * + *

+ * + *

Run with {@code -prof gc}; the B/op delta is the per-span allocation read-through saves. Both + * arms set the same span tags, so the delta isolates the trace-bundle handling. {@code + * traceTagCount} sweeps the bundle size — the win scales with it (more trace tags → more cloned + * BucketGroups and local collisions avoided). {@code traceTagCount = 7} ≈ a realistic + * mergedTracerTags (env, version, language, runtime-id, a propagation tag, a couple global tags). + */ +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 2) +@Measurement(iterations = 5, time = 2) +@Fork(3) +@Threads(8) +public class TagMapReadThroughBenchmark { + + @Param({"3", "7", "15"}) + int traceTagCount; + + private TagMap traceTags; + + @Setup(Level.Trial) + public void setup() { + TagMap m = TagMap.create(Math.max(16, traceTagCount * 2)); + for (int i = 0; i < traceTagCount; i++) { + m.set("_dd.trace.tag." + i, "trace-value-" + i); + } + this.traceTags = m.freeze(); + } + + @Benchmark + public TagMap copyDown() { + TagMap m = TagMap.create(16); + m.putAll(traceTags); // putAll-into-empty: shares frozen entries, clones BucketGroups + setSpanTags(m); + return m; + } + + @Benchmark + public TagMap readThrough() { + TagMap m = TagMap.create(16); + m.withParent(traceTags); // no copy; trace tags read through the shared frozen parent + setSpanTags(m); + return m; + } + + private static void setSpanTags(TagMap m) { + m.set("http.method", "GET"); + m.set("http.url", "/api/checkout/cart"); + m.set("component", "spring-web-controller"); + m.set("span.kind", "server"); + m.set("http.status_code", 200); + } +} diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java new file mode 100644 index 00000000000..3b49007f736 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/api/KnownTagIds.java @@ -0,0 +1,318 @@ +package datadog.trace.api; + 
+import datadog.trace.bootstrap.instrumentation.api.Tags; +import datadog.trace.util.StringIndex; + +/** + * Hand-assigned tag-id constants for well-known tags, plus the {@link KnownTags.Resolver} that + * resolves them. This is the single registry shared by the tracer core and by instrumentation + * (decorators) — it lives in {@code internal-api} so both layers can reference the ids; the + * eventual code generator will replace the hand assignment here. + * + *

Reserved serials {@code [1, KnownTags.FIRST_STORED_SERIAL)} name "virtual" tags handled by the + * tag interceptor / span fields and are NOT stored in the {@code TagMap}; their {@code fieldPos} is + * the {@link KnownTags#NO_SLOT} sentinel that is out of slot range, so any incidental store routes + * to the hash buckets rather than a positional slot. Serials {@code >= FIRST_STORED_SERIAL} name + * stored tags that slot/bucket normally (or, with {@code NO_SLOT}, are stored bucket-only). + * + *

The resolver registers on class initialization ({@link #init()} or a {@code long} id read), so + * resolution is live before the first span; inlined {@code *_SERIAL}/{@link #ENV} never trigger it. + * + *

Slice-1 note (keyOf substrate): the {@code fieldPos} assignments below (and {@link + * #SLOT_COUNT}) describe a single universal positional layout (slots 0..25). That layout is + * currently dormant — no dense store consumes {@code fieldPos} yet — and is provisional: the + * dense-store slice replaces the universal layout with per-role / per-type sizing (see the + * over-provision finding in {@code dense-tagmap-design.md}). {@code keyOf}/{@code nameOf} depend + * only on {@code globalSerial} + name, not {@code fieldPos}, so the ids themselves are stable + * across any layout scheme. + */ +public final class KnownTagIds { + // slot count = (max stored fieldPos) + 1. Stored tags use fieldPos 0..25. PROVISIONAL universal + // layout — see the slice-1 note above; the dense-store slice supersedes this with role/type + // sizing. + static final int SLOT_COUNT = 26; + + // ---- reserved / virtual (tag-interceptor handled, not stored) ---- + // Reserved tags are always intercepted -> set the INTERCEPTED flag. + public static final int ERROR_SERIAL = 1; + public static final long ERROR = KnownTags.intercepted(KnownTags.tagId(ERROR_SERIAL, Tags.ERROR)); + + // ---- stored (slotted / bucketed) ---- + public static final int PARENT_ID_SERIAL = KnownTags.FIRST_STORED_SERIAL; + public static final long PARENT_ID = KnownTags.tagId(PARENT_ID_SERIAL, 0, DDTags.PARENT_ID); + + // common (process-constant) tags added by InternalTagsAdder to ~every span + public static final int BASE_SERVICE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 1; + public static final long BASE_SERVICE = + KnownTags.tagId(BASE_SERVICE_SERIAL, 1, DDTags.BASE_SERVICE); + + public static final int VERSION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 2; + public static final long VERSION = KnownTags.tagId(VERSION_SERIAL, 2, Tags.VERSION); + + // build-time-known constant tags merged into defaultSpanTags (see CoreTracer.withTracerTags). + // "env" is a base-mixin tag; the *_ENABLED flags are product-mixin tags. 
Hand-assigned for now. + public static final String ENV = "env"; + public static final int ENV_SERIAL = KnownTags.FIRST_STORED_SERIAL + 3; + public static final long ENV_ID = KnownTags.tagId(ENV_SERIAL, 3, ENV); + + public static final int DJM_ENABLED_SERIAL = KnownTags.FIRST_STORED_SERIAL + 4; + public static final long DJM_ENABLED = KnownTags.tagId(DJM_ENABLED_SERIAL, 4, DDTags.DJM_ENABLED); + + public static final int DSM_ENABLED_SERIAL = KnownTags.FIRST_STORED_SERIAL + 5; + public static final long DSM_ENABLED = KnownTags.tagId(DSM_ENABLED_SERIAL, 5, DDTags.DSM_ENABLED); + + // common tags added by the tag post-processors (RemoteHostnameAdder / IntegrationAdder / + // ServiceNameSourceAdder). Not intercepted; stored. + public static final int TRACER_HOST_SERIAL = KnownTags.FIRST_STORED_SERIAL + 6; + public static final long TRACER_HOST_ID = + KnownTags.tagId(TRACER_HOST_SERIAL, 6, DDTags.TRACER_HOST); + + public static final int INTEGRATION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 7; + public static final long INTEGRATION_ID = + KnownTags.tagId(INTEGRATION_SERIAL, 7, DDTags.DD_INTEGRATION); + + public static final int SVC_SRC_SERIAL = KnownTags.FIRST_STORED_SERIAL + 8; + public static final long SVC_SRC_ID = KnownTags.tagId(SVC_SRC_SERIAL, 8, DDTags.DD_SVC_SRC); + + // peer.service tags, read/written by PeerServiceCalculator (post-processor; uses Map put/get that + // bypass the interceptor). peer.service is intercepted on the set-path but STORED, so it slots. + public static final int PEER_SERVICE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 9; + public static final long PEER_SERVICE = + KnownTags.intercepted(KnownTags.tagId(PEER_SERVICE_SERIAL, 9, Tags.PEER_SERVICE)); + + public static final int PEER_SERVICE_REMAPPED_FROM_SERIAL = KnownTags.FIRST_STORED_SERIAL + 10; + public static final long PEER_SERVICE_REMAPPED_FROM = + KnownTags.tagId(PEER_SERVICE_REMAPPED_FROM_SERIAL, 10, DDTags.PEER_SERVICE_REMAPPED_FROM); + + // HTTP tags read by HttpEndpointPostProcessor. 
http.method/http.url are intercepted-but-stored + // (interceptTag side-effects then returns false → stored); http.route is not intercepted. All + // stored, so the string set-path slots them via keyOf and the id reads here find them. + public static final int HTTP_METHOD_SERIAL = KnownTags.FIRST_STORED_SERIAL + 11; + public static final long HTTP_METHOD = + KnownTags.intercepted(KnownTags.tagId(HTTP_METHOD_SERIAL, 11, Tags.HTTP_METHOD)); + + public static final int HTTP_ROUTE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 12; + public static final long HTTP_ROUTE = KnownTags.tagId(HTTP_ROUTE_SERIAL, 12, Tags.HTTP_ROUTE); + + public static final int HTTP_URL_SERIAL = KnownTags.FIRST_STORED_SERIAL + 13; + public static final long HTTP_URL = + KnownTags.intercepted(KnownTags.tagId(HTTP_URL_SERIAL, 13, Tags.HTTP_URL)); + + // peer connection tags set by BaseDecorator.onPeerConnection on ~every client/producer span. + // Not intercepted; stored. Slotted (common across client instrumentations). + public static final int PEER_HOSTNAME_SERIAL = KnownTags.FIRST_STORED_SERIAL + 14; + public static final long PEER_HOSTNAME = + KnownTags.tagId(PEER_HOSTNAME_SERIAL, 14, Tags.PEER_HOSTNAME); + + public static final int PEER_HOST_IPV4_SERIAL = KnownTags.FIRST_STORED_SERIAL + 15; + public static final long PEER_HOST_IPV4 = + KnownTags.tagId(PEER_HOST_IPV4_SERIAL, 15, Tags.PEER_HOST_IPV4); + + public static final int PEER_HOST_IPV6_SERIAL = KnownTags.FIRST_STORED_SERIAL + 16; + public static final long PEER_HOST_IPV6 = + KnownTags.tagId(PEER_HOST_IPV6_SERIAL, 16, Tags.PEER_HOST_IPV6); + + public static final int PEER_PORT_SERIAL = KnownTags.FIRST_STORED_SERIAL + 17; + public static final long PEER_PORT = KnownTags.tagId(PEER_PORT_SERIAL, 17, Tags.PEER_PORT); + + // Universal decorator tags — set on ~every span (component/span.kind via Base/Server/Client + // decorators, language via ServerDecorator). span.kind is intercepted (setSpanKindOrdinal). 
+ public static final int COMPONENT_SERIAL = KnownTags.FIRST_STORED_SERIAL + 18; + public static final long COMPONENT = KnownTags.tagId(COMPONENT_SERIAL, 18, Tags.COMPONENT); + + public static final int SPAN_KIND_SERIAL = KnownTags.FIRST_STORED_SERIAL + 19; + public static final long SPAN_KIND = + KnownTags.intercepted(KnownTags.tagId(SPAN_KIND_SERIAL, 19, Tags.SPAN_KIND)); + + public static final int LANGUAGE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 20; + public static final long LANGUAGE = KnownTags.tagId(LANGUAGE_SERIAL, 20, DDTags.LANGUAGE_TAG_KEY); + + // JDBC / database-client tags — set on every db span (58% of petclinic spans). Not intercepted + // (only db.statement is, and that's handled separately). + public static final int DB_TYPE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 21; + public static final long DB_TYPE = KnownTags.tagId(DB_TYPE_SERIAL, 21, Tags.DB_TYPE); + + public static final int DB_INSTANCE_SERIAL = KnownTags.FIRST_STORED_SERIAL + 22; + public static final long DB_INSTANCE = KnownTags.tagId(DB_INSTANCE_SERIAL, 22, Tags.DB_INSTANCE); + + public static final int DB_USER_SERIAL = KnownTags.FIRST_STORED_SERIAL + 23; + public static final long DB_USER = KnownTags.tagId(DB_USER_SERIAL, 23, Tags.DB_USER); + + public static final int DB_OPERATION_SERIAL = KnownTags.FIRST_STORED_SERIAL + 24; + public static final long DB_OPERATION = + KnownTags.tagId(DB_OPERATION_SERIAL, 24, Tags.DB_OPERATION); + + public static final int DB_POOL_NAME_SERIAL = KnownTags.FIRST_STORED_SERIAL + 25; + public static final long DB_POOL_NAME = + KnownTags.tagId(DB_POOL_NAME_SERIAL, 25, Tags.DB_POOL_NAME); + + // Open-addressed name -> id table backing keyOf (data, not a switch): scales flat as the known + // set grows, where a generated switch eventually falls off the inline threshold. KEYOF_NAMES and + // KEYOF_VALUES are parallel; the table places names by hash and a parallel ids[] by slot. 
+ private static final String[] KEYOF_NAMES = { + Tags.ERROR, + DDTags.PARENT_ID, + DDTags.BASE_SERVICE, + Tags.VERSION, + ENV, + DDTags.DJM_ENABLED, + DDTags.DSM_ENABLED, + DDTags.TRACER_HOST, + DDTags.DD_INTEGRATION, + DDTags.DD_SVC_SRC, + Tags.PEER_SERVICE, + DDTags.PEER_SERVICE_REMAPPED_FROM, + Tags.HTTP_METHOD, + Tags.HTTP_ROUTE, + Tags.HTTP_URL, + Tags.PEER_HOSTNAME, + Tags.PEER_HOST_IPV4, + Tags.PEER_HOST_IPV6, + Tags.PEER_PORT, + Tags.COMPONENT, + Tags.SPAN_KIND, + DDTags.LANGUAGE_TAG_KEY, + Tags.DB_TYPE, + Tags.DB_INSTANCE, + Tags.DB_USER, + Tags.DB_OPERATION, + Tags.DB_POOL_NAME, + }; + + private static final long[] KEYOF_VALUES = { + ERROR, + PARENT_ID, + BASE_SERVICE, + VERSION, + ENV_ID, + DJM_ENABLED, + DSM_ENABLED, + TRACER_HOST_ID, + INTEGRATION_ID, + SVC_SRC_ID, + PEER_SERVICE, + PEER_SERVICE_REMAPPED_FROM, + HTTP_METHOD, + HTTP_ROUTE, + HTTP_URL, + PEER_HOSTNAME, + PEER_HOST_IPV4, + PEER_HOST_IPV6, + PEER_PORT, + COMPONENT, + SPAN_KIND, + LANGUAGE, + DB_TYPE, + DB_INSTANCE, + DB_USER, + DB_OPERATION, + DB_POOL_NAME, + }; + + // Static-final raw arrays placed by StringIndex.Support: the JIT folds these refs to constants on + // the keyOf hot path (the fastest of StringIndex's three usage modes — no instance dereference). 
+ private static final int[] KEYOF_HASHES; + private static final String[] KEYOF_KEYS; + private static final long[] KEYOF_IDS; + + static { + StringIndex.Data data = StringIndex.Support.create(KEYOF_NAMES); + long[] ids = new long[data.names.length]; + for (int j = 0; j < KEYOF_NAMES.length; j++) { + ids[StringIndex.Support.indexOf(data.hashes, data.names, KEYOF_NAMES[j])] = KEYOF_VALUES[j]; + } + KEYOF_HASHES = data.hashes; + KEYOF_KEYS = data.names; + KEYOF_IDS = ids; + } + + static final KnownTags.Resolver RESOLVER = + new KnownTags.Resolver() { + @Override + public String nameOf(long tagId) { + switch (KnownTags.globalSerial(tagId)) { + case ERROR_SERIAL: + return Tags.ERROR; + case PARENT_ID_SERIAL: + return DDTags.PARENT_ID; + case BASE_SERVICE_SERIAL: + return DDTags.BASE_SERVICE; + case VERSION_SERIAL: + return Tags.VERSION; + case ENV_SERIAL: + return ENV; + case DJM_ENABLED_SERIAL: + return DDTags.DJM_ENABLED; + case DSM_ENABLED_SERIAL: + return DDTags.DSM_ENABLED; + case TRACER_HOST_SERIAL: + return DDTags.TRACER_HOST; + case INTEGRATION_SERIAL: + return DDTags.DD_INTEGRATION; + case SVC_SRC_SERIAL: + return DDTags.DD_SVC_SRC; + case PEER_SERVICE_SERIAL: + return Tags.PEER_SERVICE; + case PEER_SERVICE_REMAPPED_FROM_SERIAL: + return DDTags.PEER_SERVICE_REMAPPED_FROM; + case HTTP_METHOD_SERIAL: + return Tags.HTTP_METHOD; + case HTTP_ROUTE_SERIAL: + return Tags.HTTP_ROUTE; + case HTTP_URL_SERIAL: + return Tags.HTTP_URL; + case PEER_HOSTNAME_SERIAL: + return Tags.PEER_HOSTNAME; + case PEER_HOST_IPV4_SERIAL: + return Tags.PEER_HOST_IPV4; + case PEER_HOST_IPV6_SERIAL: + return Tags.PEER_HOST_IPV6; + case PEER_PORT_SERIAL: + return Tags.PEER_PORT; + case COMPONENT_SERIAL: + return Tags.COMPONENT; + case SPAN_KIND_SERIAL: + return Tags.SPAN_KIND; + case LANGUAGE_SERIAL: + return DDTags.LANGUAGE_TAG_KEY; + case DB_TYPE_SERIAL: + return Tags.DB_TYPE; + case DB_INSTANCE_SERIAL: + return Tags.DB_INSTANCE; + case DB_USER_SERIAL: + return Tags.DB_USER; + case 
DB_OPERATION_SERIAL: + return Tags.DB_OPERATION; + case DB_POOL_NAME_SERIAL: + return Tags.DB_POOL_NAME; + default: + return null; + } + } + + @Override + public int slotCount() { + return SLOT_COUNT; + } + + @Override + public long keyOf(String name) { + int slot = StringIndex.Support.indexOf(KEYOF_HASHES, KEYOF_KEYS, name); + return slot < 0 ? 0L : KEYOF_IDS[slot]; + } + }; + + static { + KnownTags.register(RESOLVER); + } + + /** + * Forces resolver registration. Merely invoking this static method runs {@code <clinit>} (which + * registers {@link #RESOLVER}), so calling it once at tracer init flips the dense store live; + * idempotent. Until something references this class the registry stays dormant and {@code keyOf} + * returns 0, so tag storage is byte-identical to the bucket-only behavior. + */ + public static void init() {} + + private KnownTagIds() {} +} diff --git a/internal-api/src/main/java/datadog/trace/api/KnownTags.java b/internal-api/src/main/java/datadog/trace/api/KnownTags.java new file mode 100644 index 00000000000..41167077b87 --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/api/KnownTags.java @@ -0,0 +1,167 @@ +package datadog.trace.api; + +/** + * Registry for generated tag ID ↔ name resolution. The code generator populates this at tracer init + * via {@link #register(Resolver)}. Once registered, HotSpot CHA devirtualizes and inlines the + * resolver's switch, making {@link #nameOf}/{@link #keyOf} effectively zero-overhead. + */ +public final class KnownTags { + // Plain (non-volatile) fast-path flag: false until a resolver is ever registered. A plain read is + // free and hoistable, unlike a volatile read of `resolver` (costly on weak memory models such as + // ARM). A stale `false` is benign — callers treat the tag as unknown and use the hash buckets, + // which is correct, just unoptimized; the next read after publication takes the slot path. 
+ private static boolean active; + + private static volatile Resolver resolver; + + /** Fast-path gate: true once a resolver has been registered. */ + public static boolean isActive() { + return active; + } + + /* + * tagId bit layout: [63 intercepted] [62-48 globalSerial (15 bits)] [47-32 fieldPos] + * [31-0 nameHash]. Bit 63 (the sign bit) marks a tag the tag interceptor must see, so the check + * is a single {@code tagId < 0}. globalSerial is globally unique per known tag; fieldPos is its + * slot in the global positional layout (TagMap.knownEntries index); nameHash is + * TagMap.Entry#_hash(name) and is layout-independent. Unknown (string-only) tags have the upper + * 32 bits zero. NOTE: TagMap.Entry decodes nameHash inline as (int) tagId on its hot path, so the + * low-32 encoding here must stay in sync with that. + */ + public static int globalSerial(long tagId) { + return (int) ((tagId >>> 48) & 0x7FFF); + } + + /** + * Flag bit (the sign bit) marking a tag the tag interceptor must process — reserved/"virtual" + * tags AND intercepted-but-stored tags (e.g. http.method, which the interceptor side-effects and + * also stores). Encoded in the id so {@code DDSpanContext.setTag(long)} can route with a single + * sign test ({@link #isIntercepted}) instead of resolving the name. Non-intercepted tags (peer.*, + * base.service, …) leave it clear and take the fast store path. Must agree with the interceptor's + * name-based {@code needsIntercept} for every assigned id. + */ + public static final long INTERCEPTED = Long.MIN_VALUE; // 1L << 63 + + /** True if the tagId is flagged for tag-interceptor processing. */ + public static boolean isIntercepted(long tagId) { + return tagId < 0L; + } + + /** Returns the tagId with the {@link #INTERCEPTED} flag set. 
*/ + public static long intercepted(long tagId) { + return tagId | INTERCEPTED; + } + + public static int fieldPos(long tagId) { + return (int) ((tagId >>> 32) & 0xFFFF); + } + + public static int nameHash(long tagId) { + return (int) tagId; + } + + /** + * globalSerial partition. {@code [1, FIRST_STORED_SERIAL)} is reserved for "virtual" tags that + * are specially handled (redirected to span fields or processed by the tag interceptor) and are + * NOT stored in the TagMap — these are hand-assigned in tracer core. {@code [FIRST_STORED_SERIAL, + * ..]} is for generated convention tags that ARE stored (slotted/bucketed). {@code globalSerial + * == 0} means unknown / string-only. Both core and the code generator must agree on this + * boundary. + */ + public static final int FIRST_STORED_SERIAL = 256; + + /** True if the tagId names a reserved "virtual"/specially-handled tag (not stored in the map). */ + public static boolean isReserved(long tagId) { + int globalSerial = globalSerial(tagId); + return globalSerial > 0 && globalSerial < FIRST_STORED_SERIAL; + } + + /** True if the tagId names a generated, map-stored (slotted/bucketed) tag. */ + public static boolean isStored(long tagId) { + return globalSerial(tagId) >= FIRST_STORED_SERIAL; + } + + /** + * Sentinel {@code fieldPos} meaning "no positional slot". It is the maximum value the 16-bit + * fieldPos field can hold, so it always compares {@code >= slotCount()} and routes to the hash + * buckets rather than the fast positional array. Two kinds of tagId use it: + * + *
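<ul> + * <li>reserved "virtual" tags, which are specially handled and never stored in the map; + * <li>"low-priority" stored tags, which get a stable id but are deliberately kept out of the + * positional slot array and therefore live in the hash buckets. + * </ul>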
+ */ + public static final int NO_SLOT = 0xFFFF; + + /** + * True if the tagId names a stored tag that deliberately has no positional slot (bucket-only). + */ + public static boolean isUnslotted(long tagId) { + return isStored(tagId) && fieldPos(tagId) == NO_SLOT; + } + + /** + * Builds a tagId from its parts: {@code globalSerial} (globally unique per known tag), {@code + * fieldPos} (the tag's slot within its span type's positional table), and the tag {@code name} + * (whose hash is computed via the same function the runtime uses, so the low 32 bits match {@link + * TagMap.Entry#hash()}). Inverse of {@link #globalSerial}/{@link #fieldPos}/{@link #nameHash}. + * Intended for the code generator and tests. + */ + public static long tagId(int globalSerial, int fieldPos, String name) { + long nameHash = TagMap.Entry._hash(name) & 0xFFFFFFFFL; + return ((long) globalSerial << 48) | ((long) (fieldPos & 0xFFFF) << 32) | nameHash; + } + + /** + * Builds a tagId with no positional slot ({@code fieldPos == }{@link #NO_SLOT}). Use for reserved + * "virtual" tags and for "low-priority" stored tags that get a stable id but are intentionally + * kept out of the fast slot array (they route to the hash buckets). See {@link #NO_SLOT}. + */ + public static long tagId(int globalSerial, String name) { + return tagId(globalSerial, NO_SLOT, name); + } + + // Number of positional slots in the global layout = (max stored fieldPos) + 1, declared by the + // registered provider. Captured once at registration and read as a dynamic constant; TagMap sizes + // its knownEntries array to exactly this rather than a hardcoded max. 0 when no resolver. + private static int slotCount; + + /** Slot count of the registered provider (max stored fieldPos + 1); 0 if none. */ + public static int slotCount() { + return slotCount; + } + + public interface Resolver { + String nameOf(long tagId); + + long keyOf(String name); + + /** Number of positional slots this provider uses: (max stored fieldPos) + 1. 
+ */ + int slotCount(); + } + + public static void register(Resolver resolver) { // a null resolver clears the registration (slotCount 0, inactive) + KnownTags.slotCount = (resolver != null) ? resolver.slotCount() : 0; // plain write made BEFORE the volatile write so it is published with it + KnownTags.resolver = resolver; // volatile write publishes the resolver and the slotCount written above + KnownTags.active = (resolver != null); // plain write; readers re-read resolver volatile anyway + } + + public static String nameOf(long tagId) { + if (!active) return null; + Resolver r = resolver; + return r != null ? r.nameOf(tagId) : null; + } + + public static long keyOf(String name) { + if (!active) return 0L; + Resolver r = resolver; + return r != null ? r.keyOf(name) : 0L; + } + + private KnownTags() {} +} diff --git a/internal-api/src/main/java/datadog/trace/api/TagMap.java b/internal-api/src/main/java/datadog/trace/api/TagMap.java index f8f33f1c023..d235dce4d2c 100644 --- a/internal-api/src/main/java/datadog/trace/api/TagMap.java +++ b/internal-api/src/main/java/datadog/trace/api/TagMap.java @@ -6,6 +6,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.NoSuchElementException; @@ -278,6 +279,30 @@ void forEach( /** Checks if the TagMap is writable - if not throws {@link IllegalStateException} */ void checkWriteAccess(); + /** + * Cheap, conservative emptiness check: {@code true} guarantees the map is empty; {@code false} + * means it may be non-empty. Unlike {@link #isEmpty()} (exact, {@link java.util.Map} contract) + * this never resolves read-through parents to an exact union, so it is safe on the hot path. + * Prefer it to {@link #isEmpty()} wherever a conservative answer suffices. + */ + boolean isDefinitelyEmpty(); + + /** + * Cheap upper-bound estimate of the map size ({@code >=} the exact {@link #size()}). Does not + * account for read-through shadowing or removals, so it may over-count; intended for capacity + * hints. Prefer it to {@link #size()} wherever an upper bound suffices. 
+ */ + int estimateSize(); + + /** + * Attaches a frozen parent for read-through (level-split phase 1): reads that miss this map's own + * entries fall through to {@code parent}, while local entries shadow it and local removals + * tombstone it. The parent must be frozen, so it is safely shareable across spans/threads without + * synchronization. Single-parent by design in phase 1; generalizing to multiple flattened parents + * is additive. Returns {@code this} for chaining. + */ + TagMap withParent(TagMap parent); + abstract class EntryChange { public static final EntryRemoval newRemoval(String tag) { return new EntryRemoval(tag); @@ -1181,19 +1206,72 @@ final class OptimizedTagMap implements TagMap { // and TagMap's EMPTY constant reads back through the factory into here; deferring the build // to a separate holder keeps that read from observing a half-initialized static. static final class EmptyHolder { - // Using special constructor that creates a frozen view of an existing array. - // Bucket calculation requires that array length is a power of 2; size 0 fails with - // ArrayIndexOutOfBoundsException, but size 1 works. - static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1], 0); + // Frozen empty view. Allocates its OWN length-16 (power-of-two) array rather than reading + // OptimizedTagMap.EMPTY_BUCKETS: this nested class can initialize before OptimizedTagMap's + // sets that static, which would leave EMPTY with null buckets. One-time singleton, + // frozen so never written. + static final OptimizedTagMap EMPTY = new OptimizedTagMap(new Object[1 << 4], 0); } - private final Object[] buckets; + // Shared immutable empty buckets (all null, length 16). Every map points here until its first + // custom-tag write copies-on-write to a private array (materializeBuckets), so an all-known / + // known-heavy map (e.g. the trace-tier read-through parent) allocates ZERO buckets. 
Length is + // always 16, so reads need no null guard and read-through bucket alignment (hash & 15) holds. + private static final Object[] EMPTY_BUCKETS = new Object[1 << 4]; + + private Object[] buckets; private int size; private boolean frozen; + /** + * Dense known-tag store (dense-tagmap-design §5). Values for KNOWN tags (those {@link + * KnownTags#keyOf} resolves to a stored id) live in these append-ordered parallel arrays (insertion order holds only until a removal, which back-fills the hole with the last entry) with + * NO per-tag {@link Entry} object — the allocation win. Lazily allocated on the first known-tag + * write ({@code null} until then, so all-unknown maps pay nothing) and grown x2 from {@link + * #KNOWN_INIT_CAP}. Matched by full tagId via a linear scan ({@link #knownIndexOf}); reads + * aren't hot, so O(knownCount) is fine and positional indexing is deferred. Dormant until a + * resolver is registered: {@code keyOf} returns 0, so nothing routes here and production is + * byte-identical. + * + *
<p>
Disjoint from {@link #buckets} by construction: known-ness is global ({@code keyOf} is + * deterministic), so a known tag is ALWAYS dense and never bucketed, and vice-versa. That + * disjointness keeps read-through shadow checks within-region — a parent dense entry can only be + * shadowed by a local dense entry of the same id, a parent bucket entry only by a local bucket + * entry — so the existing bucket read-through code is unchanged. + * + *
<p>
{@link #size} counts bucket entries only; {@link #knownCount} counts dense entries; the + * local total is {@code size + knownCount}. + */ + private long[] knownIds; + + private Object[] knownValues; + private int knownCount; + + private static final int KNOWN_INIT_CAP = 8; + + /** + * Optional frozen parent for read-through (level-split phase 1). When non-null, reads that miss + * the local buckets fall through to the parent; a local entry shadows the parent's (local-wins). + * Phase 1 is single-parent by design (anti-false-generalization); generalizing to multiple + * flattened parents is additive. Must be frozen when attached, so it is safely shareable. + */ + private OptimizedTagMap parent; + + /** + * Parent keys removed locally (read-through tombstones). Lazily allocated on the first such + * removal; {@code null} both means "no tombstones" and serves as the gate that keeps the hot + * paths untouched. Only meaningful when {@link #parent} != null. A tombstone stops read-through + * fall-through for its key, so a key removed from a child no longer reads through to the parent. + * Kept off the bucket structure deliberately — it is shape-agnostic (bare-Entry vs BucketGroup) + * and rare, so it costs a lazy allocation on removal rather than complicating the hot bucket + * code. + */ + private Set removedFromParent; + public OptimizedTagMap() { - // needs to be a power of 2 for bucket masking calculation to work as intended - this.buckets = new Object[1 << 4]; + // Start on the shared empty buckets; materializeBuckets() COWs to a private power-of-two array + // on the first custom-tag write. All-known maps never allocate buckets. + this.buckets = EMPTY_BUCKETS; this.size = 0; this.frozen = false; } @@ -1212,12 +1290,72 @@ public boolean isOptimized() { @Override public int size() { - return this.size; + // Exact (Map contract). Under read-through resolves the union; prefer estimateSize() for hints. 
+ int local = this.size + this.knownCount; // buckets + dense + OptimizedTagMap p = this.parent; + return p == null ? local : local + this.visibleParentCount(); + } + + /** + * Exact count of parent entries not shadowed locally or tombstoned (the read-through addition). + */ + private int visibleParentCount() { + int count = 0; + // parent dense entries not shadowed by a local dense entry / tombstoned + long[] parentIds = this.parent.knownIds; + int parentKnownCount = this.parent.knownCount; + for (int i = 0; i < parentKnownCount; ++i) { + if (!this.parentDenseHidden(parentIds[i])) count++; + } + Object[] parentBuckets = this.parent.buckets; + Object[] thisBuckets = this.buckets; + for (int i = 0; i < parentBuckets.length; ++i) { + Object parentBucket = parentBuckets[i]; + Object localBucket = thisBuckets[i]; + if (parentBucket instanceof Entry) { + if (parentEntryVisibleInBucket(localBucket, (Entry) parentBucket)) count++; + } else if (parentBucket instanceof BucketGroup) { + for (BucketGroup g = (BucketGroup) parentBucket; g != null; g = g.prev) { + for (int j = 0; j < BucketGroup.LEN; ++j) { + Entry pe = g._entryAt(j); + if (pe != null && parentEntryVisibleInBucket(localBucket, pe)) count++; + } + } + } + } + return count; } @Override public boolean isEmpty() { - return (this.size == 0); + // Exact (Map contract). Under read-through resolves the parent; prefer isDefinitelyEmpty(). 
+ if (this.size != 0 || this.knownCount != 0) { + return false; + } + OptimizedTagMap p = this.parent; + if (p == null) { + return true; + } + if (this.removedFromParent == null) { + // no local entries and no tombstones -> empty iff the parent is empty (nothing shadows it) + return p.isEmpty(); + } + // size == 0 with tombstones (rare): empty iff every parent entry is tombstoned + return this.visibleParentCount() == 0; + } + + @Override + public boolean isDefinitelyEmpty() { + return this.size == 0 + && this.knownCount == 0 + && (this.parent == null || this.parent.isDefinitelyEmpty()); + } + + @Override + public int estimateSize() { + // Upper bound: local (buckets + dense) + parent, ignoring shadowing/removals (over-counts). + int local = this.size + this.knownCount; + return this.parent == null ? local : local + this.parent.estimateSize(); } @Deprecated @@ -1328,30 +1466,152 @@ public Set> entrySet() { @Override public Entry getEntry(String tag) { - Object[] thisBuckets = this.buckets; + Entry local = this.getLocalEntry(tag); + if (local != null) { + // Local entry shadows the parent (local-wins) — unchanged hot path. + return local; + } + // Read-through: miss locally, defer to the frozen parent. Single-parent in phase 1. + // The tombstone check lives only here, on the cold miss+parent path — the hot local hit above + // never touches it. + OptimizedTagMap p = this.parent; + if (p == null) { + return null; + } + if (this.removedFromParent != null && this.removedFromParent.contains(tag)) { + return null; // tombstoned: removed locally, do not read through + } + return p.getEntry(tag); + } + /** Looks up an entry in this map's own storage only (dense then buckets) — no read-through. */ + private Entry getLocalEntry(String tag) { + // Known tags live in the dense store; resolve identity and check there first. keyOf is a no-op + // (returns 0 -> isStored false) until a resolver is registered, so this is inert in production. 
+ long id = KnownTags.keyOf(tag); + if (KnownTags.isStored(id)) { + Object known = this.knownRawValue(id); + return known == null ? null : Entry.newAnyEntry(tag, known); + } + Object[] thisBuckets = this.buckets; int hash = TagMap.Entry._hash(tag); - int bucketIndex = hash & (thisBuckets.length - 1); + return findInBucket(thisBuckets[hash & (thisBuckets.length - 1)], hash, tag); + } - Object bucket = thisBuckets[bucketIndex]; - if (bucket == null) { - return null; - } else if (bucket instanceof Entry) { + /** + * Finds an entry by hash/tag within a single bucket object (Entry | BucketGroup chain | null). + */ + private static Entry findInBucket(Object bucket, int hash, String tag) { + if (bucket instanceof Entry) { Entry tagEntry = (Entry) bucket; - if (tagEntry.matches(tag)) return tagEntry; + return tagEntry.matches(tag) ? tagEntry : null; } else if (bucket instanceof BucketGroup) { - BucketGroup lastGroup = (BucketGroup) bucket; + return ((BucketGroup) bucket).findInChain(hash, tag); + } + return null; + } - Entry tagEntry = lastGroup.findInChain(hash, tag); - return tagEntry; + /** + * Whether a parent entry is visible through this child at its (shared) bucket: not tombstoned and + * not shadowed by a local entry. Exploits universal hashing — by {@code _hash}, the only local + * entry that could shadow {@code pe} lives in this map's same-index bucket, so we compare against + * {@code localBucket} alone, reusing {@code pe}'s cached hash (no re-hash, no full-map probe). 
+ */ + private boolean parentEntryVisibleInBucket(Object localBucket, Entry pe) { + if (this.removedFromParent != null && this.removedFromParent.contains(pe.tag)) { + return false; // tombstoned: removed locally } + return findInBucket(localBucket, pe.hash(), pe.tag) == null; // not shadowed by a local entry + } + + // ---- dense known-tag store (see the knownIds field doc) + // ---------------------------------------- + + /** + * Linear scan of the dense store for {@code tagId}, returning its index or -1. Ids are canonical + * (the only way one enters is {@link KnownTags#keyOf} or a {@code KnownTagIds} constant, both + * canonical), so a full {@code long} compare is exact and cheaper than extracting globalSerial. + */ + private int knownIndexOf(long tagId) { + long[] ids = this.knownIds; + int n = this.knownCount; + for (int i = 0; i < n; ++i) { + if (ids[i] == tagId) return i; + } + return -1; + } + + private void ensureKnownCapacity() { + if (this.knownIds == null) { + this.knownIds = new long[KNOWN_INIT_CAP]; + this.knownValues = new Object[KNOWN_INIT_CAP]; + } else if (this.knownCount == this.knownIds.length) { + int newCap = this.knownIds.length << 1; + this.knownIds = Arrays.copyOf(this.knownIds, newCap); + this.knownValues = Arrays.copyOf(this.knownValues, newCap); + } + } + + /** + * Stores a known tag's value densely (no {@link Entry} alloc). Overwrites in place when present + * (returning the prior value materialized as an Entry, per the {@code Map} contract — usually + * discarded by {@code set}); otherwise appends, growing x2 as needed. 
+ */ + private Entry putKnownValue(long tagId, Object value) { + int i = this.knownIndexOf(tagId); + if (i >= 0) { + Object prior = this.knownValues[i]; + this.knownValues[i] = value; + return materializeKnown(tagId, prior); + } + this.ensureKnownCapacity(); + int slot = this.knownCount++; + this.knownIds[slot] = tagId; + this.knownValues[slot] = value; return null; } + /** Raw dense value for {@code tagId}, or {@code null} when absent (no Entry, no boxing). */ + private Object knownRawValue(long tagId) { + int i = this.knownIndexOf(tagId); + return i < 0 ? null : this.knownValues[i]; + } + + /** + * Removes a known tag from the dense store (swap-with-last), returning the prior Entry or null. + */ + private Entry removeKnown(long tagId) { + int i = this.knownIndexOf(tagId); + if (i < 0) return null; + Object prior = this.knownValues[i]; + int last = --this.knownCount; + this.knownIds[i] = this.knownIds[last]; + this.knownValues[i] = this.knownValues[last]; + this.knownIds[last] = 0L; + this.knownValues[last] = null; + return materializeKnown(tagId, prior); + } + + /** Materializes a transient Entry for a dense (id, value) pair — only on explicit get/iterate. */ + private static Entry materializeKnown(long tagId, Object value) { + return Entry.newAnyEntry(KnownTags.nameOf(tagId), value); + } + + /** + * Whether a parent dense entry is hidden through this child: shadowed by a local dense entry of + * the same id, or tombstoned. (Disjointness means a parent dense entry can't be shadowed by a + * local bucket entry — known tags never bucket — so no bucket check is needed here.) 
+ */ + private boolean parentDenseHidden(long tagId) { + if (this.knownIndexOf(tagId) >= 0) return true; // shadowed by a local dense entry + return this.removedFromParent != null + && this.removedFromParent.contains(KnownTags.nameOf(tagId)); // tombstoned + } + @Deprecated @Override public Object put(String tag, Object value) { - TagMap.Entry entry = this.getAndSet(Entry.newAnyEntry(tag, value)); + TagMap.Entry entry = this.getAndSet(tag, value); return entry == null ? null : entry.objectValue(); } @@ -1360,46 +1620,89 @@ public void set(TagMap.EntryReader newEntryReader) { this.getAndSet(newEntryReader.entry()); } + // The set(String, ...) family delegates to the matching getAndSet(String, ...) overload, which + // routes known tags to the dense store BEFORE constructing any Entry (so a known-tag set + // allocates no Entry). The discarded return is free on the common first-set path (prior == null). @Override public void set(String tag, Object value) { - this.getAndSet(Entry.newAnyEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, CharSequence value) { - this.getAndSet(Entry.newObjectEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, boolean value) { - this.getAndSet(Entry.newBooleanEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, int value) { - this.getAndSet(Entry.newIntEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, long value) { - this.getAndSet(Entry.newLongEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, float value) { - this.getAndSet(Entry.newFloatEntry(tag, value)); + this.getAndSet(tag, value); } @Override public void set(String tag, double value) { - this.getAndSet(Entry.newDoubleEntry(tag, value)); + this.getAndSet(tag, value); } @Override public Entry getAndSet(Entry newEntry) { + // Entry-based path (set(EntryReader), entry-sharing). 
The Entry is already constructed by the + // caller, so a known tag keeps its value densely and drops the Entry. The hot string/typed + // setters route to dense BEFORE constructing an Entry (see set/getAndSet(String, ...)) so a + // known-tag set allocates no Entry at all. + long id = KnownTags.keyOf(newEntry.tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, newEntry.tag, newEntry.objectValue()) + : this.getAndSetBucket(newEntry); + } + + /** + * Stores a known tag's (resolved id, value) densely with NO Entry retained — the alloc win. + * Returns the prior value materialized as an Entry (Map contract); {@code set} discards it. + */ + private Entry getAndSetKnown(long id, String tag, Object value) { this.checkWriteAccess(); + if (this.removedFromParent != null) { + this.removedFromParent.remove(tag); + } + return this.putKnownValue(id, value); + } - Object[] thisBuckets = this.buckets; + /** Copy-on-write the shared empty buckets to a private array on the first bucket write. */ + private Object[] materializeBuckets() { + Object[] b = this.buckets; + if (b == EMPTY_BUCKETS) { + b = new Object[1 << 4]; + this.buckets = b; + } + return b; + } + + /** Stores an entry in the hash buckets — the unknown/custom-tag path. */ + private Entry getAndSetBucket(Entry newEntry) { + this.checkWriteAccess(); + + // Re-setting a key clears any read-through tombstone for it (the new value overrides the + // removal). Gated on the lazy field, so this is a no-op for the common no-tombstone case. + if (this.removedFromParent != null) { + this.removedFromParent.remove(newEntry.tag); + } + + Object[] thisBuckets = this.materializeBuckets(); int newHash = newEntry.hash(); int bucketIndex = newHash & (thisBuckets.length - 1); @@ -1444,39 +1747,63 @@ public Entry getAndSet(Entry newEntry) { return null; } + // Each getAndSet(String, ...) 
resolves keyOf FIRST: a known tag stores its value densely with no + // Entry (boxing the primitive only on this branch); a custom tag falls back to the typed Entry + // (no boxing for primitives, preserving the bucket store's no-box property). @Override public Entry getAndSet(String tag, Object value) { - return this.getAndSet(Entry.newAnyEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, value) + : this.getAndSetBucket(Entry.newAnyEntry(tag, value)); } @Override public Entry getAndSet(String tag, CharSequence value) { - return this.getAndSet(Entry.newObjectEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, value) + : this.getAndSetBucket(Entry.newObjectEntry(tag, value)); } @Override public TagMap.Entry getAndSet(String tag, boolean value) { - return this.getAndSet(Entry.newBooleanEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, Boolean.valueOf(value)) + : this.getAndSetBucket(Entry.newBooleanEntry(tag, value)); } @Override public TagMap.Entry getAndSet(String tag, int value) { - return this.getAndSet(Entry.newIntEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, Integer.valueOf(value)) + : this.getAndSetBucket(Entry.newIntEntry(tag, value)); } @Override public TagMap.Entry getAndSet(String tag, long value) { - return this.getAndSet(Entry.newLongEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, Long.valueOf(value)) + : this.getAndSetBucket(Entry.newLongEntry(tag, value)); } @Override public TagMap.Entry getAndSet(String tag, float value) { - return this.getAndSet(Entry.newFloatEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? 
this.getAndSetKnown(id, tag, Float.valueOf(value)) + : this.getAndSetBucket(Entry.newFloatEntry(tag, value)); } @Override public TagMap.Entry getAndSet(String tag, double value) { - return this.getAndSet(Entry.newDoubleEntry(tag, value)); + long id = KnownTags.keyOf(tag); + return KnownTags.isStored(id) + ? this.getAndSetKnown(id, tag, Double.valueOf(value)) + : this.getAndSetBucket(Entry.newDoubleEntry(tag, value)); } public void putAll(Map map) { @@ -1514,7 +1841,9 @@ public void putAll(TagMap that) { } private void putAllOptimizedMap(OptimizedTagMap that) { - if (this.size == 0) { + // "empty" must consider BOTH local regions — a map with only dense entries has size == 0 but is + // not empty, and putAllIntoEmptyMap would clobber its dense store. + if (this.size == 0 && this.knownCount == 0) { this.putAllIntoEmptyMap(that); } else { this.putAllMerge(that); @@ -1522,7 +1851,9 @@ private void putAllOptimizedMap(OptimizedTagMap that) { } private void putAllMerge(OptimizedTagMap that) { - Object[] thisBuckets = this.buckets; + // COW our buckets only if the source has bucket entries to merge in; otherwise the loop below + // writes nothing and the shared empty buckets stay shared. + Object[] thisBuckets = (that.size > 0) ? 
this.materializeBuckets() : this.buckets; Object[] thatBuckets = that.buckets; // Since TagMap-s don't support expansion, buckets are perfectly aligned @@ -1633,33 +1964,49 @@ private void putAllMerge(OptimizedTagMap that) { } } } + + // merge the source's dense known-tag entries; incoming clobbers existing (same as buckets) + for (int i = 0; i < that.knownCount; ++i) { + this.putKnownValue(that.knownIds[i], that.knownValues[i]); + } } /* * Specially optimized version of putAll for the common case of destination map being empty */ private void putAllIntoEmptyMap(OptimizedTagMap that) { - Object[] thisBuckets = this.buckets; - Object[] thatBuckets = that.buckets; - - // Check against both thisBuckets.length && thatBuckets.length is to help the JIT do bound check - // elimination - for (int i = 0; i < thisBuckets.length && i < thatBuckets.length; ++i) { - Object thatBucket = thatBuckets[i]; - - // faster to explicitly null check first, then do instanceof - if (thatBucket == null) { - // do nothing - } else if (thatBucket instanceof BucketGroup) { - // if it is a BucketGroup, then need to clone - BucketGroup thatGroup = (BucketGroup) thatBucket; + // Only copy buckets (and COW ours) when the source actually has bucket entries; an all-known + // source leaves us on the shared empty buckets. 
+ if (that.size > 0) { + Object[] thisBuckets = this.materializeBuckets(); + Object[] thatBuckets = that.buckets; + + // Check against both thisBuckets.length && thatBuckets.length is to help the JIT do bound + // check elimination + for (int i = 0; i < thisBuckets.length && i < thatBuckets.length; ++i) { + Object thatBucket = thatBuckets[i]; + + // faster to explicitly null check first, then do instanceof + if (thatBucket == null) { + // do nothing + } else if (thatBucket instanceof BucketGroup) { + // if it is a BucketGroup, then need to clone + BucketGroup thatGroup = (BucketGroup) thatBucket; - thisBuckets[i] = thatGroup.cloneChain(); - } else { // if ( thatBucket instanceof Entry ) - thisBuckets[i] = thatBucket; + thisBuckets[i] = thatGroup.cloneChain(); + } else { // if ( thatBucket instanceof Entry ) + thisBuckets[i] = thatBucket; + } } + this.size = that.size; + } + + // clone the dense known-tag store (values are immutable boxes/objects -> safe to share refs) + if (that.knownCount > 0) { + this.knownIds = Arrays.copyOf(that.knownIds, that.knownIds.length); + this.knownValues = Arrays.copyOf(that.knownValues, that.knownValues.length); + this.knownCount = that.knownCount; } - this.size = that.size; } public void fillMap(Map map) { @@ -1678,6 +2025,9 @@ public void fillMap(Map map) { thisGroup.fillMapFromChain(map); } } + for (int i = 0; i < this.knownCount; ++i) { + map.put(KnownTags.nameOf(this.knownIds[i]), this.knownValues[i]); + } } public void fillStringMap(Map stringMap) { @@ -1696,6 +2046,10 @@ public void fillStringMap(Map stringMap) { thisGroup.fillStringMapFromChain(stringMap); } } + for (int i = 0; i < this.knownCount; ++i) { + stringMap.put( + KnownTags.nameOf(this.knownIds[i]), TagValueConversions.toString(this.knownValues[i])); + } } @Override @@ -1714,6 +2068,37 @@ public boolean remove(String tag) { public Entry getAndRemove(String tag) { this.checkWriteAccess(); + Entry localRemoved = this.removeLocal(tag); + + OptimizedTagMap p = 
this.parent; + if (p != null) { + // Read-through: if the parent still exposes this key, removing it must also hide it from + // fall-through — install a tombstone. The prior *visible* value (Map.remove contract) is the + // local entry if there was one, otherwise the parent's (which we now hide). Single-parent in + // phase 1; rare path (only when removing a parent-exposed key). + boolean alreadyTombstoned = + this.removedFromParent != null && this.removedFromParent.contains(tag); + if (!alreadyTombstoned) { + Entry parentEntry = p.getEntry(tag); + if (parentEntry != null) { + if (this.removedFromParent == null) { + this.removedFromParent = new HashSet<>(); + } + this.removedFromParent.add(tag); + return localRemoved != null ? localRemoved : parentEntry; + } + } + } + return localRemoved; + } + + /** Removes an entry from this map's own storage only — no parent/tombstone handling. */ + private Entry removeLocal(String tag) { + long id = KnownTags.keyOf(tag); + if (KnownTags.isStored(id)) { + return this.removeKnown(id); + } + Object[] thisBuckets = this.buckets; int hash = TagMap.Entry._hash(tag); @@ -1753,7 +2138,14 @@ public Entry getAndRemove(String tag) { @Override public TagMap copy() { OptimizedTagMap copy = new OptimizedTagMap(); - copy.putAllIntoEmptyMap(this); + copy.putAllIntoEmptyMap(this); // clones this map's own (local) buckets + size + // Preserve read-through: share the frozen parent (immutable -> safe to share) and copy the + // tombstones, so the copy is observationally identical to this map (same union) and remains + // independently mutable (writes land on the copy's local buckets, never the shared parent). 
+ copy.parent = this.parent; + if (this.removedFromParent != null) { + copy.removedFromParent = new HashSet<>(this.removedFromParent); + } return copy; } @@ -1765,6 +2157,16 @@ public TagMap immutableCopy() { } } + @Override + public TagMap withParent(TagMap parent) { + OptimizedTagMap p = (OptimizedTagMap) parent; + if (p != null && !p.frozen) { + throw new IllegalStateException("read-through parent must be frozen"); + } + this.parent = p; + return this; + } + @Override public Iterator iterator() { return new EntryReaderIterator(this); @@ -1777,6 +2179,15 @@ public Stream stream() { @Override public void forEach(Consumer consumer) { + // local dense known tags via a reused flyweight (no per-entry Entry alloc — the serialize win) + if (this.knownCount > 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < this.knownCount; ++i) { + reader.set(KnownTags.nameOf(this.knownIds[i]), this.knownValues[i]); + consumer.accept(reader); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1792,10 +2203,59 @@ public void forEach(Consumer consumer) { thisGroup.forEachInChain(consumer); } } + + // read-through: parent entries not shadowed locally or tombstoned. Kept out of line so the + // common parent == null path stays byte-identical to before (small / inlinable). 
+ if (this.parent != null) { + this.forEachParent(consumer); + } + } + + private void forEachParent(Consumer consumer) { + // parent dense known tags not shadowed by a local dense entry / tombstoned + long[] parentIds = this.parent.knownIds; + int parentKnownCount = this.parent.knownCount; + if (parentKnownCount > 0) { + Object[] parentValues = this.parent.knownValues; + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < parentKnownCount; ++i) { + long id = parentIds[i]; + if (!this.parentDenseHidden(id)) { + reader.set(KnownTags.nameOf(id), parentValues[i]); + consumer.accept(reader); + } + } + } + + Object[] localBuckets = this.buckets; + Object[] parentBuckets = this.parent.buckets; // leaf parent: same length, same bucket per key + for (int i = 0; i < parentBuckets.length; ++i) { + Object parentBucket = parentBuckets[i]; + Object localBucket = localBuckets[i]; + if (parentBucket instanceof Entry) { + Entry pe = (Entry) parentBucket; + if (parentEntryVisibleInBucket(localBucket, pe)) consumer.accept(pe); + } else if (parentBucket instanceof BucketGroup) { + for (BucketGroup g = (BucketGroup) parentBucket; g != null; g = g.prev) { + for (int j = 0; j < BucketGroup.LEN; ++j) { + Entry pe = g._entryAt(j); + if (pe != null && parentEntryVisibleInBucket(localBucket, pe)) consumer.accept(pe); + } + } + } + } } @Override public void forEach(T thisObj, BiConsumer consumer) { + if (this.knownCount > 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < this.knownCount; ++i) { + reader.set(KnownTags.nameOf(this.knownIds[i]), this.knownValues[i]); + consumer.accept(thisObj, reader); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1811,11 +2271,60 @@ public void forEach(T thisObj, BiConsumer con thisGroup.forEachInChain(thisObj, consumer); } } + + // read-through: parent entries not shadowed locally or tombstoned (kept out of line). 
+ if (this.parent != null) { + this.forEachParent(thisObj, consumer); + } + } + + private void forEachParent(T thisObj, BiConsumer consumer) { + long[] parentIds = this.parent.knownIds; + int parentKnownCount = this.parent.knownCount; + if (parentKnownCount > 0) { + Object[] parentValues = this.parent.knownValues; + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < parentKnownCount; ++i) { + long id = parentIds[i]; + if (!this.parentDenseHidden(id)) { + reader.set(KnownTags.nameOf(id), parentValues[i]); + consumer.accept(thisObj, reader); + } + } + } + + Object[] localBuckets = this.buckets; + Object[] parentBuckets = this.parent.buckets; // leaf parent: same length, same bucket per key + for (int i = 0; i < parentBuckets.length; ++i) { + Object parentBucket = parentBuckets[i]; + Object localBucket = localBuckets[i]; + if (parentBucket instanceof Entry) { + Entry pe = (Entry) parentBucket; + if (parentEntryVisibleInBucket(localBucket, pe)) consumer.accept(thisObj, pe); + } else if (parentBucket instanceof BucketGroup) { + for (BucketGroup g = (BucketGroup) parentBucket; g != null; g = g.prev) { + for (int j = 0; j < BucketGroup.LEN; ++j) { + Entry pe = g._entryAt(j); + if (pe != null && parentEntryVisibleInBucket(localBucket, pe)) { + consumer.accept(thisObj, pe); + } + } + } + } + } } @Override public void forEach( T thisObj, U otherObj, TriConsumer consumer) { + if (this.knownCount > 0) { + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < this.knownCount; ++i) { + reader.set(KnownTags.nameOf(this.knownIds[i]), this.knownValues[i]); + consumer.accept(thisObj, otherObj, reader); + } + } + Object[] thisBuckets = this.buckets; for (int i = 0; i < thisBuckets.length; ++i) { @@ -1831,13 +2340,59 @@ public void forEach( thisGroup.forEachInChain(thisObj, otherObj, consumer); } } + + // read-through: parent entries not shadowed locally or tombstoned (kept out of line). 
+ if (this.parent != null) { + this.forEachParent(thisObj, otherObj, consumer); + } + } + + private void forEachParent( + T thisObj, U otherObj, TriConsumer consumer) { + long[] parentIds = this.parent.knownIds; + int parentKnownCount = this.parent.knownCount; + if (parentKnownCount > 0) { + Object[] parentValues = this.parent.knownValues; + EntryReadingHelper reader = new EntryReadingHelper(); + for (int i = 0; i < parentKnownCount; ++i) { + long id = parentIds[i]; + if (!this.parentDenseHidden(id)) { + reader.set(KnownTags.nameOf(id), parentValues[i]); + consumer.accept(thisObj, otherObj, reader); + } + } + } + + Object[] localBuckets = this.buckets; + Object[] parentBuckets = this.parent.buckets; // leaf parent: same length, same bucket per key + for (int i = 0; i < parentBuckets.length; ++i) { + Object parentBucket = parentBuckets[i]; + Object localBucket = localBuckets[i]; + if (parentBucket instanceof Entry) { + Entry pe = (Entry) parentBucket; + if (parentEntryVisibleInBucket(localBucket, pe)) consumer.accept(thisObj, otherObj, pe); + } else if (parentBucket instanceof BucketGroup) { + for (BucketGroup g = (BucketGroup) parentBucket; g != null; g = g.prev) { + for (int j = 0; j < BucketGroup.LEN; ++j) { + Entry pe = g._entryAt(j); + if (pe != null && parentEntryVisibleInBucket(localBucket, pe)) { + consumer.accept(thisObj, otherObj, pe); + } + } + } + } + } } public void clear() { this.checkWriteAccess(); - Arrays.fill(this.buckets, null); + // Drop the private bucket array back to the shared empty sentinel (also avoids mutating it). + this.buckets = EMPTY_BUCKETS; this.size = 0; + this.knownIds = null; + this.knownValues = null; + this.knownCount = 0; } public OptimizedTagMap freeze() { @@ -1892,10 +2447,27 @@ void checkIntegrity() { } } + // dense store: ids must be unique (no tag stored twice) and the count within array bounds. 
+ if (this.knownCount > 0) { + if (this.knownIds == null || this.knownCount > this.knownIds.length) { + throw new IllegalStateException("incorrect known count"); + } + for (int i = 0; i < this.knownCount; ++i) { + for (int j = i + 1; j < this.knownCount; ++j) { + if (this.knownIds[i] == this.knownIds[j]) { + throw new IllegalStateException("duplicate known id"); + } + } + } + } + if (this.size != this.computeSize()) { throw new IllegalStateException("incorrect size"); } - if (this.isEmpty() != this.checkIfEmpty()) { + // Local-structure invariant: the size counter's emptiness must match the local buckets. Uses + // the + // local (this.size == 0), NOT isEmpty(), which under read-through resolves the parent too. + if ((this.size == 0) != this.checkIfEmpty()) { throw new IllegalStateException("incorrect empty status"); } } @@ -2013,33 +2585,46 @@ String toInternalString() { } abstract static class IteratorBase { - private final Object[] buckets; + private final OptimizedTagMap map; + private final Object[] localBuckets; + + // current array being walked: local buckets first, then the parent's (read-through union) + private Object[] buckets; + private boolean inParent = false; - private Entry nextEntry; + // Currency is EntryReader, not Entry: a BUCKET entry is its own (real, retain-safe) Entry, but + // a + // DENSE entry is emitted via the reused denseReader flyweight (alloc-free, "use now"). This is + // the contract of TagMap.iterator()/keySet()/values(). entrySet() (Iterator) sits on + // top and calls .entry() per next() to get a real retain-safe Entry (see EntriesIterator). 
+ private EntryReader nextEntry; + private EntryReadingHelper denseReader; // lazily created on the first dense emit private int bucketIndex = -1; private BucketGroup group = null; private int groupIndex = 0; + // dense-store cursors: local known tags, then (read-through) parent known tags + private int knownIndex = 0; + private int parentKnownIndex = 0; + IteratorBase(OptimizedTagMap map) { + this.map = map; + this.localBuckets = map.buckets; this.buckets = map.buckets; } public final boolean hasNext() { if (this.nextEntry != null) return true; - while (this.bucketIndex < this.buckets.length) { - this.nextEntry = this.advance(); - if (this.nextEntry != null) return true; - } - - return false; + this.nextEntry = this.advance(); + return this.nextEntry != null; } - final Entry nextEntryOrThrowNoSuchElement() { + final EntryReader nextEntryOrThrowNoSuchElement() { if (this.nextEntry != null) { - Entry nextEntry = this.nextEntry; + EntryReader nextEntry = this.nextEntry; this.nextEntry = null; return nextEntry; } @@ -2051,9 +2636,9 @@ final Entry nextEntryOrThrowNoSuchElement() { } } - final Entry nextEntryOrNull() { + final EntryReader nextEntryOrNull() { if (this.nextEntry != null) { - Entry nextEntry = this.nextEntry; + EntryReader nextEntry = this.nextEntry; this.nextEntry = null; return nextEntry; } @@ -2061,7 +2646,76 @@ final Entry nextEntryOrNull() { return this.hasNext() ? this.nextEntry : null; } - private final Entry advance() { + private final EntryReader advance() { + // phase: local dense known tags (local entries always emit — no shadow check). Emitted via + // the + // reused denseReader flyweight — NO per-entry Entry alloc (the read/serialize alloc win). 
+ if (this.knownIndex < this.map.knownCount) { + int i = this.knownIndex++; + return this.emitDense(this.map.knownIds[i], this.map.knownValues[i]); + } + while (true) { + Entry tagEntry = this.rawAdvance(); + if (tagEntry != null) { + // local entries emit as-is; parent entries only if not shadowed locally or tombstoned. + // bucketIndex indexes the parent buckets here, which (universal hashing) line up with the + // same-index local bucket — so localBuckets[bucketIndex] is the bucket that could shadow. + if (!this.inParent + || this.map.parentEntryVisibleInBucket( + this.localBuckets[this.bucketIndex], tagEntry)) { + return tagEntry; + } + continue; // parent entry shadowed/tombstoned -> skip + } + + // current bucket array exhausted; before switching to parent buckets, drain parent dense + // (read-through union). Re-entrant: while inParent stays false, the exhausted local-bucket + // rawAdvance keeps returning null and funnels back here until parent dense is fully + // drained. + if (!this.inParent && this.map.parent != null) { + EntryReader parentDense = this.advanceParentDense(); + if (parentDense != null) return parentDense; + + this.inParent = true; + this.buckets = this.map.parent.buckets; + this.bucketIndex = -1; + this.group = null; + this.groupIndex = 0; + continue; + } + return null; + } + } + + /** + * Next visible parent dense entry (not shadowed locally / tombstoned), or null when drained. + */ + private final EntryReader advanceParentDense() { + OptimizedTagMap p = this.map.parent; + long[] parentIds = p.knownIds; + int parentKnownCount = p.knownCount; + while (this.parentKnownIndex < parentKnownCount) { + int i = this.parentKnownIndex++; + long id = parentIds[i]; + if (!this.map.parentDenseHidden(id)) { + return this.emitDense(id, p.knownValues[i]); + } + } + return null; + } + + /** Sets and returns the reused dense flyweight (lazily created); "use now", do not retain. 
*/ + private EntryReader emitDense(long tagId, Object value) { + EntryReadingHelper reader = this.denseReader; + if (reader == null) { + reader = this.denseReader = new EntryReadingHelper(); + } + reader.set(KnownTags.nameOf(tagId), value); + return reader; + } + + /** Next raw entry in the current bucket array, ignoring shadowing/tombstones. */ + private final Entry rawAdvance() { while (this.bucketIndex < this.buckets.length) { if (this.group != null) { for (++this.groupIndex; this.groupIndex < BucketGroup.LEN; ++this.groupIndex) { @@ -2579,19 +3233,36 @@ static final class Entries extends AbstractSet> { @Override public int size() { - return this.map.computeSize(); + return this.map.size(); } @Override public boolean isEmpty() { - return this.map.checkIfEmpty(); + return this.map.isEmpty(); } @Override public Iterator> iterator() { - @SuppressWarnings({"rawtypes", "unchecked"}) - Iterator> iter = (Iterator) this.map.iterator(); - return iter; + return new EntriesIterator(this.map); + } + } + + /** + * entrySet() yields real, retain-safe {@code Map.Entry} objects. It sits on top of the + * EntryReader iterator and materializes each via {@code .entry()}: a bucket entry's reader IS the + * real stored Entry (returns {@code this}, free); a dense entry's flyweight materializes a fresh + * Entry. Deliberately NOT alloc-optimized for dense — bulk reads use {@code forEach}/EntryReader, + * and manual instrumentation does point get/set, not bulk entrySet iteration. 
+ */ + static final class EntriesIterator extends IteratorBase + implements Iterator> { + EntriesIterator(OptimizedTagMap map) { + super(map); + } + + @Override + public Map.Entry next() { + return this.nextEntryOrThrowNoSuchElement().entry(); } } @@ -2604,12 +3275,12 @@ static final class Keys extends AbstractSet { @Override public int size() { - return this.map.computeSize(); + return this.map.size(); } @Override public boolean isEmpty() { - return this.map.checkIfEmpty(); + return this.map.isEmpty(); } @Override @@ -2643,12 +3314,12 @@ static final class Values extends AbstractCollection { @Override public int size() { - return this.map.computeSize(); + return this.map.size(); } @Override public boolean isEmpty() { - return this.map.checkIfEmpty(); + return this.map.isEmpty(); } @Override diff --git a/internal-api/src/main/java/datadog/trace/util/StringIndex.java b/internal-api/src/main/java/datadog/trace/util/StringIndex.java new file mode 100644 index 00000000000..ec16b51dd3e --- /dev/null +++ b/internal-api/src/main/java/datadog/trace/util/StringIndex.java @@ -0,0 +1,297 @@ +package datadog.trace.util; + +import java.lang.reflect.Array; +import java.util.function.Function; +import java.util.function.ToIntFunction; +import java.util.function.ToLongFunction; + +/** + * Flat open-addressed name set. Generic — it knows only names. + * + *

<p>Three ways to use it, trading convenience for indirection: + * + *

<ul> + *
  <li>{@link Support} — static algorithm over raw arrays. Keep the arrays in your own + * (ideally {@code static final}) fields and the JIT folds the refs to constants. The fastest + * path; nothing to dereference. + *
  <li>{@link Data} — a build-time carrier for the placed {@code {hashes, names}} returned + * by {@link Support#create}. Pull its fields into your own and discard it. + *
  <li>The {@code StringIndex} instance ({@link #of}) — a convenience wrapper holding the + * arrays; {@link #indexOf}/{@link #contains} delegate to {@link Support}. Costs an + * instance-field load per call (the indirection the static path removes) — fine off the hot + * path. + *
</ul> + * + *

<p>Consumers attach their own parallel payload arrays (ids, values, ...) sized to {@link #slots} + * and indexed by the slot {@code indexOf} returns. {@code mapValues}/{@code mapIntValues}/{@code + * mapLongValues} build such an array at construction; {@code lookup}/{@code lookupOrDefault} read + * one back in a single call (slot resolve + array read). + * + *

<p>Slot 0-value is the empty sentinel: {@link Support#hash} never returns 0, so {@code hashes[i] + * == 0} unambiguously means an empty slot. + * + *

Trades memory for simplicity (and, incidentally, speed). The table is 2x-oversized (load + * factor ≤ 0.5) so build-time placement always finds a free slot and never has to rehash or + * resize — short probe chains are a welcome side effect, not the design goal. The cached {@code + * int[]} hashes gate {@code equals()}. Both cost memory, so a tightly-packed set is more compact: + * prefer {@link java.util.Set#copyOf} (the JDK's {@code SetN}) when you only need membership, and + * reach for {@code StringIndex} for the {@code indexOf}->parallel-array (name→id) + * capability or the hot, allocation-free static {@link Support} path. (If footprint ever matters + * more than build simplicity, a higher load factor with construction-time rehashing would close the + * gap.) + */ +public final class StringIndex { + private final int[] hashes; + private final String[] names; + public final int slots; // == hashes.length + + private StringIndex(int[] hashes, String[] names) { + this.hashes = hashes; + this.names = names; + this.slots = hashes.length; + } + + /** + * Convenience instance — wraps the placed arrays. For the hot path prefer raw {@link Support}. + */ + public static StringIndex of(String... names) { + Data data = Support.create(names); + return new StringIndex(data.hashes, data.names); + } + + /** Slot of {@code name}, or -1. Delegates to {@link Support} on the instance's arrays. */ + public int indexOf(String name) { + return Support.indexOf(this.hashes, this.names, name); + } + + public boolean contains(String name) { + return indexOf(name) >= 0; + } + + /** Table size — allocate parallel payload arrays of this length. */ + public int slots() { + return this.slots; + } + + // --- value mapping: build a slot-aligned parallel array (off the hot path) --- + + /** + * Builds a slot-aligned {@code T[]} of values: {@code out[indexOf(name)] == fn.apply(name)} for + * every indexed name; other slots stay {@code null}. 
{@code type} is the array element type (Java + * can't allocate a generic array without it). Pair with {@link #lookup(Object[], String)}. + */ + public T[] mapValues(Class type, Function fn) { + return Support.mapValues(this.names, type, fn); + } + + /** Slot-aligned {@code int[]} of values; absent slots stay 0. See {@link #mapValues}. */ + public int[] mapIntValues(ToIntFunction fn) { + return Support.mapIntValues(this.names, fn); + } + + /** Slot-aligned {@code long[]} of values; absent slots stay 0. See {@link #mapValues}. */ + public long[] mapLongValues(ToLongFunction fn) { + return Support.mapLongValues(this.names, fn); + } + + // --- lookup: resolve a key and read its parallel value in one call --- + + /** {@code data[indexOf(key)]}, or {@code null} when {@code key} is absent. */ + public T lookup(T[] data, String key) { + return Support.lookup(this.hashes, this.names, data, key); + } + + /** {@code data[indexOf(key)]}, or {@code defaultValue} when {@code key} is absent. */ + public T lookupOrDefault(T[] data, String key, T defaultValue) { + return Support.lookupOrDefault(this.hashes, this.names, data, key, defaultValue); + } + + /** {@code data[indexOf(key)]}, or 0 when {@code key} is absent. */ + public int lookup(int[] data, String key) { + return Support.lookup(this.hashes, this.names, data, key); + } + + /** {@code data[indexOf(key)]}, or {@code defaultValue} when {@code key} is absent. */ + public int lookupOrDefault(int[] data, String key, int defaultValue) { + return Support.lookupOrDefault(this.hashes, this.names, data, key, defaultValue); + } + + /** {@code data[indexOf(key)]}, or 0 when {@code key} is absent. */ + public long lookup(long[] data, String key) { + return Support.lookup(this.hashes, this.names, data, key); + } + + /** {@code data[indexOf(key)]}, or {@code defaultValue} when {@code key} is absent. 
*/ + public long lookupOrDefault(long[] data, String key, long defaultValue) { + return Support.lookupOrDefault(this.hashes, this.names, data, key, defaultValue); + } + + /** Build-time carrier. Pull the fields into your own (static final) fields; don't keep this. */ + public static final class Data { + public final int[] hashes; + public final String[] names; + + Data(int[] hashes, String[] names) { + this.hashes = hashes; + this.names = names; + } + } + + /** + * Static algorithm over raw arrays. Query helpers take raw arrays, never a Data or a StringIndex. + */ + public static final class Support { + private Support() {} + + /** Spread of String.hashCode; 0 reserved as the empty sentinel. */ + public static int hash(String name) { + int h = name.hashCode(); // cached on String -> field load + return h == 0 ? 0xDD06 : h ^ (h >>> 16); + } + + /** Power-of-two size, 2x-oversized so load factor stays <= 0.5. */ + public static int tableSizeFor(int n) { + int size = 1; + while (size <= n) { + size <<= 1; + } + return size << 1; + } + + /** Build the placed table. Returns a Data carrier; pull its arrays into your own fields. */ + public static Data create(String... names) { + int size = tableSizeFor(names.length); + int[] hashes = new int[size]; + String[] placed = new String[size]; + for (String name : names) { + put(hashes, placed, name, hash(name)); + } + return new Data(hashes, placed); + } + + /** + * Slot-aligned {@code T[]} over placed {@code names}: {@code out[slot] = fn(name)} per name, + * {@code null} elsewhere. {@code type} is the array element type (generic-array allocation). 
+ */ + @SuppressWarnings("unchecked") + public static T[] mapValues(String[] names, Class type, Function fn) { + T[] out = (T[]) Array.newInstance(type, names.length); + for (int slot = 0; slot < names.length; slot++) { + String name = names[slot]; + if (name != null) { + out[slot] = fn.apply(name); + } + } + return out; + } + + /** + * Slot-aligned {@code int[]} over placed {@code names}; {@code out[slot] = fn(name)}, 0 else. + */ + public static int[] mapIntValues(String[] names, ToIntFunction fn) { + int[] out = new int[names.length]; + for (int slot = 0; slot < names.length; slot++) { + String name = names[slot]; + if (name != null) { + out[slot] = fn.applyAsInt(name); + } + } + return out; + } + + /** + * Slot-aligned {@code long[]} over placed {@code names}; {@code out[slot] = fn(name)}, 0 else. + */ + public static long[] mapLongValues(String[] names, ToLongFunction fn) { + long[] out = new long[names.length]; + for (int slot = 0; slot < names.length; slot++) { + String name = names[slot]; + if (name != null) { + out[slot] = fn.applyAsLong(name); + } + } + return out; + } + + /** Build-time placement. Returns the slot. */ + public static int put(int[] hashes, String[] names, String name, int h) { + final int mask = hashes.length - 1; + int i = h & mask; + for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) { + if (hashes[i] == 0) { + hashes[i] = h; + names[i] = name; + return i; + } + if (hashes[i] == h && eq(names[i], name)) { + return i; // already present + } + } + throw new IllegalStateException("table full"); // impossible at LF <= 0.5 + } + + /** Probe; returns the slot or -1. Raw arrays — no Data, no instance. 
*/ + public static int indexOf(int[] hashes, String[] names, String name, int h) { + final int mask = hashes.length - 1; + int i = h & mask; + for (int probes = 0; probes <= mask; probes++, i = (i + 1) & mask) { + int sh = hashes[i]; + if (sh == 0) { + return -1; + } + if (sh == h && eq(names[i], name)) { + return i; + } + } + return -1; + } + + public static int indexOf(int[] hashes, String[] names, String name) { + return indexOf(hashes, names, name, hash(name)); + } + + /** {@code data[indexOf(...)]}, or {@code null} when {@code key} is absent. */ + public static T lookup(int[] hashes, String[] names, T[] data, String key) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : null; + } + + /** {@code data[indexOf(...)]}, or {@code defaultValue} when {@code key} is absent. */ + public static T lookupOrDefault( + int[] hashes, String[] names, T[] data, String key, T defaultValue) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : defaultValue; + } + + /** {@code data[indexOf(...)]}, or 0 when {@code key} is absent. */ + public static int lookup(int[] hashes, String[] names, int[] data, String key) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : 0; + } + + /** {@code data[indexOf(...)]}, or {@code defaultValue} when {@code key} is absent. */ + public static int lookupOrDefault( + int[] hashes, String[] names, int[] data, String key, int defaultValue) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : defaultValue; + } + + /** {@code data[indexOf(...)]}, or 0 when {@code key} is absent. */ + public static long lookup(int[] hashes, String[] names, long[] data, String key) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : 0L; + } + + /** {@code data[indexOf(...)]}, or {@code defaultValue} when {@code key} is absent. 
*/ + public static long lookupOrDefault( + int[] hashes, String[] names, long[] data, String key, long defaultValue) { + int slot = indexOf(hashes, names, key); + return slot >= 0 ? data[slot] : defaultValue; + } + + // `a` is a stored name on an occupied slot (never null); `b` is a non-null query. + private static boolean eq(String a, String b) { + return a == b || a.equals(b); // interned literals hit the == fast path + } + } +} diff --git a/internal-api/src/test/java/datadog/trace/api/KnownTagIdsTest.java b/internal-api/src/test/java/datadog/trace/api/KnownTagIdsTest.java new file mode 100644 index 00000000000..543c0b848ba --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/KnownTagIdsTest.java @@ -0,0 +1,151 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.bootstrap.instrumentation.api.Tags; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +/** + * Parity test for the keyOf substrate (slice 1): the {@link KnownTagIds} registry + the {@link + * KnownTags.Resolver} it registers. Verifies name ↔ id resolution without any dense store — + * {@code keyOf}/{@code nameOf} depend only on globalSerial + name, not on the (dormant) positional + * layout. + */ +class KnownTagIdsTest { + + /** (name, id) pairs — the full registry. keyOf returns the id verbatim (incl. INTERCEPTED). 
*/ + static Stream knownTags() { + return Stream.of( + Arguments.of(Tags.ERROR, KnownTagIds.ERROR), + Arguments.of(DDTags.PARENT_ID, KnownTagIds.PARENT_ID), + Arguments.of(DDTags.BASE_SERVICE, KnownTagIds.BASE_SERVICE), + Arguments.of(Tags.VERSION, KnownTagIds.VERSION), + Arguments.of(KnownTagIds.ENV, KnownTagIds.ENV_ID), + Arguments.of(DDTags.DJM_ENABLED, KnownTagIds.DJM_ENABLED), + Arguments.of(DDTags.DSM_ENABLED, KnownTagIds.DSM_ENABLED), + Arguments.of(DDTags.TRACER_HOST, KnownTagIds.TRACER_HOST_ID), + Arguments.of(DDTags.DD_INTEGRATION, KnownTagIds.INTEGRATION_ID), + Arguments.of(DDTags.DD_SVC_SRC, KnownTagIds.SVC_SRC_ID), + Arguments.of(Tags.PEER_SERVICE, KnownTagIds.PEER_SERVICE), + Arguments.of(DDTags.PEER_SERVICE_REMAPPED_FROM, KnownTagIds.PEER_SERVICE_REMAPPED_FROM), + Arguments.of(Tags.HTTP_METHOD, KnownTagIds.HTTP_METHOD), + Arguments.of(Tags.HTTP_ROUTE, KnownTagIds.HTTP_ROUTE), + Arguments.of(Tags.HTTP_URL, KnownTagIds.HTTP_URL), + Arguments.of(Tags.PEER_HOSTNAME, KnownTagIds.PEER_HOSTNAME), + Arguments.of(Tags.PEER_HOST_IPV4, KnownTagIds.PEER_HOST_IPV4), + Arguments.of(Tags.PEER_HOST_IPV6, KnownTagIds.PEER_HOST_IPV6), + Arguments.of(Tags.PEER_PORT, KnownTagIds.PEER_PORT), + Arguments.of(Tags.COMPONENT, KnownTagIds.COMPONENT), + Arguments.of(Tags.SPAN_KIND, KnownTagIds.SPAN_KIND), + Arguments.of(DDTags.LANGUAGE_TAG_KEY, KnownTagIds.LANGUAGE), + Arguments.of(Tags.DB_TYPE, KnownTagIds.DB_TYPE), + Arguments.of(Tags.DB_INSTANCE, KnownTagIds.DB_INSTANCE), + Arguments.of(Tags.DB_USER, KnownTagIds.DB_USER), + Arguments.of(Tags.DB_OPERATION, KnownTagIds.DB_OPERATION), + Arguments.of(Tags.DB_POOL_NAME, KnownTagIds.DB_POOL_NAME)); + } + + /** + * The subset flagged INTERCEPTED (sign bit) — must agree with the interceptor's needsIntercept. 
+ */ + static Stream interceptedTags() { + return Stream.of( + Arguments.of(KnownTagIds.ERROR), + Arguments.of(KnownTagIds.PEER_SERVICE), + Arguments.of(KnownTagIds.HTTP_METHOD), + Arguments.of(KnownTagIds.HTTP_URL), + Arguments.of(KnownTagIds.SPAN_KIND)); + } + + @Test + void resolverIsActiveOnceReferenced() { + // referencing any constant triggers KnownTagIds. -> KnownTags.register + assertTrue(KnownTagIds.ERROR != 0L); + assertTrue(KnownTags.isActive()); + assertEquals(KnownTagIds.SLOT_COUNT, KnownTags.slotCount()); + } + + @ParameterizedTest + @MethodSource("knownTags") + void keyOfResolvesNameToId(String name, long id) { + assertEquals(id, KnownTags.keyOf(name), "keyOf(" + name + ")"); + } + + @ParameterizedTest + @MethodSource("knownTags") + void nameOfResolvesIdToName(String name, long id) { + assertEquals(name, KnownTags.nameOf(id), "nameOf(" + name + ")"); + } + + @ParameterizedTest + @MethodSource("knownTags") + void nameHashMatchesEntryHash(String name, long id) { + assertEquals((int) TagMap.Entry._hash(name), KnownTags.nameHash(id), "nameHash(" + name + ")"); + } + + @ParameterizedTest + @MethodSource("interceptedTags") + void interceptedTagsCarryFlag(long id) { + assertTrue(KnownTags.isIntercepted(id), "isIntercepted"); + } + + @Test + void nonInterceptedTagsDoNotCarryFlag() { + Set intercepted = new HashSet<>(); + interceptedTags().forEach(a -> intercepted.add((Long) a.get()[0])); + knownTags() + .forEach( + a -> { + long id = (Long) a.get()[1]; + if (!intercepted.contains(id)) { + assertFalse(KnownTags.isIntercepted(id), "not intercepted: " + a.get()[0]); + } + }); + } + + @Test + void unknownNamesResolveToZero() { + assertEquals(0L, KnownTags.keyOf("definitely.not.a.known.tag")); + assertEquals(0L, KnownTags.keyOf("http.statuscode")); // close-but-not-listed + assertEquals(0L, KnownTags.keyOf("")); + } + + @Test + void unknownIdsResolveToNullName() { + assertNull(KnownTags.nameOf(0L)); + assertNull(KnownTags.nameOf(KnownTags.tagId(9999, 
"made.up"))); + } + + @Test + void errorIsReservedTheRestAreStored() { + assertTrue(KnownTags.isReserved(KnownTagIds.ERROR), "ERROR reserved"); + assertFalse(KnownTags.isStored(KnownTagIds.ERROR), "ERROR not stored"); + knownTags() + .forEach( + a -> { + long id = (Long) a.get()[1]; + if (id != KnownTagIds.ERROR) { + assertTrue(KnownTags.isStored(id), "stored: " + a.get()[0]); + assertFalse(KnownTags.isReserved(id), "not reserved: " + a.get()[0]); + } + }); + } + + @Test + void globalSerialsAreUnique() { + List serials = new ArrayList<>(); + knownTags().forEach(a -> serials.add((long) KnownTags.globalSerial((Long) a.get()[1]))); + assertEquals(serials.size(), new HashSet<>(serials).size(), "globalSerials must be unique"); + } +} diff --git a/internal-api/src/test/java/datadog/trace/api/OptimizedTagMapDenseForkedTest.java b/internal-api/src/test/java/datadog/trace/api/OptimizedTagMapDenseForkedTest.java new file mode 100644 index 00000000000..cbf12dac893 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/OptimizedTagMapDenseForkedTest.java @@ -0,0 +1,273 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.bootstrap.instrumentation.api.Tags; +import java.util.HashMap; +import java.util.Map; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Exercises the dense known-tag store with a LIVE resolver. Registration ({@link KnownTags}) is a + * global static with no un-register, so this lives in a {@code ForkedTest} (isolated JVM) to keep + * dense routing from leaking into the bucket-only tests in the shared JVM. The dense store is + * dormant in production (no resolver) — this is where it actually executes. + * + *

Stored tags (globalSerial ≥ {@code FIRST_STORED_SERIAL}) route to the dense store; reserved + * tags (e.g. {@code error}) and arbitrary tags stay in the hash buckets. Behavior must be + * observationally identical to the bucket store. + */ +class OptimizedTagMapDenseForkedTest { + + // stored (dense-routed) tags + static final String BASE_SERVICE = DDTags.BASE_SERVICE; + static final String COMPONENT = Tags.COMPONENT; + static final String DB_TYPE = Tags.DB_TYPE; + static final String HTTP_METHOD = Tags.HTTP_METHOD; // stored + intercepted + static final String DB_INSTANCE = Tags.DB_INSTANCE; + // arbitrary (bucket-routed) tags + static final String CUSTOM_A = "custom.tag.a"; + static final String CUSTOM_B = "custom.tag.b"; + + @BeforeAll + static void registerResolver() { + // referencing any KnownTagIds constant triggers its -> KnownTags.register + assertTrue(KnownTagIds.BASE_SERVICE != 0L); + assertTrue(KnownTags.isActive(), "resolver must be live for the dense store to engage"); + assertTrue(KnownTags.isStored(KnownTags.keyOf(BASE_SERVICE)), "base_service routes dense"); + assertFalse(KnownTags.isStored(KnownTags.keyOf(CUSTOM_A)), "custom tag stays in buckets"); + assertFalse(KnownTags.isStored(KnownTags.keyOf(Tags.ERROR)), "error is reserved, not stored"); + } + + private static OptimizedTagMap map() { + return (OptimizedTagMap) TagMap.create(); + } + + @Test + void knownTagRoundTripsThroughDenseStore() { + OptimizedTagMap map = map(); + map.set(BASE_SERVICE, "billing"); + map.set(COMPONENT, "spring-web"); + + assertEquals("billing", map.getObject(BASE_SERVICE)); + assertEquals("spring-web", map.getString(COMPONENT)); + assertEquals("billing", map.getEntry(BASE_SERVICE).objectValue()); + assertTrue(map.containsKey(BASE_SERVICE)); + assertEquals(2, map.size()); + map.checkIntegrity(); + } + + @Test + void typedKnownValuesRoundTrip() { + OptimizedTagMap map = map(); + map.set(DB_TYPE, "postgresql"); + map.set(HTTP_METHOD, "GET"); + map.set(Tags.PEER_PORT, 5432); 
+ + assertEquals("postgresql", map.getString(DB_TYPE)); + assertEquals("GET", map.getString(HTTP_METHOD)); + assertEquals(5432, map.getInt(Tags.PEER_PORT)); + assertEquals(3, map.size()); + map.checkIntegrity(); + } + + @Test + void knownAndUnknownCoexist() { + OptimizedTagMap map = map(); + map.set(BASE_SERVICE, "billing"); // dense + map.set(CUSTOM_A, "alpha"); // bucket + map.set(DB_TYPE, "h2"); // dense + map.set(CUSTOM_B, "beta"); // bucket + + assertEquals("billing", map.getObject(BASE_SERVICE)); + assertEquals("alpha", map.getObject(CUSTOM_A)); + assertEquals("h2", map.getObject(DB_TYPE)); + assertEquals("beta", map.getObject(CUSTOM_B)); + assertEquals(4, map.size()); + assertFalse(map.isEmpty()); + map.checkIntegrity(); + + Map collected = new HashMap<>(); + map.fillMap(collected); + assertEquals(4, collected.size()); + assertEquals("billing", collected.get(BASE_SERVICE)); + assertEquals("alpha", collected.get(CUSTOM_A)); + assertEquals("h2", collected.get(DB_TYPE)); + assertEquals("beta", collected.get(CUSTOM_B)); + } + + @Test + void overwriteKnownReplacesInPlace() { + OptimizedTagMap map = map(); + map.set(COMPONENT, "first"); + assertEquals("first", map.getObject(COMPONENT)); + map.set(COMPONENT, "second"); + assertEquals("second", map.getObject(COMPONENT)); + assertEquals(1, map.size()); // overwrite, not append + map.checkIntegrity(); + } + + @Test + void removeKnownClearsIt() { + OptimizedTagMap map = map(); + map.set(BASE_SERVICE, "billing"); + map.set(DB_TYPE, "h2"); + map.set(CUSTOM_A, "alpha"); + assertEquals(3, map.size()); + + TagMap.Entry removed = map.getAndRemove(BASE_SERVICE); + assertEquals("billing", removed.objectValue()); + assertNull(map.getObject(BASE_SERVICE)); + assertEquals("h2", map.getObject(DB_TYPE)); // sibling dense entry intact + assertEquals("alpha", map.getObject(CUSTOM_A)); + assertEquals(2, map.size()); + map.checkIntegrity(); + } + + @Test + void forEachAndIteratorEmitDenseAndBucketEntries() { + OptimizedTagMap map = 
map(); + map.set(BASE_SERVICE, "billing"); + map.set(COMPONENT, "web"); + map.set(CUSTOM_A, "alpha"); + + Map viaForEach = new HashMap<>(); + map.forEach(reader -> viaForEach.put(reader.tag(), reader.objectValue())); + assertEquals(3, viaForEach.size()); + assertEquals("billing", viaForEach.get(BASE_SERVICE)); + assertEquals("web", viaForEach.get(COMPONENT)); + assertEquals("alpha", viaForEach.get(CUSTOM_A)); + + Map viaIterator = new HashMap<>(); + for (TagMap.EntryReader reader : map) { + viaIterator.put(reader.tag(), reader.objectValue()); + } + assertEquals(viaForEach, viaIterator); + } + + @Test + void copyPreservesDenseStore() { + OptimizedTagMap map = map(); + map.set(BASE_SERVICE, "billing"); + map.set(CUSTOM_A, "alpha"); + + OptimizedTagMap copy = (OptimizedTagMap) map.copy(); + assertEquals("billing", copy.getObject(BASE_SERVICE)); + assertEquals("alpha", copy.getObject(CUSTOM_A)); + assertEquals(2, copy.size()); + + // independence: mutating the copy doesn't touch the original's dense store + copy.set(BASE_SERVICE, "shipping"); + assertEquals("shipping", copy.getObject(BASE_SERVICE)); + assertEquals("billing", map.getObject(BASE_SERVICE)); + copy.checkIntegrity(); + map.checkIntegrity(); + } + + @Test + void clearEmptiesDenseStore() { + OptimizedTagMap map = map(); + map.set(BASE_SERVICE, "billing"); + map.set(CUSTOM_A, "alpha"); + map.clear(); + assertEquals(0, map.size()); + assertTrue(map.isEmpty()); + assertNull(map.getObject(BASE_SERVICE)); + map.checkIntegrity(); + } + + @Test + void putAllMergesDenseStore() { + OptimizedTagMap src = map(); + src.set(BASE_SERVICE, "billing"); + src.set(DB_TYPE, "h2"); + src.set(CUSTOM_A, "alpha"); + + OptimizedTagMap dst = map(); + dst.set(COMPONENT, "web"); // dense, distinct + dst.set(BASE_SERVICE, "old"); // dense, clobbered by src + dst.putAll((TagMap) src); + + assertEquals("billing", dst.getObject(BASE_SERVICE)); // src clobbers + assertEquals("h2", dst.getObject(DB_TYPE)); + assertEquals("web", 
dst.getObject(COMPONENT)); + assertEquals("alpha", dst.getObject(CUSTOM_A)); + assertEquals(4, dst.size()); + dst.checkIntegrity(); + } + + // ---- read-through union (dense parent + dense child) ---- + + private static OptimizedTagMap frozenParent() { + OptimizedTagMap parent = map(); + parent.set(BASE_SERVICE, "billing"); // dense + parent.set(COMPONENT, "web"); // dense + parent.set(CUSTOM_A, "alpha"); // bucket + parent.freeze(); + return parent; + } + + @Test + void childReadsThroughToParentDense() { + OptimizedTagMap child = map(); + child.withParent(frozenParent()); + child.set(DB_TYPE, "h2"); // child-only dense + child.set(CUSTOM_B, "beta"); // child-only bucket + + // inherited from parent + assertEquals("billing", child.getObject(BASE_SERVICE)); + assertEquals("web", child.getObject(COMPONENT)); + assertEquals("alpha", child.getObject(CUSTOM_A)); + // own + assertEquals("h2", child.getObject(DB_TYPE)); + assertEquals("beta", child.getObject(CUSTOM_B)); + // union size: 3 parent + 2 child + assertEquals(5, child.size()); + assertFalse(child.isEmpty()); + + Map union = new HashMap<>(); + child.forEach(reader -> union.put(reader.tag(), reader.objectValue())); + assertEquals(5, union.size()); + assertEquals("billing", union.get(BASE_SERVICE)); + assertEquals("h2", union.get(DB_TYPE)); + child.checkIntegrity(); + } + + @Test + void childDenseShadowsParentDense() { + OptimizedTagMap child = map(); + child.withParent(frozenParent()); + child.set(BASE_SERVICE, "shipping"); // shadows parent's dense base_service + + assertEquals("shipping", child.getObject(BASE_SERVICE)); // local wins + assertEquals("web", child.getObject(COMPONENT)); // still inherited + assertEquals(3, child.size()); // base_service counted once (shadowed, not doubled) + + Map union = new HashMap<>(); + child.forEach(reader -> union.put(reader.tag(), reader.objectValue())); + assertEquals(3, union.size()); + assertEquals("shipping", union.get(BASE_SERVICE)); // shadow value, parent suppressed 
+ } + + @Test + void removingParentDenseKeyTombstonesIt() { + OptimizedTagMap child = map(); + child.withParent(frozenParent()); + + TagMap.Entry removed = child.getAndRemove(BASE_SERVICE); // parent-only dense key + assertEquals("billing", removed.objectValue()); // prior visible value was the parent's + assertNull(child.getObject(BASE_SERVICE)); // tombstoned: no read-through + assertEquals("web", child.getObject(COMPONENT)); // sibling still inherited + assertEquals(2, child.size()); // 3 parent - 1 tombstoned + + Map union = new HashMap<>(); + child.forEach(reader -> union.put(reader.tag(), reader.objectValue())); + assertEquals(2, union.size()); + assertFalse(union.containsKey(BASE_SERVICE)); + child.checkIntegrity(); + } +} diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapDenseFuzzForkedTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapDenseFuzzForkedTest.java new file mode 100644 index 00000000000..d5eaa762c50 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/TagMapDenseFuzzForkedTest.java @@ -0,0 +1,201 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import datadog.trace.api.TagMapFuzzTest.MapAction; +import datadog.trace.api.TagMapFuzzTest.TestCase; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; +import java.util.function.Supplier; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Fuzz test for the dense store under a LIVE resolver, across three key regimes. Reuses {@link + * TagMapFuzzTest}'s oracle machinery ({@code test(TestCase)} replays a random action sequence + * against a {@code HashMap}, verifying each step + {@code checkIntegrity}). + * + *

<p>Uses a synthetic prefix resolver ({@code known-N} -> stored / dense, anything else -> bucket) + * rather than the real {@link KnownTagIds}: it gives an UNBOUNDED known key space, so the dense + * array actually grows past its initial capacity and the linear scan gets long, and it lets each + * test pin the known/custom ratio. The three regimes exercise paths the mixed run alone would miss: + *
<ul>
+ *
<li>known-only — the all-dense map (dense growth, dense-only putAll/copy/clear/iterate + * with no bucket phase, knownCount-only size). + *
<li>custom-only — confirms the dense branches stay inert when nothing resolves, even + * with a resolver registered. + *
<li>mixed — both regions and their interaction. + *
</ul> + * + *
<p>
Forked (isolated JVM) because resolver registration is a global static with no un-register. + */ +class TagMapDenseFuzzForkedTest { + static final int SINGLE_MAP_CASES = 1500; + static final int MERGE_CASES = 400; + static final int MAX_ACTIONS = 40; + static final int MIN_ACTIONS = 8; + + // unbounded synthetic key spaces — large enough to grow the dense array past cap-8 several times + static final int KNOWN_SPACE = 48; + static final int CUSTOM_SPACE = 48; + + enum Regime { + KNOWN_ONLY, + CUSTOM_ONLY, + MIXED + } + + /** + * Synthetic resolver: {@code known-N} -> stored id (serial = FIRST_STORED_SERIAL + N); else 0. + */ + static final KnownTags.Resolver FUZZ_RESOLVER = + new KnownTags.Resolver() { + @Override + public long keyOf(String name) { + if (name.startsWith("known-")) { + int n = Integer.parseInt(name.substring("known-".length())); + return KnownTags.tagId(KnownTags.FIRST_STORED_SERIAL + n, name); + } + return 0L; + } + + @Override + public String nameOf(long tagId) { + int serial = KnownTags.globalSerial(tagId); + return serial >= KnownTags.FIRST_STORED_SERIAL + ? 
"known-" + (serial - KnownTags.FIRST_STORED_SERIAL) + : null; + } + + @Override + public int slotCount() { + return 0; // positional unused + } + }; + + @BeforeAll + static void registerResolver() { + KnownTags.register(FUZZ_RESOLVER); + assertTrue(KnownTags.isActive(), "resolver must be live"); + assertTrue(KnownTags.isStored(KnownTags.keyOf("known-0")), "known- routes dense"); + assertFalse(KnownTags.isStored(KnownTags.keyOf("custom-0")), "custom- stays in buckets"); + // round-trip the synthetic encoding + long id = KnownTags.keyOf("known-7"); + assertTrue("known-7".equals(KnownTags.nameOf(id)), "name<->id round-trips"); + } + + @Test + void knownOnlyFuzz() { + runRegime(Regime.KNOWN_ONLY); + } + + @Test + void customOnlyFuzz() { + runRegime(Regime.CUSTOM_ONLY); + } + + @Test + void mixedFuzz() { + runRegime(Regime.MIXED); + } + + private static void runRegime(Regime regime) { + for (int i = 0; i < SINGLE_MAP_CASES; ++i) { + TagMapFuzzTest.test(generateTest(regime)); + } + for (int i = 0; i < MERGE_CASES; ++i) { + OptimizedTagMap mapA = TagMapFuzzTest.test(generateTest(regime)); + OptimizedTagMap mapB = TagMapFuzzTest.test(generateTest(regime)); + + HashMap hashA = new HashMap<>(mapA); + HashMap hashB = new HashMap<>(mapB); + + mapA.putAll(mapB); + hashA.putAll(hashB); + + TagMapFuzzTest.assertMapEquals(hashA, mapA); + } + } + + // --- action generation (mirrors TagMapFuzzTest.randomAction, regime-driven key pool) --- + + private static TestCase generateTest(Regime regime) { + ThreadLocalRandom r = ThreadLocalRandom.current(); + int numActions = r.nextInt(MAX_ACTIONS - MIN_ACTIONS) + MIN_ACTIONS; + List actions = new ArrayList<>(numActions); + for (int i = 0; i < numActions; ++i) { + actions.add(randomAction(regime)); + } + return new TestCase(actions); + } + + private static MapAction randomAction(Regime regime) { + switch (randomChoice(0.02, 0.1, 0.2)) { + case 0: + return TagMapFuzzTest.clear(); + case 1: + return choose( + () -> 
TagMapFuzzTest.putAll(randomKeysAndValues(regime)), + () -> TagMapFuzzTest.putAllTagMap(randomKeysAndValues(regime)), + () -> TagMapFuzzTest.putAllLedger(randomKeysAndValues(regime))); + case 2: + return choose( + () -> TagMapFuzzTest.remove(randomKey(regime)), + () -> TagMapFuzzTest.removeLight(randomKey(regime)), + () -> TagMapFuzzTest.getAndRemove(randomKey(regime))); + default: + return choose( + () -> TagMapFuzzTest.put(randomKey(regime), randomValue()), + () -> TagMapFuzzTest.set(randomKey(regime), randomValue()), + () -> TagMapFuzzTest.getAndSet(randomKey(regime), randomValue())); + } + } + + private static String randomKey(Regime regime) { + ThreadLocalRandom r = ThreadLocalRandom.current(); + boolean known; + switch (regime) { + case KNOWN_ONLY: + known = true; + break; + case CUSTOM_ONLY: + known = false; + break; + default: + known = r.nextBoolean(); + } + return known ? "known-" + r.nextInt(KNOWN_SPACE) : "custom-" + r.nextInt(CUSTOM_SPACE); + } + + private static String randomValue() { + return "values-" + ThreadLocalRandom.current().nextInt(); + } + + private static String[] randomKeysAndValues(Regime regime) { + int numEntries = ThreadLocalRandom.current().nextInt(KNOWN_SPACE + CUSTOM_SPACE); + String[] keysAndValues = new String[numEntries << 1]; + for (int i = 0; i < keysAndValues.length; i += 2) { + keysAndValues[i] = randomKey(regime); + keysAndValues[i + 1] = randomValue(); + } + return keysAndValues; + } + + private static int randomChoice(double... proportions) { + double selector = ThreadLocalRandom.current().nextDouble(); + for (int i = 0; i < proportions.length; ++i) { + if (selector < proportions[i]) return i; + selector -= proportions[i]; + } + return proportions.length; + } + + @SafeVarargs + private static MapAction choose(Supplier... 
choices) { + return choices[ThreadLocalRandom.current().nextInt(choices.length)].get(); + } +} diff --git a/internal-api/src/test/java/datadog/trace/api/TagMapReadThroughTest.java b/internal-api/src/test/java/datadog/trace/api/TagMapReadThroughTest.java new file mode 100644 index 00000000000..23de03cdc86 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/api/TagMapReadThroughTest.java @@ -0,0 +1,339 @@ +package datadog.trace.api; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Set; +import org.junit.jupiter.api.Test; + +/** + * Read-through support: a child {@link OptimizedTagMap} with a frozen parent reads through to the + * parent on a local miss, while local entries shadow the parent (local-wins). Covers the read + * path plus removal/tombstones, bulk iteration, collection views, and copy parity (slices 2-4). 
+ */ +class TagMapReadThroughTest { + + private static OptimizedTagMap frozenParent() { + OptimizedTagMap parent = (OptimizedTagMap) TagMap.create(); + parent.set("a", "parent-a"); + parent.set("b", "parent-b"); + parent.freeze(); + return parent; + } + + @Test + void readsThroughToParentOnMiss() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", "child-c"); + child.withParent(frozenParent()); + + assertEquals("parent-a", child.getString("a")); // miss locally -> read through + assertEquals("parent-b", child.getString("b")); + assertEquals("child-c", child.getString("c")); // local + assertNull(child.getString("missing")); + assertTrue(child.containsKey("a")); + assertFalse(child.containsKey("missing")); + } + + @Test + void localEntryShadowsParent() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // same key as parent + child.withParent(frozenParent()); + + assertEquals("child-b", child.getString("b")); // local wins + assertEquals("parent-a", child.getString("a")); // parent still visible + } + + @Test + void estimateSizeIsUpperBound() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.set("c", "child-c"); + child.withParent(frozenParent()); + + // true union = {a, b, c} = 3; estimate over-counts the shadowed "b": local 2 + parent 2 = 4 + assertEquals(4, child.estimateSize()); + assertTrue(child.estimateSize() >= 3, "estimateSize must be an upper bound on the true size"); + } + + @Test + void emptinessSemantics() { + OptimizedTagMap emptyOverEmpty = (OptimizedTagMap) TagMap.create(); + emptyOverEmpty.withParent((OptimizedTagMap) TagMap.create().freeze()); + assertTrue(emptyOverEmpty.isEmpty()); + assertTrue(emptyOverEmpty.isDefinitelyEmpty()); + + OptimizedTagMap emptyOverNonEmpty = (OptimizedTagMap) TagMap.create(); + emptyOverNonEmpty.withParent(frozenParent()); + assertFalse(emptyOverNonEmpty.isEmpty(), "a non-empty 
parent makes the map non-empty"); + assertFalse(emptyOverNonEmpty.isDefinitelyEmpty()); + + assertTrue(((OptimizedTagMap) TagMap.create()).isDefinitelyEmpty()); + } + + @Test + void parentMustBeFrozen() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + OptimizedTagMap mutableParent = (OptimizedTagMap) TagMap.create(); + assertThrows(IllegalStateException.class, () -> child.withParent(mutableParent)); + } + + // --- slice 2: removal / tombstones --- + + @Test + void removingParentKeyHidesItFromChildButNotFromParent() { + OptimizedTagMap parent = frozenParent(); + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.withParent(parent); + + assertEquals("parent-a", child.getString("a")); // visible before removal + child.remove("a"); + + assertNull(child.getString("a")); // tombstoned: no longer reads through + assertFalse(child.containsKey("a")); + assertEquals("parent-b", child.getString("b")); // other parent keys unaffected + assertEquals("parent-a", parent.getString("a")); // frozen parent untouched + } + + @Test + void removeReturnsPriorVisibleValueViaParent() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.withParent(frozenParent()); + + // Map.remove contract: the key was present (via read-through), so removal reports it. 
+ assertTrue(child.remove("a"), "removing a parent-exposed key should report it was present"); + assertNull(child.getString("a")); + } + + @Test + void reSettingARemovedKeyRestoresVisibility() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.withParent(frozenParent()); + + child.remove("a"); + assertNull(child.getString("a")); + + child.set("a", "child-a"); // re-set clears the tombstone + assertEquals("child-a", child.getString("a")); + } + + @Test + void removingAKeyThatIsBothLocalAndParentHidesBoth() { + OptimizedTagMap parent = frozenParent(); // same instance is re-checked below + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.withParent(parent); + + assertEquals("child-b", child.getString("b")); + child.remove("b"); + assertNull(child.getString("b"), "removal must hide both the local entry and the parent's"); + assertEquals("parent-b", parent.getString("b")); // the attached frozen parent is untouched + } + + // --- slice 3a: bulk forEach union + exact size/isEmpty --- + + private static Map collect(OptimizedTagMap map) { + Map out = new HashMap<>(); + map.forEach(e -> out.put(e.tag(), e.objectValue())); + return out; + } + + @Test + void forEachEmitsDedupedUnionLocalWins() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.set("c", "child-c"); + child.withParent(frozenParent()); // parent {a, b} + + Map u = collect(child); + assertEquals(3, u.size(), "union {a, b, c} with b deduped"); + assertEquals("parent-a", u.get("a")); // read-through + assertEquals("child-b", u.get("b")); // local wins (no duplicate emit) + assertEquals("child-c", u.get("c")); + } + + @Test + void forEachSkipsTombstonedParentKeys() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", "child-c"); + child.withParent(frozenParent()); + child.remove("a"); // tombstone parent's "a" + + Map u = collect(child); + assertEquals(2, u.size()); + assertFalse(u.containsKey("a")); + 
assertEquals("parent-b", u.get("b")); + assertEquals("child-c", u.get("c")); + } + + @Test + void biConsumerForEachAlsoEmitsUnion() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", "child-c"); + child.withParent(frozenParent()); + + Map out = new HashMap<>(); + child.forEach(out, (m, e) -> m.put(e.tag(), e.objectValue())); // non-capturing: alloc-free path + assertEquals(3, out.size()); + assertEquals("parent-a", out.get("a")); + assertEquals("child-c", out.get("c")); + } + + @Test + void sizeIsExactUnion() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows + child.set("c", "child-c"); + child.withParent(frozenParent()); + assertEquals(3, child.size()); // {a, b, c} — b deduped, not 4 + + child.remove("a"); + assertEquals(2, child.size()); // {b, c} + } + + @Test + void isEmptyExactWhenAllParentKeysTombstonedAndNoLocal() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.withParent(frozenParent()); // parent {a, b} + assertFalse(child.isEmpty()); + + child.remove("a"); + child.remove("b"); + assertTrue(child.isEmpty(), "all parent keys tombstoned and no local entries -> empty"); + assertEquals(0, child.size()); + } + + // --- slice 3b: pull-based iterators / collection views --- + + @Test + void iteratorEmitsDedupedUnion() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.set("c", "child-c"); + child.withParent(frozenParent()); + + Map u = new HashMap<>(); + Iterator it = child.iterator(); + while (it.hasNext()) { + TagMap.EntryReader e = it.next(); + u.put(e.tag(), e.objectValue()); + } + assertEquals(3, u.size()); + assertEquals("parent-a", u.get("a")); + assertEquals("child-b", u.get("b")); // local wins, emitted once + assertEquals("child-c", u.get("c")); + } + + @Test + void keySetReflectsUnionAndTombstones() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", 
"child-c"); + child.withParent(frozenParent()); + + Set keys = child.keySet(); + assertEquals(3, keys.size()); // a, b, c + assertTrue(keys.contains("a")); + assertTrue(keys.contains("c")); + + child.remove("a"); + assertEquals(2, child.keySet().size()); + assertFalse(child.keySet().contains("a")); + } + + @Test + void valuesAndEntrySetReflectUnion() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.withParent(frozenParent()); + + assertEquals(2, child.entrySet().size()); // {a, b} — b deduped + assertTrue(child.values().contains("child-b")); // local-won value + assertTrue(child.values().contains("parent-a")); + assertFalse(child.values().contains("parent-b"), "shadowed parent value must not appear"); + } + + // --- slice 4: behavior-identical to a copy-down / flat map --- + + @Test + void copyIsObservationallyIdentical() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); // shadows parent "b" + child.set("c", "child-c"); + child.withParent(frozenParent()); // {a, b} + + OptimizedTagMap copy = (OptimizedTagMap) child.copy(); + assertEquals(child.size(), copy.size()); + assertEquals("parent-a", copy.getString("a")); // copy still reads through + assertEquals("child-b", copy.getString("b")); + assertEquals("child-c", copy.getString("c")); + assertEquals(collect(child), collect(copy)); // same union + } + + @Test + void copyIsIndependentlyMutable() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", "child-c"); + child.withParent(frozenParent()); + + OptimizedTagMap copy = (OptimizedTagMap) child.copy(); + copy.set("c", "copy-c"); // mutate copy's local + copy.remove("a"); // tombstone on copy only + + assertEquals("child-c", child.getString("c"), "original unaffected by copy mutation"); + assertEquals("parent-a", child.getString("a"), "original still reads through a"); + assertEquals("copy-c", copy.getString("c")); + 
assertNull(copy.getString("a")); + } + + @Test + void copyPreservesTombstones() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.withParent(frozenParent()); + child.remove("a"); // tombstone "a" + + OptimizedTagMap copy = (OptimizedTagMap) child.copy(); + assertNull(copy.getString("a"), "tombstone must carry into the copy"); + assertEquals("parent-b", copy.getString("b")); + } + + /** The contract that lets the consumer flip mergedTracerTags to a parent. */ + @Test + void readThroughMatchesAnEquivalentFlatMap() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("b", "child-b"); + child.set("c", "child-c"); + child.withParent(frozenParent()); + + OptimizedTagMap flat = (OptimizedTagMap) TagMap.create(); + flat.set("a", "parent-a"); + flat.set("b", "child-b"); + flat.set("c", "child-c"); + + assertEquals(flat.size(), child.size()); + assertEquals(collect(flat), collect(child)); + assertEquals(flat.keySet(), child.keySet()); + for (String k : new String[] {"a", "b", "c", "missing"}) { + assertEquals(flat.getString(k), child.getString(k), "mismatch for key " + k); + } + } + + @Test + void immutableCopyOfReadThroughIsFrozenAndStillReadsThrough() { + OptimizedTagMap child = (OptimizedTagMap) TagMap.create(); + child.set("c", "child-c"); + child.withParent(frozenParent()); + + OptimizedTagMap frozen = (OptimizedTagMap) child.immutableCopy(); + assertTrue(frozen.isFrozen()); + assertEquals("parent-a", frozen.getString("a")); // union preserved + assertEquals("child-c", frozen.getString("c")); + assertThrows(IllegalStateException.class, () -> frozen.set("x", "y")); // frozen blocks writes + } +} diff --git a/internal-api/src/test/java/datadog/trace/util/StringIndexFootprintTest.java b/internal-api/src/test/java/datadog/trace/util/StringIndexFootprintTest.java new file mode 100644 index 00000000000..9a3b1db2571 --- /dev/null +++ b/internal-api/src/test/java/datadog/trace/util/StringIndexFootprintTest.java @@ -0,0 +1,88 @@ 
+package datadog.trace.util; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeSet; +import org.junit.jupiter.api.Test; +import org.openjdk.jol.info.GraphLayout; + +/** + * Retained-footprint comparison (JOL) for {@link StringIndex} vs the JDK set representations, over + * a fixed read-only string set. Footprint is deterministic, so this is safe to run under load + * (unlike the throughput benchmarks). + * + *

<p>All structures hold the same String instances, so the shared strings cancel out and the + * differences reflect structural overhead. We report total retained bytes and the overhead above a + * plain {@code String[]} (which is just the strings + a reference array). {@code Set.copyOf} yields + * the JDK's compact {@code SetN} only on Java 10+ (it falls back to {@code HashSet} pre-10), so the + * copyOf row is only meaningful on a 10+ test JVM. + * + *

<p>The one robust cross-JVM invariant we assert is that {@code StringIndex} is lighter than + * {@code HashSet} (no per-element {@code Node} objects). The {@code StringIndex} vs {@code SetN} + * comparison is left as reported data rather than an assertion: {@code StringIndex} caches an + * {@code int[]} of hashes that {@code SetN} does not, so which one wins on bytes is genuinely worth + * measuring. + * + *

<p>Measured retained bytes (Java 17, JOL estimate mode — relative ordering reliable, exact bytes + * approximate): + * + *

<pre>{@code
+ * n      array   hashSet  treeSet   copyOf  stringIndex
+ * 8        496      864      848      552      760
+ * 32      1936     3168     3152     2088     2872
+ * 128     7696    12384    12368     8232    11320
+ * }</pre>
+ *
+ * Finding: {@code StringIndex} is ~9% lighter than {@code HashSet}/{@code TreeSet} (no per-element
+ * {@code Node} objects), but {@code Set.copyOf} ({@code SetN}) is the most compact by a wide margin
+ * (~27% under {@code StringIndex} at n=128) — {@code StringIndex} pays for its cached {@code int[]}
+ * hashes and 2x-oversized {@code String[]}. So {@code StringIndex}'s edge over {@code SetN} is
+ * speed and the {@code indexOf}->parallel-array capability, not footprint.
+ */
+class StringIndexFootprintTest {
+
+  static String[] elements(int n) {
+    String[] a = new String[n];
+    for (int i = 0; i < n; ++i) {
+      a[i] = "element-key-" + i; // distinct keys: element-key-0 .. element-key-(n-1)
+    }
+    return a;
+  }
+
+  static long bytes(Object root) {
+    return GraphLayout.parseInstance(root).totalSize(); // JOL: whole graph reachable from root
+  }
+
+  @Test
+  void footprintComparison() {
+    System.out.printf(
+        "%-6s %12s %12s %12s %12s %12s%n",
+        "n", "array", "hashSet", "treeSet", "copyOf", "stringIndex");
+    System.out.printf(
+        "%-6s %12s %12s %12s %12s %12s (overhead above array)%n", "", "", "", "", "", "");
+
+    for (int n : new int[] {8, 32, 128}) {
+      String[] el = elements(n);
+
+      long array = bytes((Object) el); // baseline: strings + reference array
+      long hashSet = bytes(new HashSet<>(Arrays.asList(el)));
+      long treeSet = bytes(new TreeSet<>(Arrays.asList(el)));
+      Set<String> copy = CollectionUtils.tryMakeImmutableSet(Arrays.asList(el));
+      long copyOf = bytes(copy);
+      long stringIndex = bytes(StringIndex.of(el));
+
+      System.out.printf(
+          "%-6d %12d %12d %12d %12d %12d%n", n, array, hashSet, treeSet, copyOf, stringIndex);
+      System.out.printf(
+          "%-6s %12s %12d %12d %12d %12d%n",
+          "", "", hashSet - array, treeSet - array, copyOf - array, stringIndex - array);
+
+      // Robust cross-JVM invariant: no per-element Node objects -> lighter than HashSet.
+      assertTrue(
+          stringIndex < hashSet, "StringIndex should retain fewer bytes than HashSet at n=" + n);
+    }
+  }
+}
diff --git a/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java b/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java
new file mode 100644
index 00000000000..3fcdb729d7d
--- /dev/null
+++ b/internal-api/src/test/java/datadog/trace/util/StringIndexTest.java
@@ -0,0 +1,171 @@
+package datadog.trace.util;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import datadog.trace.util.StringIndex.Data;
+import datadog.trace.util.StringIndex.Support;
+import org.junit.jupiter.api.Test;
+
+class StringIndexTest {
+
+  @Test
+  void hash_spread_and_zeroSentinel() {
+    // "".hashCode() == 0 -> remapped to the non-zero sentinel so 0 can mean "empty slot"
+    assertEquals(0xDD06, Support.hash(""));
+
+    int raw = "foo".hashCode();
+    assertEquals(raw ^ (raw >>> 16), Support.hash("foo")); // high half XOR-folded into the low
+    assertNotEquals(0, Support.hash("foo"));
+  }
+
+  @Test
+  void tableSizeFor_isPow2_andOversized() {
+    assertEquals(2, Support.tableSizeFor(0));
+    assertEquals(4, Support.tableSizeFor(1));
+    assertEquals(8, Support.tableSizeFor(3));
+    assertEquals(16, Support.tableSizeFor(4));
+  }
+
+  @Test
+  void instance_contains_internedAndCopy_andMiss() {
+    StringIndex set = StringIndex.of("foo", "bar", "baz");
+
+    assertEquals(8, set.slots()); // 3 names -> tableSizeFor(3) == 8
+
+    assertTrue(set.contains("foo")); // interned literal -> == fast path in eq
+    assertTrue(set.contains(new String("bar"))); // non-interned -> .equals path
+    assertFalse(set.contains("nope"));
+
+    assertTrue(set.indexOf("baz") >= 0);
+    assertEquals(-1, set.indexOf("nope"));
+  }
+
+  @Test
+  void support_create_then_indexOf() {
+    Data d = Support.create("x", "y");
+
+    int slot = Support.indexOf(d.hashes, d.names, "x"); // 3-arg overload computes the hash
+    assertTrue(slot >= 0);
+    assertEquals("x", d.names[slot]);
+
+    assertEquals(-1, Support.indexOf(d.hashes, d.names, "q"));
+  }
+
+  /** Controlled hashes force collision, linear-probe wraparound, and the already-present path. */
+  @Test
+  void put_and_indexOf_collisionAndWraparound() {
+    int[] hashes = new int[4]; // mask = 3
+    String[] names = new String[4];
+
+    assertEquals(3, Support.put(hashes, names, "a", 7)); // 7 & 3 == 3
+    assertEquals(0, Support.put(hashes, names, "b", 7)); // collides at 3, probes (3+1)&3 == 0
+    assertEquals(3, Support.put(hashes, names, "a", 7)); // already present -> existing slot
+
+    assertEquals(3, Support.indexOf(hashes, names, "a", 7)); // direct hit
+    assertEquals(0, Support.indexOf(hashes, names, "b", 7)); // hit after collision + wraparound
+    assertEquals(
+        -1, Support.indexOf(hashes, names, "c", 7)); // miss after probing 3 -> 0 -> 1(empty)
+    assertEquals(-1, Support.indexOf(hashes, names, "z", 6)); // 6 & 3 == 2, empty -> immediate miss
+  }
+
+  @Test
+  void put_throwsWhenFull() {
+    int[] hashes = new int[2]; // mask = 1
+    String[] names = new String[2];
+
+    Support.put(hashes, names, "a", 4); // 4 & 1 == 0
+    Support.put(hashes, names, "b", 5); // 5 & 1 == 1
+
+    // both slots occupied, no match -> probe exhausts -> throw
+    assertThrows(IllegalStateException.class, () -> Support.put(hashes, names, "c", 6));
+  }
+
+  /** The documented usage: build a StringIndex, attach a parallel payload indexed by slot. */
+  @Test
+  void parallelPayloadBySlot() {
+    String[] names = {"a", "b", "c"};
+    Data d = Support.create(names);
+
+    long[] ids = new long[d.names.length];
+    for (int j = 0; j < names.length; j++) {
+      ids[Support.indexOf(d.hashes, d.names, names[j])] = j + 1L; // payload at the name's slot
+    }
+
+    assertEquals(1L, ids[Support.indexOf(d.hashes, d.names, "a")]);
+    assertEquals(2L, ids[Support.indexOf(d.hashes, d.names, "b")]);
+    assertEquals(3L, ids[Support.indexOf(d.hashes, d.names, "c")]);
+  }
+
+  @Test
+  void mapIntValues_slotAligned_andLookup() {
+    StringIndex idx = StringIndex.of("a", "b", "c");
+    // 1-based ids; 0 stays the empty-slot / not-found sentinel.
+    int[] ids = idx.mapIntValues(s -> s.charAt(0) - 'a' + 1);
+    assertEquals(idx.slots(), ids.length); // sized to the table, not the name count
+
+    assertEquals(1, idx.lookup(ids, "a"));
+    assertEquals(2, idx.lookup(ids, "b"));
+    assertEquals(3, idx.lookup(ids, "c"));
+    assertEquals(0, idx.lookup(ids, "z")); // miss -> 0
+    assertEquals(-1, idx.lookupOrDefault(ids, "z", -1)); // miss -> supplied default
+  }
+
+  @Test
+  void mapLongValues_slotAligned_andLookup() {
+    Data d = Support.create("a", "b", "c");
+    long[] vals = Support.mapLongValues(d.names, s -> s.charAt(0) - 'a' + 1L);
+
+    assertEquals(1L, Support.lookup(d.hashes, d.names, vals, "a"));
+    assertEquals(3L, Support.lookup(d.hashes, d.names, vals, "c"));
+    assertEquals(0L, Support.lookup(d.hashes, d.names, vals, "z")); // miss -> 0
+    assertEquals(-1L, Support.lookupOrDefault(d.hashes, d.names, vals, "z", -1L));
+  }
+
+  @Test
+  void mapValues_objects_typedArray_andLookup() {
+    StringIndex idx = StringIndex.of("a", "bb", "ccc");
+    Integer[] lengths = idx.mapValues(Integer.class, String::length);
+
+    // Class drives a real Integer[], not an Object[].
+    assertEquals(Integer[].class, lengths.getClass());
+
+    assertEquals(Integer.valueOf(1), idx.lookup(lengths, "a"));
+    assertEquals(Integer.valueOf(3), idx.lookup(lengths, "ccc"));
+    assertNull(idx.lookup(lengths, "z")); // miss -> null
+    assertEquals(Integer.valueOf(-1), idx.lookupOrDefault(lengths, "z", -1));
+  }
+
+  @Test
+  void support_mapValues_objects_sizedToSlots_emptyStayNull() {
+    Data d = Support.create("a", "b", "c");
+    String[] tagged = Support.mapValues(d.names, String.class, s -> s + "!");
+
+    assertEquals(d.names.length, tagged.length); // sized to the table
+    int nonNull = 0;
+    for (String s : tagged) {
+      if (s != null) {
+        nonNull++;
+      }
+    }
+    assertEquals(3, nonNull); // only the placed names map; unfilled slots stay null
+
+    assertEquals("a!", Support.lookup(d.hashes, d.names, tagged, "a"));
+    assertEquals("dflt", Support.lookupOrDefault(d.hashes, d.names, tagged, "z", "dflt"));
+  }
+
+  @Test
+  void instance_lookup_delegatesToSupportArrays() {
+    StringIndex idx = StringIndex.of("x", "y");
+    int[] ids = idx.mapIntValues(s -> "x".equals(s) ? 7 : 9);
+
+    assertEquals(7, idx.lookup(ids, "x"));
+    assertEquals(9, idx.lookup(ids, "y"));
+    assertEquals(0, idx.lookup(ids, "missing"));
+    assertEquals(42, idx.lookupOrDefault(ids, "missing", 42));
+  }
+}