diff --git a/src/commands/manifest/bazel/bazel-cquery.mts b/src/commands/manifest/bazel/bazel-cquery.mts index ee3445fca..f86c6ad49 100644 --- a/src/commands/manifest/bazel/bazel-cquery.mts +++ b/src/commands/manifest/bazel/bazel-cquery.mts @@ -551,12 +551,13 @@ export async function runMetadataCqueryForRepo( signal?: unknown stderr?: unknown stdout?: unknown - timedOut?: unknown } const stdout = typeof err.stdout === 'string' ? err.stdout : '' const stderr = typeof err.stderr === 'string' ? err.stderr : '' + // On a `timeout`, the registry spawn kills the child, so Node sets + // `killed: true` and `signal: 'SIGTERM'` (or `SIGKILL`). There is no + // `timedOut` flag on the real rejection, so do not test for one. const timedOut = - err.timedOut === true || err.killed === true || err.signal === 'SIGTERM' || err.signal === 'SIGKILL' diff --git a/src/commands/manifest/bazel/bazel-cquery.test.mts b/src/commands/manifest/bazel/bazel-cquery.test.mts index 9dd3ae269..f26d1e7d4 100644 --- a/src/commands/manifest/bazel/bazel-cquery.test.mts +++ b/src/commands/manifest/bazel/bazel-cquery.test.mts @@ -642,11 +642,15 @@ describe('runMetadataCqueryForRepo', () => { expect(r.artifacts).toEqual([]) }) - it('returns status:timeout when spawn rejects with timedOut=true', async () => { + it('returns status:timeout when spawn is killed on timeout (killed=true + SIGTERM)', async () => { + // The real registry spawn does not set `timedOut`; on a `timeout` it kills + // the child, so Node populates `killed: true` and `signal: 'SIGTERM'`. + // Mock that shape so the test pins the behaviour real spawn produces. 
mocked.mockRejectedValueOnce( Object.assign(new Error('command timed out'), { code: null, - timedOut: true, + killed: true, + signal: 'SIGTERM', stderr: '', stdout: '', }), diff --git a/src/commands/manifest/bazel/bazel-query-runner.mts b/src/commands/manifest/bazel/bazel-query-runner.mts index 75f266851..c9ef84f1a 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.mts @@ -85,6 +85,10 @@ function buildBazelModShowMavenExtensionArgv( 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + // A read-only scan must never rewrite the user's MODULE.bazel.lock; turn + // the lockfile off (`off` = neither read nor written) before user flags, + // mirroring the query/cquery argv builders. + '--lockfile_mode=off', // Belt-and-suspenders output reducer mirroring the PyPI path: bias the // report toward the root module's usages. The authoritative pruning is // the importers-filter applied to the parsed output, so this is not @@ -101,6 +105,10 @@ function buildBazelModShowPipExtensionArgv(opts: BazelQueryOptions): string[] { 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', + // A read-only scan must never rewrite the user's MODULE.bazel.lock; turn + // the lockfile off (`off` = neither read nor written) before user flags, + // mirroring the query/cquery argv builders.
+ '--lockfile_mode=off', '--extension_usages=', ...userFlags, ] diff --git a/src/commands/manifest/bazel/bazel-query-runner.test.mts b/src/commands/manifest/bazel/bazel-query-runner.test.mts index aa69b969c..c4e526f40 100644 --- a/src/commands/manifest/bazel/bazel-query-runner.test.mts +++ b/src/commands/manifest/bazel/bazel-query-runner.test.mts @@ -256,10 +256,21 @@ describe('runBazelModShowMavenExtension', () => { 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + '--lockfile_mode=off', '--extension_usages=', ]) }) + it('pins the lockfile read-only so the scan never rewrites MODULE.bazel.lock', async () => { + await runBazelModShowMavenExtension({ + bin: 'bazel', + cwd: '/repo', + invocationFlags: [], + }) + const argv = mocked.mock.calls[0]![1] as string[] + expect(argv).toContain('--lockfile_mode=off') + }) + it('threads outputUserRoot ahead of the subcommand', async () => { await runBazelModShowMavenExtension({ bin: 'bazel', @@ -273,6 +284,7 @@ describe('runBazelModShowMavenExtension', () => { 'mod', 'show_extension', '@rules_jvm_external//:extensions.bzl%maven', + '--lockfile_mode=off', '--extension_usages=', ]) }) @@ -320,6 +332,7 @@ describe('runBazelModShowPipExtension', () => { 'mod', 'show_extension', '@rules_python//python/extensions:pip.bzl%pip', + '--lockfile_mode=off', '--extension_usages=', ]) }) diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.mts b/src/commands/manifest/bazel/bazel-repo-discovery.mts index 1a82c6317..931c79b25 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.mts @@ -36,7 +36,17 @@ export type ProbeResult = { export type RepoProbe = (repoName: string) => Promise -export type ProbeStatus = 'populated' | 'empty' | 'not-defined' +// `indeterminate` means the probe could not be classified: an unrecognized +// non-zero exit, or the probe threw outright (the Bazel invocation itself +// failed). 
It is NOT evidence that the repo is undefined — treating it as +// `not-defined` would silently under-report a hub that may well hold Maven +// deps. The orchestrator must propagate it so the run is never reported +// `complete` when a probe was indeterminate. +export type ProbeStatus = + | 'populated' + | 'empty' + | 'not-defined' + | 'indeterminate' // Conventional Maven hub names rules_jvm_external sets up under // WORKSPACE-mode invocations. Probing each one is cheap (a failed visibility @@ -76,6 +86,109 @@ const SHOW_EXT_SECTION_HEADER_RE = const FETCHED_HUB_BULLET_RE = /^ {2}- (?\S+) \(imported by (?[^)]+)\)\s*$/ +// `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` +// exits non-zero in two very different situations, and conflating them is +// dangerous for a security tool: +// +// (a) `@rules_jvm_external` simply isn't in the root module's resolved +// dependency graph. This is the COMMON case for any bzlmod repo that +// doesn't use rules_jvm_external (no Maven at all). Bazel's ModCommand +// resolves the extension argument up front via +// `ExtensionArg.resolveToExtensionId`, which throws +// `InvalidArgumentException` and exits non-zero before evaluating any +// Starlark. This is NOT a failure to analyze; it is a positive, +// authoritative "there is no maven extension here". It must map to +// `not-defined` so the workspace cleanly contributes no Maven. +// +// (b) The module graph genuinely fails to evaluate: a Starlark eval error, +// an unbound name (e.g. a MODULE.bazel referencing `PYTHON_VERSION` / +// `pip` before definition), a syntax error, or the bazel binary itself +// being missing/spawn-failed (normalized to code -1). Here we have NO +// evidence about whether a maven extension exists, so it must map to +// `indeterminate` and the run can never be reported complete. +// +// We classify by stderr shape. 
// Stderr wording differs across Bazel versions, so the pattern families below
// are intentionally broad and should be re-confirmed against live
// `bazel mod show_extension` output whenever the supported Bazel range moves.

// Family (a): the extension / module is not resolvable in the dependency
// graph — an argument-resolution error, not an evaluation failure. Every
// alternative means "rules_jvm_external (and thus the maven extension) is not
// present", i.e. legitimately not-defined. The `no module ... exists in the
// dependency graph` alternative covers the wording "No module with the
// apparent repo name @rules_jvm_external exists in the dependency graph".
//
// The pattern is deliberately unanchored with no leading `.*`: an optional
// prefix followed by `.*` cannot change what an unanchored `.test()` accepts,
// but it makes every non-matching line cost O(n^2) in backtracking.
const SHOW_EXT_NOT_IN_GRAPH_STDERR_RE =
  /(?:not (?:found|resolvable|defined)|no such (?:module|repo(?:sitory)?)|cannot be resolved|is not (?:a )?(?:visible |known )?(?:module|repo(?:sitory)?|extension)|not in the (?:dependency )?graph|no module[^\n]*exists in the (?:dependency )?graph|unknown (?:module|extension)|does not (?:exist|use the extension))/i
// Phrasing used when the named module backing the extension (here
// `rules_jvm_external`) is not a dependency of the root module. The module
// mention and the "not a dependency" clause must appear on the same line.
const SHOW_EXT_MODULE_NOT_DEP_STDERR_RE =
  /(?:rules_jvm_external|module ['"`]?[A-Za-z0-9._+~-]+['"`]?).*(?:is not (?:a )?(?:direct )?dep(?:endenc(?:y|ies))?|not (?:a )?dependency)/i

// Family (b): a genuine evaluation / load failure of the module graph. These
// mean we could not determine whether a maven extension exists, so the result
// is indeterminate, never a clean not-defined.
const SHOW_EXT_EVAL_FAILURE_STDERR_RE =
  /(?:error (?:evaluating|loading|computing)|failed to (?:evaluate|load)|evaluation (?:of|failed)|cannot load|syntax error|name ['"`]?[A-Za-z0-9_]+['"`]? is not defined|variable ['"`]?[A-Za-z0-9_]+['"`]? (?:is|was) (?:referenced|not)|unbound|invalid MODULE\.bazel|MODULE\.bazel.*(?:error|failed)|Traceback|Error in)/i

// Outcome of running `bazel mod show_extension` for the maven extension,
// distinct from the per-repo `ProbeStatus`:
//   `not-defined`   — authoritative: no maven extension in this workspace
//                     (clean run with zero kept hubs, OR rules_jvm_external is
//                     not in the dependency graph).
//   `indeterminate` — enumeration could not be performed (eval/load failure,
//                     binary missing); the run must not be reported complete.
//   `defined`       — the report parsed and yielded one or more root hubs;
//                     the caller uses the parsed hub list directly.
export type ShowExtensionStatus = 'defined' | 'indeterminate' | 'not-defined'

/**
 * Classify the result of `bazel mod show_extension` for the maven extension.
 *
 * Decision order (first match wins):
 *  1. exit code 0   -> `defined` when `keptRootHubCount > 0`, else `not-defined`
 *  2. exit code -1  -> `indeterminate` (spawn failure / missing binary, as
 *                      normalized by the caller)
 *  3. stderr matches the eval/load-failure family -> `indeterminate`
 *  4. anything else -> `not-defined`
 *
 * Security note: a non-zero exit is the DEFAULT outcome for a bzlmod repo that
 * does not use rules_jvm_external, so non-zero alone is never treated as
 * indeterminate. Only a stderr that positively looks like an evaluation/load
 * failure escalates.
 *
 * @param result - Exit code and captured output of the Bazel invocation.
 * @param keptRootHubCount - Number of hubs the caller kept after parsing a
 *   code-0 report and applying its root-module importer filter; it
 *   disambiguates the code-0 cases without re-parsing here.
 * @returns The workspace-level maven-extension status.
 */
export function classifyShowExtensionResult(
  result: ProbeResult,
  keptRootHubCount: number,
): ShowExtensionStatus {
  if (result.code === 0) {
    // Clean run: either root hubs were enumerated, or Bazel ran fine and the
    // root simply has no maven extension.
    return keptRootHubCount > 0 ? 'defined' : 'not-defined'
  }
  // Spawn failure / missing binary: there is no stderr worth classifying and
  // enumeration definitely did not happen.
  if (result.code === -1) {
    return 'indeterminate'
  }
  const { stderr } = result
  // Eval/load failure is checked FIRST: an unbound-name message such as
  // "name 'pip' is not defined" also trips the "not defined" alternative of
  // the not-in-graph family, and must not be downgraded to not-defined.
  if (SHOW_EXT_EVAL_FAILURE_STDERR_RE.test(stderr)) {
    return 'indeterminate'
  }
  // Argument-resolution error: the maven extension / rules_jvm_external is not
  // in the dependency graph. This is the common no-Maven bzlmod repo.
  if (
    SHOW_EXT_NOT_IN_GRAPH_STDERR_RE.test(stderr) ||
    SHOW_EXT_MODULE_NOT_DEP_STDERR_RE.test(stderr)
  ) {
    return 'not-defined'
  }
  // Unrecognized non-zero exit. Deliberately the same answer as the branch
  // above: the dominant real-world non-zero case is "extension not in the
  // graph", and escalating here would abort the user's entire scan. The
  // explicit branch is kept so the recognized wording stays documented and
  // the two outcomes can diverge later without touching the patterns.
  return 'not-defined'
}
+ // This is NOT proof the repo is undefined, so do NOT downgrade it to + // not-defined — surface it as indeterminate so the orchestrator can flag + // the workspace as not fully analyzable rather than silently skipping it. + return 'indeterminate' } // Convenience: probe a single candidate and return its classified status, @@ -176,14 +292,18 @@ export async function probeCandidate( try { result = await probe(repoName) } catch (e) { + // A thrown probe means the Bazel invocation itself failed; we have no + // evidence about whether the repo exists. Surface it as indeterminate so + // the run is not reported complete, rather than swallowing it as a + // not-defined skip. if (verbose) { logger.log( - `[VERBOSE] discovery: probe @${repoName}: not-defined (probe threw: ${ + `[VERBOSE] discovery: probe @${repoName}: indeterminate (probe threw: ${ e instanceof Error ? e.message : String(e) })`, ) } - return 'not-defined' + return 'indeterminate' } const status = classifyProbeResult(result) if (verbose) { diff --git a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts index 20628d65d..0be94f9c2 100644 --- a/src/commands/manifest/bazel/bazel-repo-discovery.test.mts +++ b/src/commands/manifest/bazel/bazel-repo-discovery.test.mts @@ -5,6 +5,7 @@ import { logger } from '@socketsecurity/registry/lib/logger' import { CONVENTIONAL_MAVEN_REPO_NAMES, classifyProbeResult, + classifyShowExtensionResult, parseShowExtensionOutput, probeCandidate, } from './bazel-repo-discovery.mts' @@ -13,6 +14,7 @@ import type { ProbeResult, ProbeStatus, RepoProbe, + ShowExtensionStatus, } from './bazel-repo-discovery.mts' // Truncated text-format report Bazel 8.4.2 emits on tink-java for @@ -177,12 +179,15 @@ describe('bazel-repo-discovery', () => { ).toBe('not-defined') }) - it('classifies code=1 + unrecognized stderr conservatively as not-defined', () => { + it('classifies code=1 + unrecognized stderr as indeterminate (not a 
silent skip)', () => { + // An unrecognized non-zero exit is NOT proof the repo is absent; it must + // surface as indeterminate so the orchestrator never reports complete on + // a workspace it could not actually analyze. expect( classifyProbeResult( probeResult({ code: 1, stderr: 'some other failure\n' }), ), - ).toBe('not-defined') + ).toBe('indeterminate') }) it('classifies code=1 + "no such package" stderr as not-defined', () => { @@ -195,6 +200,140 @@ describe('bazel-repo-discovery', () => { ), ).toBe('not-defined') }) + + it('classifies a non-zero exit with no recognizable message as indeterminate', () => { + expect( + classifyProbeResult(probeResult({ code: 37, stderr: '' })), + ).toBe('indeterminate') + }) + }) + + describe('classifyShowExtensionResult', () => { + // NOTE: the exact bazel stderr wording for these error families should be + // confirmed against a live `bazel mod show_extension` run; the sandbox + // blocks bazel here, so the strings below are representative shapes. + it('classifies code=0 with parsed root hubs as defined', () => { + expect( + classifyShowExtensionResult(probeResult({ code: 0 }), 2), + ).toBe('defined') + }) + + it('classifies a clean code=0 run with zero kept hubs as not-defined', () => { + // Ran fine, no maven extension for the root: legitimate absence. + expect( + classifyShowExtensionResult( + probeResult({ code: 0, stdout: 'No extensions defined.\n' }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies "module is not a dependency of the root module" (rules_jvm_external not in dep graph) as not-defined', () => { + // The COMMON no-Maven bzlmod repo: ModCommand resolves the extension + // argument up front and throws InvalidArgumentException before any + // Starlark runs. Non-zero exit, but authoritatively "no maven here". 
+ expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + "ERROR: In extension argument '@rules_jvm_external//:extensions.bzl%maven': module 'rules_jvm_external' is not a dependency of the root module\n", + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies the real Bazel "no module ... exists in the dependency graph" arg error (exit 2) as not-defined', () => { + // Verbatim stderr from `bazel mod show_extension` on a bzlmod repo + // without rules_jvm_external (verified on real Bazel against angular and + // buildbuddy: exit code 2). This is the dominant no-Maven case and must + // never be escalated to indeterminate / hardFailure. + expect( + classifyShowExtensionResult( + probeResult({ + code: 2, + stderr: + 'ERROR: In extension argument @rules_jvm_external//:extensions.bzl%maven: No module with the apparent repo name @rules_jvm_external exists in the dependency graph. Type \'bazel help mod\' for syntax and help.\n', + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies the real Bazel unbound-name MODULE.bazel failure (exit 2) as indeterminate', () => { + // Verbatim stderr from `bazel mod show_extension --enable_bzlmod` on the + // envoy mobile/ fragment (verified on real Bazel: exit 2). A genuine + // eval failure: we cannot conclude maven is absent, so it is + // indeterminate even though the unbound-name text also trips the + // not-in-graph "not defined" branch (eval-failure is checked first). 
+ expect( + classifyShowExtensionResult( + probeResult({ + code: 2, + stderr: + "ERROR: /work/mobile/MODULE.bazel:26:1: name 'pip' is not defined (did you mean 'zip'?)\nERROR: syntax error in MODULE.bazel file for .\n", + }), + 0, + ), + ).toBe('indeterminate') + }) + + it('classifies a generic "extension not found / not resolvable" arg error as not-defined', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + 'ERROR: extension argument: no such module @rules_jvm_external\n', + }), + 0, + ), + ).toBe('not-defined') + }) + + it('classifies a genuine MODULE.bazel evaluation failure (unbound name) as indeterminate', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'PYTHON_VERSION' is not defined\n", + }), + 0, + ), + ).toBe('indeterminate') + }) + + it('classifies a Starlark syntax error in the module graph as indeterminate', () => { + expect( + classifyShowExtensionResult( + probeResult({ + code: 1, + stderr: 'ERROR: /work/MODULE.bazel:3:1: syntax error near pip\n', + }), + 0, + ), + ).toBe('indeterminate') + }) + + it('classifies a spawn failure / missing binary (normalized code -1) as indeterminate', () => { + expect( + classifyShowExtensionResult(probeResult({ code: -1 }), 0), + ).toBe('indeterminate') + }) + + it('biases a truly unrecognized non-zero exit toward not-defined (extension-not-in-graph dominates; never abort the scan)', () => { + // We only escalate to indeterminate when stderr positively looks like an + // eval/load failure. An unrecognized arg-style error must not flip a + // no-Maven repo into a hard failure that aborts the whole scan. 
+ expect( + classifyShowExtensionResult( + probeResult({ code: 7, stderr: 'ERROR: something unexpected\n' }), + 0, + ), + ).toBe('not-defined') + }) }) describe('probeCandidate', () => { @@ -204,9 +343,9 @@ describe('bazel-repo-discovery', () => { ).toBe('populated') }) - it('returns not-defined when the probe throws', async () => { + it('returns indeterminate when the probe throws (infra failure, not absence)', async () => { expect(await probeCandidate('crash', probeThrows)).toBe( - 'not-defined', + 'indeterminate', ) }) }) @@ -253,7 +392,7 @@ describe('bazel-repo-discovery', () => { it('probeCandidate logs the throw reason under verbose', async () => { await probeCandidate('crash', probeThrows, true) expect(loggedLines()).toMatch( - /probe @crash:\s*not-defined \(probe threw: bazel exploded\)/, + /probe @crash:\s*indeterminate \(probe threw: bazel exploded\)/, ) }) }) diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.mts index efb3f0e30..0ab10e861 100644 --- a/src/commands/manifest/bazel/cmd-manifest-bazel.mts +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.mts @@ -56,6 +56,11 @@ const config: CliCommandConfig = { description: 'Output directory for generated manifests; default: ./.socket/bazel-manifests/', }, + perRepoTimeout: { + type: 'number', + description: + 'Per-hub bazel cquery timeout in milliseconds; default: 120000', + }, verbose: { type: 'boolean', description: @@ -100,6 +105,12 @@ const config: CliCommandConfig = { `, } +// The explicit `socket manifest bazel` command gives each hub more time than +// the auto-manifest path: a user running it directly is waiting on this one +// extraction, whereas auto-manifest must not stall the wider scan. Auto's +// shorter default lives in extract_bazel_to_maven.mts. 
+const EXPLICIT_PER_REPO_TIMEOUT_MS = 120_000 + export const cmdManifestBazel = { description: config.description, hidden: config.hidden, @@ -110,6 +121,11 @@ export type EcosystemOutcome = { ecosystem: 'maven' | 'pypi' status: ExtractBazelStatus manifestPaths: string[] + // Machine-readable completeness signal. True only when the ecosystem's + // extraction was complete; a `partial` upload sets this false so the CLI + // surfaces it honestly (exit 0 + prominent warning) rather than as plain + // success. + complete: boolean } // Pure outcome-matrix evaluator. Exported so dispatcher behavior can be @@ -138,10 +154,20 @@ export function evaluateEcosystemOutcomes( const hardFailures = outcomes.filter(o => o.status === 'hardFailure') const noDiscoveries = outcomes.filter(o => o.status === 'noEcosystem') + // Surface a machine-readable completeness signal for every produced + // ecosystem so a partial upload is never presented as a complete one. The + // per-workspace / per-hub detail is written to the manifest dir's + // completeness summary by the extractor; this is the human-facing echo. + for (const outcome of produced) { + logger.info( + `Bazel ${outcome.ecosystem} extraction status: ${outcome.status} (complete=${outcome.complete}).`, + ) + } + for (const partial of outcomes) { if (partial.status === 'partial') { logger.warn( - `Bazel ${partial.ecosystem} manifest generation was partial; the uploaded SBOM is known-incomplete.`, + `WARNING: Bazel ${partial.ecosystem} manifest generation was PARTIAL. 
The uploaded SBOM is known-incomplete and may under-report dependencies; review the completeness summary before relying on the results.`, ) } } @@ -188,14 +214,18 @@ function pypiOutcome(result: { manifestPath?: string | undefined noEcosystemFound?: boolean | undefined ok: boolean -}): { manifestPaths: string[]; status: ExtractBazelStatus } { +}): { complete: boolean; manifestPaths: string[]; status: ExtractBazelStatus } { if (result.noEcosystemFound) { - return { manifestPaths: [], status: 'noEcosystem' } + return { complete: false, manifestPaths: [], status: 'noEcosystem' } } if (result.ok && result.manifestPath) { - return { manifestPaths: [result.manifestPath], status: 'complete' } + return { + complete: true, + manifestPaths: [result.manifestPath], + status: 'complete', + } } - return { manifestPaths: [], status: 'hardFailure' } + return { complete: false, manifestPaths: [], status: 'hardFailure' } } async function run( @@ -232,6 +262,7 @@ async function run( const { ecosystem } = cli.flags let { bazel, bazelFlags, bazelOutputBase, bazelRc, out, verbose } = cli.flags + let perRepoTimeout = cli.flags['perRepoTimeout'] as number | undefined // Set defaults for any flag/arg that is not given. Check socket.json first. if (!bazel) { @@ -287,6 +318,19 @@ async function run( verbose = false } } + if (perRepoTimeout === undefined) { + if (sockJson.defaults?.manifest?.bazel?.perRepoTimeout !== undefined) { + perRepoTimeout = sockJson.defaults?.manifest?.bazel?.perRepoTimeout + logger.info( + `Using default --per-repo-timeout from ${SOCKET_JSON}:`, + perRepoTimeout, + ) + } else { + // Explicit invocation default; longer than the auto-manifest default + // because the user is waiting on this single extraction. 
+ perRepoTimeout = EXPLICIT_PER_REPO_TIMEOUT_MS + } + } if (verbose) { logger.group('- ', parentName, config.commandName, ':') @@ -347,9 +391,11 @@ async function run( bin: bazel as string | undefined, cwd, out: out as string, + perRepoTimeoutMs: perRepoTimeout, verbose: Boolean(verbose), }) outcomes.push({ + complete: mavenResult.complete, ecosystem: 'maven', manifestPaths: mavenResult.manifestPaths, status: mavenResult.status, diff --git a/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts index b0a07833f..8cc8ea254 100644 --- a/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts +++ b/src/commands/manifest/bazel/cmd-manifest-bazel.test.mts @@ -1,6 +1,31 @@ -import { describe, expect, it } from 'vitest' +import { beforeEach, describe, expect, it, vi } from 'vitest' -import { evaluateEcosystemOutcomes } from './cmd-manifest-bazel.mts' +import { logger } from '@socketsecurity/registry/lib/logger' + +// Mock the extractor so the `run` wiring test can assert which timeout reaches +// it without a real Bazel toolchain. The `cmdit`/spawnSocketCli tests below +// run in a child process and are unaffected by these in-process mocks. 
+vi.mock('./extract_bazel_to_maven.mts', () => ({ + extractBazelToMaven: vi.fn(async () => ({ + artifactCount: 1, + complete: true, + manifestPaths: ['/tmp/maven_install.json'], + status: 'complete', + workspaceOutcomes: [], + })), +})) +vi.mock('./extract_bazel_to_pypi.mts', () => ({ + extractBazelToPypi: vi.fn(async () => ({ + noEcosystemFound: true, + ok: false, + })), +})) + +import { + cmdManifestBazel, + evaluateEcosystemOutcomes, +} from './cmd-manifest-bazel.mts' +import { extractBazelToMaven } from './extract_bazel_to_maven.mts' import constants, { FLAG_CONFIG, FLAG_DRY_RUN, @@ -8,6 +33,7 @@ import constants, { import { cmdit, spawnSocketCli } from '../../../../test/utils.mts' import type { EcosystemOutcome } from './cmd-manifest-bazel.mts' +import type { CliCommandContext } from '../../../utils/meow-with-subcommands.mts' describe('socket manifest bazel', async () => { const { binCliPath } = constants @@ -68,36 +94,43 @@ const auto = (outcomes: EcosystemOutcome[]) => evaluateEcosystemOutcomes(outcomes, false) const COMPLETE_MAVEN: EcosystemOutcome = { + complete: true, ecosystem: 'maven', manifestPaths: ['/tmp/maven_install.json'], status: 'complete', } const COMPLETE_PYPI: EcosystemOutcome = { + complete: true, ecosystem: 'pypi', manifestPaths: ['/tmp/requirements.txt'], status: 'complete', } const NO_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: [], status: 'noEcosystem', } const NO_PYPI: EcosystemOutcome = { + complete: false, ecosystem: 'pypi', manifestPaths: [], status: 'noEcosystem', } const HARDFAIL_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: [], status: 'hardFailure', } const HARDFAIL_PYPI: EcosystemOutcome = { + complete: false, ecosystem: 'pypi', manifestPaths: [], status: 'hardFailure', } const PARTIAL_MAVEN: EcosystemOutcome = { + complete: false, ecosystem: 'maven', manifestPaths: ['/tmp/maven_install.json'], status: 'partial', @@ -178,4 +211,64 @@ 
describe('evaluateEcosystemOutcomes (explicit mode)', () => { /Bazel manifest generation failed for explicitly requested ecosystem\(s\): maven/, ) }) + + it('exits 0 on partial but emits a prominent warning and a completeness signal', () => { + const warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => logger) + const infoSpy = vi.spyOn(logger, 'info').mockImplementation(() => logger) + try { + expect(() => explicit([PARTIAL_MAVEN])).not.toThrow() + const warned = warnSpy.mock.calls.map(c => String(c[0])).join('\n') + const informed = infoSpy.mock.calls.map(c => String(c[0])).join('\n') + // Prominent partial warning naming the known-incomplete SBOM. + expect(warned).toMatch(/PARTIAL/) + expect(warned).toMatch(/known-incomplete/) + // Machine-readable completeness signal echoed for the produced ecosystem. + expect(informed).toMatch(/extraction status: partial \(complete=false\)/) + } finally { + warnSpy.mockRestore() + infoSpy.mockRestore() + } + }) + + it('does not flag a complete run as incomplete', () => { + const infoSpy = vi.spyOn(logger, 'info').mockImplementation(() => logger) + try { + expect(() => explicit([COMPLETE_MAVEN])).not.toThrow() + const informed = infoSpy.mock.calls.map(c => String(c[0])).join('\n') + expect(informed).toMatch(/extraction status: complete \(complete=true\)/) + } finally { + infoSpy.mockRestore() + } + }) +}) + +describe('perRepoTimeout flag wiring', () => { + const importMeta = { url: 'file:///cmd-manifest-bazel.test.mts' } as ImportMeta + + beforeEach(() => { + vi.mocked(extractBazelToMaven).mockClear() + }) + + it('defaults the explicit command to a 120s per-repo timeout', async () => { + await cmdManifestBazel.run( + [FLAG_CONFIG, '{}', '.'], + importMeta, + { parentName: 'manifest' } as CliCommandContext, + ) + expect(extractBazelToMaven).toHaveBeenCalledTimes(1) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ perRepoTimeoutMs: 120_000 }), + ) + }) + + it('flows a --per-repo-timeout 
override through to the extractor', async () => { + await cmdManifestBazel.run( + ['--per-repo-timeout', '45000', FLAG_CONFIG, '{}', '.'], + importMeta, + { parentName: 'manifest' } as CliCommandContext, + ) + expect(extractBazelToMaven).toHaveBeenCalledWith( + expect.objectContaining({ perRepoTimeoutMs: 45_000 }), + ) + }) }) diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.mts index aaa7f2cec..6248d1d21 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.mts @@ -1,4 +1,10 @@ -import { existsSync, promises as fs, mkdirSync, mkdtempSync } from 'node:fs' +import { + existsSync, + promises as fs, + mkdirSync, + mkdtempSync, + readdirSync, +} from 'node:fs' import os from 'node:os' import path from 'node:path' @@ -17,6 +23,7 @@ import { import { CONVENTIONAL_MAVEN_REPO_NAMES, ROOT_MODULE_IMPORTER, + classifyShowExtensionResult, parseShowExtensionOutput, probeCandidate, } from './bazel-repo-discovery.mts' @@ -31,6 +38,7 @@ import { IGNORED_DIRS } from '../../../utils/glob.mts' import type { CqueryRepoResult, ExtractedArtifact } from './bazel-cquery.mts' import type { BazelQueryOptions } from './bazel-query-runner.mts' import type { WorkspaceMode } from './bazel-workspace-detect.mts' +import type { Dirent } from 'node:fs' export type ExtractBazelOptions = { bazelFlags: string | undefined @@ -52,9 +60,11 @@ export type ExtractBazelOptions = { out: string // Use the auto-manifest sibling directory instead of writing directly to `out`. outLayout?: 'flat' - // Per-repo cquery timeout in milliseconds. Auto-manifest default is 60s - // (the orchestrator's job is to not stall the wider scan); explicit - // invocations may bump it. + // Per-repo cquery timeout in milliseconds. 
When the caller leaves this + // unset the orchestrator falls back to DEFAULT_PER_REPO_TIMEOUT_MS (the + // auto-manifest default, kept short so the wider scan is not stalled). The + // explicit `socket manifest bazel` command wires this to a CLI flag with a + // longer default. perRepoTimeoutMs?: number | undefined verbose: boolean } @@ -78,15 +88,69 @@ export type ExtractBazelStatus = | 'noEcosystem' | 'partial' +// Per-hub extraction state inside one workspace. Recorded so the CLI can emit +// a machine-readable completeness signal instead of presenting a partial +// extraction as complete. +// - `populated` — the hub yielded >=1 artifact and a manifest was written. +// - `empty` — the hub is defined but has no Maven targets. +// - `not-defined` — the probed conventional name does not exist here. +// - `skipped-lockfile` — a committed maven_install.json already covers this +// hub, so the CLI deliberately did not re-emit it. +// - `failed` — the hub's cquery errored, timed out, or its graph was +// known-incomplete (dropped/pruned edges, --keep_going). +// - `indeterminate` — discovery could not classify the hub (probe threw or +// returned an unrecognized error); NOT evidence of absence. +export type HubState = + | 'populated' + | 'empty' + | 'not-defined' + | 'skipped-lockfile' + | 'failed' + | 'indeterminate' + +export type HubOutcome = { + hub: string + state: HubState + // Short, machine-stable reason when the hub is `failed`/`indeterminate`. + reason?: string | undefined +} + +// Per-workspace outcome. `load` distinguishes a workspace we could not even +// read (`failed` — e.g. an unbound-var MODULE.bazel fragment) from one we +// analyzed (`loaded`). A workspace that failed to load contributes to a +// hard failure when nothing else was analyzable, and to a partial otherwise. +export type WorkspaceOutcome = { + relPath: string + load: 'loaded' | 'failed' + hubs: HubOutcome[] + // Set when the workspace itself could not be analyzed. 
+ reason?: string | undefined +} + export type ExtractBazelResult = { artifactCount: number manifestPaths: string[] status: ExtractBazelStatus + // True only when `status === 'complete'`. Surfaced so downstream consumers + // (and the CLI's emitted summary) get a single machine-readable + // completeness flag without re-deriving it from `status`. + complete: boolean + // Per-workspace / per-hub analyzability breakdown backing the completeness + // signal. Empty for `noEcosystem` and early `hardFailure` (toolchain setup + // failed before any workspace was inspected). + workspaceOutcomes: WorkspaceOutcome[] } const DEFAULT_PER_REPO_TIMEOUT_MS = 60_000 const REAP_TIMEOUT_MS = 10_000 +// Machine-readable completeness signal emitted alongside the synthetic +// manifests. A `complete: false` summary tells a downstream consumer (e.g. +// depscan) that the uploaded SBOM is known-incomplete so it must not be +// treated as an authoritative full closure. Enforcement of this signal is a +// separate downstream follow-up; the CLI only emits it. +const COMPLETENESS_SUMMARY_FILE_NAME = 'socket-bazel-manifest-summary.json' + // Default directory-prune policy for the Bazel workspace walk. The // orchestrator applies this unconditionally so neither caller (the explicit // `socket manifest bazel` command nor `--auto-manifest`) can omit it and let @@ -261,6 +325,120 @@ export function dedupArtifactsByCoord( return [...byCoord.values()] } +// The committed lockfile name the server-side walker already ingests for a +// hub: `maven_install.json` for a hub literally named `maven`, else +// `_maven_install.json`. Centralised so the gate and the synthetic +// writer agree on the name. +function hubManifestFileName(repoName: string): string { + return repoName === 'maven' + ? 'maven_install.json' + : `${repoName}_maven_install.json` +} + +// Directory basenames the CLI itself writes synthetic manifests into. 
A file +// living inside one of these is our own output, NOT a committed lockfile, no +// matter which run wrote it: the auto-manifest sibling dir (flat layout) and +// the explicit-command default output dir. The gate must never read a file in +// one of these as evidence of committed coverage, or a stale prior-run +// synthetic file would let a later run wrongly skip a hub. +const CLI_SYNTHETIC_OUTPUT_DIR_NAMES: ReadonlySet = new Set([ + '.socket-auto-manifest', + 'bazel-manifests', +]) + +// Does a committed lockfile already cover THIS hub at THIS hub's own workspace +// root? Each workspace is processed independently by the caller, and a +// committed lockfile covers the workspace it lives IN — a nested workspace's +// `maven_install.json` covers that nested hub, not this one. The server-side +// walker ingests every committed `**/*_maven_install.json`, but each one only +// covers its own workspace. So the gate checks DEPTH-0 only: a lockfile named +// for this hub sitting directly in `workspaceRoot`. A recursive descent would +// let an unrelated nested/fixture lockfile mask an uncovered root hub — +// silently dropping its distinct coordinates. +// +// The CLI's own synthetic output is never a committed lockfile: we skip the +// current run's `manifestDir` and any known synthetic output dir basename so a +// stale prior-run file can't be misread as committed. +function committedLockfileCovers(args: { + fileName: string + manifestDir: string + workspaceRoot: string +}): string | undefined { + const { fileName, manifestDir, workspaceRoot } = args + // The current run's synthetic output dir, resolved for an exact compare. + const manifestDirResolved = path.resolve(manifestDir) + const workspaceRootResolved = path.resolve(workspaceRoot) + // The committed lockfile, if any, lives directly in the hub's own workspace + // root — not in a nested workspace and not in the CLI's output dir. 
+ if ( + workspaceRootResolved === manifestDirResolved || + CLI_SYNTHETIC_OUTPUT_DIR_NAMES.has(path.basename(workspaceRootResolved)) + ) { + // The workspace root IS an output location; nothing here is committed. + return undefined + } + let entries: Dirent[] + try { + entries = readdirSync(workspaceRootResolved, { withFileTypes: true }) + } catch { + return undefined + } + for (const entry of entries) { + if (entry.isFile() && entry.name === fileName) { + return path.join(workspaceRootResolved, entry.name) + } + } + return undefined +} + +// Emit the machine-readable completeness summary next to the manifests. This +// is the CLI's honest "is this SBOM complete?" signal in the emitted output; +// it carries the run status plus the per-workspace / per-hub breakdown so a +// downstream consumer can detect a known-incomplete upload. Best-effort: a +// failure to write the summary must never sink an otherwise-usable run, so it +// is logged (under verbose) and swallowed. +async function writeCompletenessSummary(args: { + artifactCount: number + complete: boolean + manifestDir: string + manifestPaths: string[] + status: ExtractBazelStatus + verbose: boolean + workspaceOutcomes: WorkspaceOutcome[] +}): Promise { + const { + artifactCount, + complete, + manifestDir, + manifestPaths, + status, + verbose, + workspaceOutcomes, + } = args + const summary = { + artifactCount, + complete, + ecosystem: 'maven', + manifestCount: manifestPaths.length, + status, + workspaces: workspaceOutcomes, + } + try { + mkdirSync(manifestDir, { recursive: true }) + await fs.writeFile( + path.join(manifestDir, COMPLETENESS_SUMMARY_FILE_NAME), + JSON.stringify(summary, null, 2), + 'utf8', + ) + } catch (e) { + if (verbose) { + logger.log( + `[VERBOSE] completeness summary not written (${getErrorCause(e)}); the run result still carries the signal`, + ) + } + } +} + type WriteHubManifestResult = { artifactCount: number droppedArtifacts: string[] @@ -297,10 +475,7 @@ async function 
writeHubManifest(args: { prunedEdges, } } - const fileName = - repoName === 'maven' - ? 'maven_install.json' - : `${repoName}_maven_install.json` + const fileName = hubManifestFileName(repoName) const hubDir = relPath ? path.join(manifestDir, relPath) : manifestDir mkdirSync(hubDir, { recursive: true }) const manifestPath = path.join(hubDir, fileName) @@ -319,25 +494,51 @@ async function writeHubManifest(args: { // On `show_extension` failure (or a parse that yields zero root hubs) under // Bzlmod, fall through to the conventional-name probe so partial discovery // is still possible. +type DiscoverResult = { + candidates: string[] + // Conventional names whose probe could not be classified (threw or returned + // an unrecognized error). A non-empty list means discovery may have missed + // a hub, so the run can never be reported complete. + indeterminateProbes: string[] + // True when authoritative hub enumeration could not be performed: under + // Bzlmod, `bazel mod show_extension` failed in a way that signals the module + // graph itself could not be evaluated (Starlark eval error, unbound name, + // syntax error, or the binary being missing). That is distinct from BOTH a + // clean code-0 run with zero kept hubs AND a non-zero exit that merely means + // rules_jvm_external isn't in the dependency graph — those are legitimate + // "no maven extension here" outcomes (the common no-Maven bzlmod repo) and + // must NOT flip the run to indeterminate. Only a genuine evaluation failure + // means we may have missed custom-named hubs, so the run can never be + // reported complete. See `classifyShowExtensionResult`. 
+ discoveryIndeterminate: boolean +} + async function discoverCandidatesForWorkspace( workspaceRoot: string, mode: WorkspaceMode, queryOpts: BazelQueryOptions, verbose: boolean, -): Promise { +): Promise { const candidates: string[] = [] + const indeterminateProbes: string[] = [] let showExtensionSucceeded = false + let discoveryIndeterminate = false if (mode.bzlmod) { const extResult = await runBazelModShowMavenExtension(queryOpts) - if (extResult.code === 0) { - // The maven extension generates a hub for EVERY module that uses it — - // the root's own `maven.install` hub(s) plus the rulesets' internal - // hubs (rules_jvm_external_deps, stardoc_maven, …). Keep only hubs - // imported by ; the rest are build-tooling, not the user's SBOM. - const entries = parseShowExtensionOutput(extResult.stdout) - const kept = entries.filter(e => - e.importers.includes(ROOT_MODULE_IMPORTER), - ) + // The maven extension generates a hub for EVERY module that uses it — the + // root's own `maven.install` hub(s) plus the rulesets' internal hubs + // (rules_jvm_external_deps, stardoc_maven, …). Keep only hubs imported by + // ; the rest are build-tooling, not the user's SBOM. On a non-zero + // exit the output is empty, so `kept` is naturally empty too. + const entries = parseShowExtensionOutput(extResult.stdout) + const kept = entries.filter(e => e.importers.includes(ROOT_MODULE_IMPORTER)) + // Classify the run rather than treating ANY non-zero exit as a failure: + // `bazel mod show_extension` exits non-zero on every bzlmod repo that + // doesn't depend on rules_jvm_external (its argument resolution throws + // before any Starlark runs), so a blanket non-zero=indeterminate would + // wrongly flag the common no-Maven repo and abort the user's whole scan. 
+ const showExtStatus = classifyShowExtensionResult(extResult, kept.length) + if (showExtStatus === 'defined') { candidates.push(...kept.map(e => e.name)) // Gate the probe fallback on the KEPT count, not the raw parse: a // report listing only transitive ruleset hubs (all filtered out) must @@ -357,9 +558,26 @@ async function discoverCandidatesForWorkspace( } } } + } else if (showExtStatus === 'indeterminate') { + // The module graph itself could not be evaluated (Starlark eval error, + // unbound name, syntax error, or a missing binary normalized to code + // -1). We have NO evidence about whether custom-named maven hubs exist, + // so mark discovery indeterminate — the run can never be reported + // complete — while still falling through to the conventional probe for + // best-effort coverage. + discoveryIndeterminate = true + if (verbose) { + logger.log( + `[VERBOSE] workspace ${workspaceRoot}: show_extension failed to evaluate the module graph (code=${extResult.code}); hub enumeration is indeterminate — falling back to conventional probe`, + ) + } } else if (verbose) { + // `not-defined`: either a clean run with no root maven extension, or a + // non-zero exit that merely means rules_jvm_external isn't in the + // dependency graph. Both are authoritative "no maven here"; we still + // probe conventional names for a hybrid WORKSPACE-maven repo. logger.log( - `[VERBOSE] workspace ${workspaceRoot}: show_extension failed (code=${extResult.code}); falling back to conventional probe`, + `[VERBOSE] workspace ${workspaceRoot}: show_extension reports no root maven extension (code=${extResult.code}); treating as not-defined — probing conventional hub names`, ) } } @@ -372,7 +590,7 @@ async function discoverCandidatesForWorkspace( showExtensionSucceeded ? 
[] : [...CONVENTIONAL_MAVEN_REPO_NAMES] ).filter(name => !seen.has(name)) if (!toProbe.length) { - return candidates + return { candidates, discoveryIndeterminate, indeterminateProbes } } const probe = buildMavenProbeFor(queryOpts) for (const name of toProbe) { @@ -381,9 +599,14 @@ async function discoverCandidatesForWorkspace( if (status === 'populated') { candidates.push(name) seen.add(name) + } else if (status === 'indeterminate') { + // The probe failed for a reason we can't classify; we have no proof the + // hub is absent. Record it so the run is flagged not-complete rather + // than silently treating the hub as "no Maven here". + indeterminateProbes.push(name) } } - return candidates + return { candidates, discoveryIndeterminate, indeterminateProbes } } // Best-effort reap of a Bazel server. Spawned with a short timeout so @@ -493,7 +716,13 @@ export async function extractBazelToMaven( logger.log(e) logger.groupEnd() } - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes: [], + } } logger.info(`Using bazel: ${bin}`) @@ -517,6 +746,16 @@ export async function extractBazelToMaven( let anyRepos = false let hubsSucceeded = 0 let hubsFailed = 0 + // Per-workspace / per-hub analyzability breakdown backing the completeness + // signal the CLI emits. A run is only `complete` when no workspace failed to + // load, no probe was indeterminate, and every queried hub succeeded cleanly. + const workspaceOutcomes: WorkspaceOutcome[] = [] + let anyIndeterminate = false + let anyWorkspaceLoadFailed = false + // A hub we deliberately skipped because a committed lockfile already covers + // it. This is a SUCCESSFUL no-op (the server already ingests that lockfile), + // so it must not be conflated with "discovered a hub we failed to extract". 
+ let anyHubCoveredByLockfile = false try { // Always apply the default prune policy so no caller can forget it; @@ -539,7 +778,13 @@ export async function extractBazelToMaven( logger.warn( `No Bazel workspace found at ${cwd} or beneath (looked for MODULE.bazel / WORKSPACE / WORKSPACE.bazel).`, ) - return { artifactCount: 0, manifestPaths: [], status: 'noEcosystem' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'noEcosystem', + workspaceOutcomes: [], + } } if (verbose) { logger.log( @@ -550,15 +795,31 @@ export async function extractBazelToMaven( for (const workspaceRoot of workspaceRoots) { const relPath = path.relative(cwd, workspaceRoot) + const hubOutcomes: HubOutcome[] = [] let mode: WorkspaceMode try { mode = detectWorkspaceMode(workspaceRoot) } catch (e) { + // A workspace we cannot even read is a load failure, NOT "no Maven + // here": record it so the run is flagged not-complete (a hard failure + // when nothing else was analyzable, partial otherwise) rather than + // silently skipped. 
+ const reason = getErrorCause(e) if (verbose) { logger.log( - `[VERBOSE] workspace ${workspaceRoot}: detect failed (${getErrorCause(e)}); skipping`, + `[VERBOSE] workspace ${workspaceRoot}: load failed (${reason})`, ) } + logger.warn( + `Workspace ${relPath || '.'}: failed to load (${reason}); it could not be analyzed.`, + ) + anyWorkspaceLoadFailed = true + workspaceOutcomes.push({ + hubs: [], + load: 'failed', + reason, + relPath, + }) continue } logger.info( @@ -576,19 +837,74 @@ export async function extractBazelToMaven( verbose, }) - // eslint-disable-next-line no-await-in-loop - const candidates = await discoverCandidatesForWorkspace( - workspaceRoot, - mode, - queryOptsFor(outputUserRoot), - verbose, - ) + const { candidates, discoveryIndeterminate, indeterminateProbes } = + // eslint-disable-next-line no-await-in-loop + await discoverCandidatesForWorkspace( + workspaceRoot, + mode, + queryOptsFor(outputUserRoot), + verbose, + ) + // Authoritative hub enumeration failed to execute (e.g. `bazel mod + // show_extension` errored under Bzlmod): custom-named hubs may have been + // missed, so the run can never be complete. Record it as an + // indeterminate hub outcome under a synthetic name so the completeness + // signal carries the gap. + if (discoveryIndeterminate) { + anyIndeterminate = true + hubOutcomes.push({ + hub: '(enumeration)', + reason: 'show-extension-failed', + state: 'indeterminate', + }) + logger.warn( + `Workspace ${relPath || '.'}: Maven hub enumeration failed; custom-named hubs may be missing. 
The run is reported known-incomplete.`, + ) + } + for (const indeterminate of indeterminateProbes) { + anyIndeterminate = true + hubOutcomes.push({ + hub: indeterminate, + reason: 'probe-indeterminate', + state: 'indeterminate', + }) + } logger.info( `Workspace ${relPath || '.'}: discovered ${candidates.length} Maven repo(s): ${ candidates.join(', ') || '(none)' }`, ) for (const repoName of candidates) { + // Committed-lockfile gate: the server-side walker already ingests any + // committed maven_install.json / _maven_install.json under the + // workspace; the CLI's synthetic manifest is the COMPLEMENT, not a + // duplicate. Skip emitting when a committed lockfile already covers + // this hub. A skip is a successful no-op, so it runs BEFORE + // `anyRepos` is flipped (which marks "a hub we needed to extract"). + const committed = committedLockfileCovers({ + fileName: hubManifestFileName(repoName), + manifestDir, + workspaceRoot, + }) + if (committed) { + anyHubCoveredByLockfile = true + logger.info( + `@${repoName}: committed lockfile already covers this hub (${path.relative(cwd, committed) || committed}); skipping synthetic manifest.`, + ) + hubOutcomes.push({ + hub: repoName, + reason: 'committed-lockfile', + state: 'skipped-lockfile', + }) + if (verbose) { + logger.log( + `[VERBOSE] @${repoName}: skipped (committed lockfile at ${committed})`, + ) + } + continue + } + // We are about to extract this hub: it is a real candidate we must + // analyze, so mark the ecosystem present. 
anyRepos = true if (verbose) { logger.log( @@ -608,6 +924,11 @@ export async function extractBazelToMaven( `@${repoName}: cquery timed out after ${perRepoTimeoutMs}ms; reaping server`, ) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'cquery-timeout', + state: 'failed', + }) // eslint-disable-next-line no-await-in-loop await reapBazelServer(bin, outputUserRoot, verbose) // eslint-disable-next-line no-await-in-loop @@ -624,6 +945,11 @@ export async function extractBazelToMaven( if (result.status === 'error') { logger.warn(`@${repoName}: cquery failed; skipping this hub`) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'cquery-error', + state: 'failed', + }) continue } // A scan must never silently upload a graph missing edges it knows @@ -661,6 +987,11 @@ export async function extractBazelToMaven( `@${repoName}: failed to write manifest (${getErrorCause(e)}); skipping this hub`, ) hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'manifest-write-failed', + state: 'failed', + }) continue } if (written.droppedArtifacts.length) { @@ -680,8 +1011,14 @@ export async function extractBazelToMaven( totalArtifacts += written.artifactCount if (hubPartial) { hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'incomplete-graph', + state: 'failed', + }) } else { hubsSucceeded += 1 + hubOutcomes.push({ hub: repoName, state: 'populated' }) } if (verbose) { logger.log( @@ -693,6 +1030,13 @@ export async function extractBazelToMaven( // edges were dropped the partial signal still applies. 
if (hubPartial) { hubsFailed += 1 + hubOutcomes.push({ + hub: repoName, + reason: 'incomplete-graph', + state: 'failed', + }) + } else { + hubOutcomes.push({ hub: repoName, state: 'empty' }) } if (verbose) { logger.log( @@ -701,35 +1045,123 @@ export async function extractBazelToMaven( } } } + workspaceOutcomes.push({ + hubs: hubOutcomes, + load: 'loaded', + relPath, + }) + if (verbose) { + for (const outcome of hubOutcomes) { + logger.log( + `[VERBOSE] workspace ${relPath || '.'} hub @${outcome.hub}: ${outcome.state}${ + outcome.reason ? ` (${outcome.reason})` : '' + }`, + ) + } + } } if (!manifestPaths.length) { - if (!anyRepos) { + // Every discovered hub was already covered by a committed lockfile and + // nothing else needed extraction: writing zero synthetic manifests is + // the CORRECT complement, not a failure. The run is complete only when + // no workspace failed to load and no probe was indeterminate. + if ( + anyHubCoveredByLockfile && + !anyRepos && + !anyWorkspaceLoadFailed && + !anyIndeterminate + ) { + logger.success( + 'All discovered Maven hub(s) are already covered by committed lockfiles; nothing to generate.', + ) + await writeCompletenessSummary({ + artifactCount: 0, + complete: true, + manifestDir, + manifestPaths: [], + status: 'complete', + verbose, + workspaceOutcomes, + }) + return { + artifactCount: 0, + complete: true, + manifestPaths: [], + status: 'complete', + workspaceOutcomes, + } + } + // Nothing was emitted. If nothing was analyzable at all (no repos to + // extract, no committed-lockfile coverage, no workspace load failure, no + // indeterminate probe) this is a genuine absence; otherwise it's a hard + // failure — something was present but we could not extract it. + if ( + !anyRepos && + !anyWorkspaceLoadFailed && + !anyIndeterminate && + !anyHubCoveredByLockfile + ) { if (verbose) { logger.info( 'No Maven artifacts extracted. 
failureCategory=no-supported-ecosystem', ) } - return { artifactCount: 0, manifestPaths: [], status: 'noEcosystem' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'noEcosystem', + workspaceOutcomes, + } } logger.fail( - 'Discovered Maven repo(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty', + 'Discovered or partially analyzed Maven workspace(s) but wrote zero manifests. failureCategory=ecosystem-detected-but-empty', ) - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + await writeCompletenessSummary({ + artifactCount: 0, + complete: false, + manifestDir, + manifestPaths: [], + status: 'hardFailure', + verbose, + workspaceOutcomes, + }) + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes, + } } - const status: ExtractBazelStatus = hubsFailed ? 'partial' : 'complete' + // Manifests were written, so the run is not a hard failure. It is only + // `complete` when every queried hub succeeded cleanly AND no workspace + // failed to load AND no probe was indeterminate; any of those means the + // emitted SBOM is known-incomplete (partial under the hybrid rule). + const knownIncomplete = + hubsFailed > 0 || anyWorkspaceLoadFailed || anyIndeterminate + const status: ExtractBazelStatus = knownIncomplete ? 'partial' : 'complete' if (status === 'complete') { logger.success( `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total.`, ) } else { + const loadNote = anyWorkspaceLoadFailed + ? ', at least one workspace failed to load' + : '' + const indetNote = anyIndeterminate + ? 
', at least one hub could not be classified' + : '' logger.warn( - `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete.`, + `Wrote ${manifestPaths.length} manifest(s), ${totalArtifacts} artifact(s) total — partial run: ${hubsSucceeded} hub(s) succeeded, ${hubsFailed} failed or incomplete${loadNote}${indetNote}. The uploaded SBOM is known-incomplete.`, ) } if (verbose) { logger.log('[VERBOSE] outputs:', { + anyIndeterminate, + anyWorkspaceLoadFailed, artifactCount: totalArtifacts, hubsFailed, hubsSucceeded, @@ -738,7 +1170,22 @@ export async function extractBazelToMaven( status, }) } - return { artifactCount: totalArtifacts, manifestPaths, status } + await writeCompletenessSummary({ + artifactCount: totalArtifacts, + complete: status === 'complete', + manifestDir, + manifestPaths, + status, + verbose, + workspaceOutcomes, + }) + return { + artifactCount: totalArtifacts, + complete: status === 'complete', + manifestPaths, + status, + workspaceOutcomes, + } } catch (e) { logger.fail(`Unexpected error in bazel2maven: ${getErrorCause(e)}`) if (verbose) { @@ -748,7 +1195,13 @@ export async function extractBazelToMaven( } else { logger.info('Re-run with --verbose for the full stack.') } - return { artifactCount: 0, manifestPaths: [], status: 'hardFailure' } + return { + artifactCount: 0, + complete: false, + manifestPaths: [], + status: 'hardFailure', + workspaceOutcomes, + } } finally { for (const dir of mintedRoots) { // eslint-disable-next-line no-await-in-loop diff --git a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts index 9b4a0fbcf..6b50ac7df 100644 --- a/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts +++ b/src/commands/manifest/bazel/extract_bazel_to_maven.test.mts @@ -509,7 +509,8 @@ Fetched repositories: outLayout: 'flat', verbose: false, }) - const call = 
vi.mocked(findWorkspaceRoots).mock.calls.at(-1)![0] + const calls = vi.mocked(findWorkspaceRoots).mock.calls + const call = calls[calls.length - 1]![0] const names = [...(call.ignoreDirNames ?? [])] expect(names).toContain('node_modules') expect(names).toContain('.git') @@ -536,7 +537,8 @@ Fetched repositories: outLayout: 'flat', verbose: false, }) - const call = vi.mocked(findWorkspaceRoots).mock.calls.at(-1)![0] + const calls = vi.mocked(findWorkspaceRoots).mock.calls + const call = calls[calls.length - 1]![0] const names = [...(call.ignoreDirNames ?? [])] expect(names).toEqual( expect.arrayContaining(['node_modules', 'custom_dir']), @@ -697,6 +699,583 @@ Fetched repositories: } }) + it('flags partial (never complete) when a probe is indeterminate but another hub succeeds', async () => { + // WORKSPACE mode so the conventional-name probe runs. `maven` succeeds and + // extracts; `maven_install` probe returns an unrecognized non-zero exit + // (indeterminate). The run must be partial, never complete, and carry the + // completeness signal. + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: false, + workspace: true, + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + if (name === 'maven_install') { + // Unrecognized non-zero exit -> indeterminate. 
+ return { code: 1, stdout: '', stderr: 'bazel internal error\n' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + // The indeterminate hub is recorded in the completeness signal. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') + }) + + it('hard-fails (never complete) when the only probe is indeterminate and nothing extracts', async () => { + vi.mocked(detectWorkspaceMode).mockReturnValue({ + bzlmod: false, + workspace: true, + }) + // Every conventional name probe returns an unrecognized non-zero exit. + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: 'bazel internal error\n', + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // Nothing analyzable was produced, but a probe was indeterminate, so this + // is a hard failure, NOT noEcosystem (which would imply "no Maven here"). + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + }) + + it('skips emitting a hub manifest when a committed lockfile already covers it', async () => { + // A committed maven_install.json under the workspace means the server-side + // walker already ingests it; the CLI must NOT re-emit a synthetic copy. 
+ writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The hub was skipped, so no synthetic manifest and the cquery never runs. + expect(result.manifestPaths).toHaveLength(0) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven') + }) + + it('extracts the root hub even when a nested dir holds a maven_install.json (no any-depth match)', async () => { + // The root @maven is UNCOVERED: there is no maven_install.json directly in + // the workspace root. A nested fixture/example holds its own + // maven_install.json, which covers ITS workspace, not the root hub. An + // any-depth gate would wrongly judge the root hub covered, skip its + // synthetic emit, and silently drop its distinct coordinates. The gate is + // depth-0, so the root hub must still be extracted. 
+ const nested = path.join(tmp, 'examples', 'nested') + mkdirSync(nested, { recursive: true }) + writeFileSync( + path.join(nested, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:rootonly:1.0', 'rootonly')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The root hub was NOT skipped: cquery ran and the synthetic manifest + // carrying the root's distinct coordinate was emitted. + expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + expect(result.manifestPaths).toHaveLength(1) + const manifest = readManifest(tmp) as { + artifacts: Record + } + expect(Object.keys(manifest.artifacts)).toEqual(['com.example:rootonly']) + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toEqual([]) + }) + + it('reports complete:true with zero synthetic manifests when every hub is covered by a committed root-level lockfile', async () => { + // A committed maven_install.json sits directly in the workspace root, so + // the only discovered hub is covered. The CLI writes zero synthetic + // manifests and the run must headline complete:true. 
+ writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('complete') + expect(result.complete).toBe(true) + expect(result.manifestPaths).toHaveLength(0) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + // The emitted completeness summary also headlines complete:true. + const summary = JSON.parse( + readFileSync( + path.join( + tmp, + '.socket-auto-manifest', + 'socket-bazel-manifest-summary.json', + ), + 'utf8', + ), + ) as { complete: boolean; status: string } + expect(summary.complete).toBe(true) + expect(summary.status).toBe('complete') + }) + + it('does not treat a prior-run synthetic manifest in the output dir as a committed lockfile', async () => { + // A previous run left a synthetic maven_install.json inside the output dir + // (.socket-auto-manifest). A later run must NOT read it as a committed + // lockfile and skip the hub; it must re-extract. + const outputDir = path.join(tmp, '.socket-auto-manifest') + mkdirSync(outputDir, { recursive: true }) + writeFileSync( + path.join(outputDir, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + // The stale synthetic file did not gate the hub: cquery ran and a manifest + // was emitted. 
+ expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + expect(result.manifestPaths).toHaveLength(1) + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toEqual([]) + }) + + it('maps a hub named maven to maven_install.json for the committed-lockfile gate', async () => { + // The default `maven` hub is covered by a committed `maven_install.json`. + writeFileSync( + path.join(tmp, 'maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven') + }) + + it('maps a non-default hub to _maven_install.json for the committed-lockfile gate', async () => { + // A non-default hub `maven_dev` is covered only by a committed file named + // `maven_dev_maven_install.json`. A bare `maven_install.json` must NOT + // cover it, and the prefixed file must. 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: `## @@rules_jvm_external+//:extensions.bzl%maven: + +Fetched repositories: + - maven_dev (imported by ) +`, + stderr: '', + }) + writeFileSync( + path.join(tmp, 'maven_dev_maven_install.json'), + JSON.stringify({ artifacts: {}, dependencies: {} }), + 'utf8', + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(runMetadataCqueryForRepo).not.toHaveBeenCalled() + const skipped = result.workspaceOutcomes.flatMap(w => + w.hubs.filter(h => h.state === 'skipped-lockfile').map(h => h.hub), + ) + expect(skipped).toContain('maven_dev') + }) + + it('flags partial (never complete) when show_extension fails to evaluate the module graph but a probed hub extracts', async () => { + // show_extension hit a genuine module-graph EVALUATION failure (not merely + // "rules_jvm_external isn't a dependency"): authoritative hub enumeration + // is indeterminate, so custom-named hubs may have been missed. The + // conventional probe still finds @maven and extracts it, but the run must + // be partial — never silently complete. + // NOTE: exact bazel stderr wording for an eval failure should be confirmed + // against a live bazel run (sandbox blocks bazel here). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 1, + stdout: '', + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'PYTHON_VERSION' is not defined\n", + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + // The failed enumeration is recorded as an indeterminate hub outcome. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') + }) + + it('stays complete when show_extension runs cleanly and finds no maven extension (legitimate not-defined)', async () => { + // show_extension ran fine (code 0) but the report has no maven section, so + // the parse yields zero hubs. This is the legitimate "no maven extension + // defined" case — NOT an execution failure. The conventional probe then + // finds @maven and extracts it; the run is complete (no indeterminate + // enumeration outcome). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 0, + stdout: 'No extensions defined.\n', + stderr: '', + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async (name: string) => { + if (name === 'maven') { + return { code: 0, stdout: '@maven//:foo\n', stderr: '' } + } + return { + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('complete') + expect(result.complete).toBe(true) + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).not.toContain('indeterminate') + }) + + it('reports noEcosystem (never hard-fails) when show_extension exits non-zero on a no-Maven bzlmod repo and nothing extracts', async () => { + // `bazel mod show_extension @rules_jvm_external//:extensions.bzl%maven` + // exits non-zero on EVERY bzlmod repo that doesn't depend on + // rules_jvm_external — its argument resolution throws before any Starlark + // runs. This generic non-zero exit (no eval-failure signature) is the + // common no-Maven case, NOT a failed enumeration. With no probed hub + // populating, the run is a clean noEcosystem — it must NOT hard-fail, which + // would abort the user's entire `scan create --auto-manifest`. + // NOTE: exact bazel stderr wording should be confirmed against a live bazel + // run (sandbox blocks bazel here). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 1, + stdout: '', + stderr: + "ERROR: In extension argument '@rules_jvm_external//:extensions.bzl%maven': module 'rules_jvm_external' is not a dependency of the root module\n", + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('noEcosystem') + expect(result.complete).toBe(false) + // No hub was flagged indeterminate: the non-zero exit was correctly read as + // "no maven extension here", not a failed enumeration. + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).not.toContain('indeterminate') + }) + + it('hard-fails (never complete) when show_extension fails to evaluate the module graph and nothing extracts', async () => { + // A genuine module-graph evaluation failure (Starlark eval error / unbound + // name) leaves hub enumeration indeterminate. With no probed hub + // populating, nothing analyzable was produced — and because enumeration was + // indeterminate this is NOT a clean "no Maven here", so it must be a hard + // failure, never complete and never silently noEcosystem. + // NOTE: exact bazel stderr wording should be confirmed against a live bazel + // run (sandbox blocks bazel here). 
+ vi.mocked(runBazelModShowMavenExtension).mockResolvedValue({ + code: 1, + stdout: '', + stderr: + "ERROR: Error evaluating MODULE.bazel: name 'pip' is not defined\n", + }) + vi.mocked(buildMavenProbeFor).mockReturnValue(async () => ({ + code: 1, + stdout: '', + stderr: "ERROR: No repository visible as '@x' from main repository\n", + })) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + const hubStates = result.workspaceOutcomes.flatMap(w => + w.hubs.map(h => h.state), + ) + expect(hubStates).toContain('indeterminate') + }) + + it('never reports complete when one workspace fails to load while another extracts', async () => { + // Two workspaces: the first loads and extracts cleanly; the second throws + // on load (e.g. an unreadable MODULE.bazel). A load failure is NOT "no + // Maven here" — the run must be partial (a manifest was written), never + // complete. 
+ const nested = path.join(tmp, 'broken') + mkdirSync(nested, { recursive: true }) + vi.mocked(findWorkspaceRoots).mockReturnValue([tmp, nested]) + vi.mocked(detectWorkspaceMode).mockImplementation((root: string) => { + if (root === nested) { + throw new Error('unbound variable in MODULE.bazel') + } + return { bzlmod: true, workspace: false } + }) + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toHaveLength(1) + const loadFailed = result.workspaceOutcomes.filter( + w => w.load === 'failed', + ) + expect(loadFailed).toHaveLength(1) + }) + + it('hard-fails (never complete) when the only workspace fails to load', async () => { + // A single workspace that cannot be read produces zero manifests. This is + // a load failure, not noEcosystem — it must be a hard failure, never + // complete and never silently "no Maven here". 
+ vi.mocked(detectWorkspaceMode).mockImplementation(() => { + throw new Error('unbound variable in MODULE.bazel') + }) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('hardFailure') + expect(result.complete).toBe(false) + expect(result.manifestPaths).toEqual([]) + }) + + it('still emits a synthetic manifest when no committed lockfile covers the hub', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.manifestPaths).toHaveLength(1) + expect(runMetadataCqueryForRepo).toHaveBeenCalledTimes(1) + }) + + it('writes a completeness summary carrying the machine-readable signal', async () => { + vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( + mkResult({ + artifacts: [mkArt('com.example:a:1.0', 'a')], + repoName: 'maven', + status: 'partial', + unresolvedLabels: ['@maven//:missing'], + }), + ) + const result = await extractBazelToMaven({ + bazelFlags: undefined, + bazelOutputBase: undefined, + bazelRc: undefined, + bin: undefined, + cwd: tmp, + out: tmp, + outLayout: 'flat', + verbose: false, + }) + expect(result.status).toBe('partial') + const summary = JSON.parse( + readFileSync( + path.join( + tmp, + '.socket-auto-manifest', + 'socket-bazel-manifest-summary.json', + ), + 'utf8', + ), + ) as { complete: boolean; status: string; workspaces: unknown[] } + expect(summary.complete).toBe(false) + expect(summary.status).toBe('partial') + expect(Array.isArray(summary.workspaces)).toBe(true) + }) + it('writes maven_install.json into 
.socket-auto-manifest in flat layout', async () => { vi.mocked(runMetadataCqueryForRepo).mockResolvedValueOnce( mkResult({ diff --git a/src/commands/manifest/generate_auto_manifest.mts b/src/commands/manifest/generate_auto_manifest.mts index 663891793..0794be55b 100644 --- a/src/commands/manifest/generate_auto_manifest.mts +++ b/src/commands/manifest/generate_auto_manifest.mts @@ -154,8 +154,24 @@ export async function generateAutoManifest({ if (mavenResult.status === 'complete' || mavenResult.status === 'partial') { generatedFiles.push(...mavenResult.manifestPaths) if (mavenResult.status === 'partial') { + // Hybrid handling: still upload the partial SBOM, but be loud AND + // leave a machine-readable trail. The extractor writes a completeness + // summary (complete=false + per-hub/workspace breakdown) into the + // manifest dir; that summary is the structured signal a downstream + // consumer reads to know this upload is known-incomplete. + const incomplete = mavenResult.workspaceOutcomes + .flatMap(w => + w.load === 'failed' + ? [`${w.relPath || '.'} (workspace load failed)`] + : w.hubs + .filter( + h => h.state === 'failed' || h.state === 'indeterminate', + ) + .map(h => `${w.relPath || '.'}@${h.hub} (${h.state})`), + ) + .join(', ') logger.warn( - `Bazel Maven manifest generation was partial (${mavenResult.manifestPaths.length} manifest(s) written); some hubs failed or had incomplete dependency graphs. Uploading what was generated.`, + `WARNING: Bazel Maven manifest generation was PARTIAL (${mavenResult.manifestPaths.length} manifest(s) written); the uploaded SBOM is known-incomplete and may under-report dependencies. Incomplete: ${incomplete || 'see completeness summary'}. 
Uploading what was generated.`, ) } } else { diff --git a/src/commands/manifest/generate_auto_manifest.test.mts b/src/commands/manifest/generate_auto_manifest.test.mts index f8ecf97af..fe2e59235 100644 --- a/src/commands/manifest/generate_auto_manifest.test.mts +++ b/src/commands/manifest/generate_auto_manifest.test.mts @@ -4,8 +4,10 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' vi.mock('./bazel/extract_bazel_to_maven.mts', () => ({ extractBazelToMaven: vi.fn(async () => ({ artifactCount: 1, + complete: true, manifestPaths: ['/tmp/repo/.socket-auto-manifest/maven_install.json'], status: 'complete', + workspaceOutcomes: [], })), })) vi.mock('./convert_gradle_to_maven.mts', () => ({ @@ -27,6 +29,8 @@ vi.mock('../../utils/socket-json.mts', () => ({ readOrDefaultSocketJson: vi.fn(() => ({})), })) +import { logger } from '@socketsecurity/registry/lib/logger' + import { extractBazelToMaven } from './bazel/extract_bazel_to_maven.mts' import { convertGradleToFacts } from './convert-gradle-to-facts.mts' import { convertGradleToMaven } from './convert_gradle_to_maven.mts' @@ -52,8 +56,10 @@ describe('generateAutoManifest — bazel branch', () => { vi.mocked(readOrDefaultSocketJson).mockReturnValue({} as SocketJson) vi.mocked(extractBazelToMaven).mockResolvedValue({ artifactCount: 1, + complete: true, manifestPaths: ['/tmp/repo/.socket-auto-manifest/maven_install.json'], status: 'complete', + workspaceOutcomes: [], }) }) @@ -151,8 +157,10 @@ describe('generateAutoManifest — bazel branch', () => { it('does not run PyPI by default when Maven has no discovery', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'noEcosystem', + workspaceOutcomes: [], }) const result = await generateAutoManifest({ cwd: '/tmp/repo', @@ -167,8 +175,10 @@ describe('generateAutoManifest — bazel branch', () => { it('throws when Maven hard-fails', async () => { 
vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'hardFailure', + workspaceOutcomes: [], }) await expect( generateAutoManifest({ @@ -185,8 +195,10 @@ describe('generateAutoManifest — bazel branch', () => { it('does NOT throw when Maven has no discovery', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 0, + complete: false, manifestPaths: [], status: 'noEcosystem', + workspaceOutcomes: [], }) const result = await generateAutoManifest({ cwd: '/tmp/repo', @@ -198,25 +210,44 @@ describe('generateAutoManifest — bazel branch', () => { expect(result.generatedFiles).toEqual([]) }) - it('pushes manifests and warns on a partial Maven run', async () => { + it('pushes the partial manifests and warns loudly with the incompleteness detail', async () => { vi.mocked(extractBazelToMaven).mockResolvedValueOnce({ artifactCount: 2, + complete: false, manifestPaths: [ '/tmp/repo/.socket-auto-manifest/maven_install.json', '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', ], status: 'partial', + workspaceOutcomes: [ + { + hubs: [{ hub: 'maven', reason: 'cquery-timeout', state: 'failed' }], + load: 'loaded', + relPath: 'sub', + }, + ], }) - const result = await generateAutoManifest({ - cwd: '/tmp/repo', - detected: { ...baseDetected, bazel: true, count: 1 }, - outputKind: 'text', - verbose: false, - }) - expect(result.generatedFiles).toEqual([ - '/tmp/repo/.socket-auto-manifest/maven_install.json', - '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', - ]) + const warnSpy = vi.spyOn(logger, 'warn').mockImplementation(() => logger) + try { + const result = await generateAutoManifest({ + cwd: '/tmp/repo', + detected: { ...baseDetected, bazel: true, count: 1 }, + outputKind: 'text', + verbose: false, + }) + // Hybrid: the partial SBOM is still uploaded. 
+ expect(result.generatedFiles).toEqual([ + '/tmp/repo/.socket-auto-manifest/maven_install.json', + '/tmp/repo/.socket-auto-manifest/sub/maven_install.json', + ]) + const warned = warnSpy.mock.calls.map(c => String(c[0])).join('\n') + expect(warned).toMatch(/PARTIAL/) + expect(warned).toMatch(/known-incomplete/) + // The structured outcome detail surfaces the failing hub. + expect(warned).toMatch(/sub@maven \(failed\)/) + } finally { + warnSpy.mockRestore() + } }) it('runs BOTH bazel and gradle branches when both are detected', async () => { diff --git a/src/utils/socket-json.mts b/src/utils/socket-json.mts index 5bbbb21d4..e8c7ad257 100644 --- a/src/utils/socket-json.mts +++ b/src/utils/socket-json.mts @@ -47,6 +47,7 @@ export interface SocketJson { bin?: string | undefined disabled?: boolean | undefined out?: string | undefined + perRepoTimeout?: number | undefined verbose?: boolean | undefined } conda?: {