From 0788f6f152c0f54562d0aba154e7e9a590320fc9 Mon Sep 17 00:00:00 2001 From: igerber Date: Mon, 22 Jun 2026 16:01:09 -0400 Subject: [PATCH] fix: clamp negative vcov diagonal in LinearRegression SE (no NaN SE) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A high-leverage / degenerate coefficient (e.g. an absorbed-FE dummy near-collinear with the treatment, whose Bell-McCaffrey Satterthwaite DOF already hits the noise-floor guard) can have a CR2/HC variance of ~0 (≈1e-32) whose vcov diagonal lands just-below-zero under BLAS-dependent float rounding. `np.sqrt` of the negative then produced a NaN SE NONDETERMINISTICALLY — passing single-threaded but failing only under the parallel pure-Python full-suite run (the Pure Python Fallback CI leg), on test_methodology_wls_cr2.py::TestLinearRegressionFENanGuardEndToEnd. - linalg.py: clamp the vcov diagonal at 0 before sqrt in both SE sites (get_se, get_inference). No-op for positive variances; the SE is now finite (0 for a genuinely-zero variance), deterministic, BLAS-independent. - test: relax the guarded-coef assertion to finite & >= 0 (rounding-robust) + a deterministic regression test (a tiny-negative diagonal yields a finite 0 SE, not NaN). - REGISTRY (Variance Estimation, Cluster-Robust SE) + CHANGELOG documented. Pre-existing failure surfaced by PR #539's labeled full-suite run; main does not run the Pure Python Fallback leg outside labeled PRs. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 3 +++ diff_diff/linalg.py | 11 +++++++-- docs/methodology/REGISTRY.md | 7 ++++++ tests/test_methodology_wls_cr2.py | 38 ++++++++++++++++++++++++++++++- 4 files changed, 56 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b1f0e7cd4..699849364 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- **`LinearRegression.get_se()` / `get_inference()` no longer return a `NaN` standard error from a tiny-negative variance artifact.** A high-leverage / degenerate coefficient (e.g. an absorbed-FE dummy near-collinear with the treatment, whose Bell-McCaffrey Satterthwaite DOF already hits the noise-floor guard) can have a CR2/HC variance of ~0 (≈1e-32) whose vcov diagonal lands just-below-zero under BLAS-dependent float rounding; `np.sqrt` of the negative then produced a `NaN` SE **nondeterministically** — passing single-threaded but failing under the parallel pure-Python full-suite run (`tests/test_methodology_wls_cr2.py::TestLinearRegressionFENanGuardEndToEnd::test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs`). Both SE sites now clamp the vcov diagonal at 0, so the SE is finite (0 for a genuinely-zero variance), deterministic, and BLAS-independent. **No change for any positive variance** (the clamp is a no-op there); only the previously-`NaN` degenerate case is affected. + ## [3.5.2] - 2026-06-08 ### Added diff --git a/diff_diff/linalg.py b/diff_diff/linalg.py index 34c545e3a..876c39643 100644 --- a/diff_diff/linalg.py +++ b/diff_diff/linalg.py @@ -3865,7 +3865,13 @@ def get_se(self, index: int) -> float: """ self._check_fitted() assert self.vcov_ is not None - return float(np.sqrt(self.vcov_[index, index])) + # Clamp a tiny-negative variance artifact at 0 before sqrt. A high-leverage + # / degenerate coefficient (e.g. an absorbed-FE dummy near-collinear with the + # treatment) can have a CR2/HC variance of ~0 that lands just below zero under + # BLAS-dependent float rounding; without the clamp `np.sqrt` returns NaN + # nondeterministically (passes single-threaded, fails under parallel test + # load). The SE is then finite — 0 for a genuinely-zero variance. + return float(np.sqrt(max(float(self.vcov_[index, index]), 0.0))) def get_inference( self, @@ -3908,7 +3914,8 @@ def get_inference( assert self.vcov_ is not None coef = float(self.coefficients_[index]) - se = float(np.sqrt(self.vcov_[index, index])) + # See get_se: clamp a tiny-negative variance artifact at 0 so SE is finite, not NaN. + se = float(np.sqrt(max(float(self.vcov_[index, index]), 0.0))) # Use instance alpha if not provided effective_alpha = alpha if alpha is not None else self.alpha diff --git a/docs/methodology/REGISTRY.md b/docs/methodology/REGISTRY.md index 9a9c65d2b..7c7c27407 100644 --- a/docs/methodology/REGISTRY.md +++ b/docs/methodology/REGISTRY.md @@ -186,6 +186,13 @@ where V is the VCV sub-matrix for post-treatment δ_e coefficients. DiD panel sizes (n ≤ few thousand); tracked in `TODO.md` under Performance for a follow-up that plumbs the contrast DOF through the existing CR2 vcov path or shares precomputes. +- **Note:** `LinearRegression.get_se()` / `get_inference()` clamp the vcov diagonal at 0 + before `sqrt`. A high-leverage / degenerate coefficient (an absorbed-FE dummy + near-collinear with the treatment, whose Satterthwaite DOF already hits the noise-floor + guard) has a CR2/HC variance of ~0 (≈1e-32) that can land just-below-zero under + BLAS-dependent rounding; the clamp keeps the SE finite (0 for a genuinely-zero variance) + and deterministic across BLAS implementations, never `NaN`. No effect on any positive + variance. Regression: `tests/test_methodology_wls_cr2.py::TestLinearRegressionFENanGuardEndToEnd`. - Optional: Wild cluster bootstrap (complex for multi-coefficient testing; requires joint bootstrap distribution) - Degrees of freedom adjusted for absorbed fixed effects diff --git a/tests/test_methodology_wls_cr2.py b/tests/test_methodology_wls_cr2.py index ab19bc3d5..29feaf02b 100644 --- a/tests/test_methodology_wls_cr2.py +++ b/tests/test_methodology_wls_cr2.py @@ -775,7 +775,13 @@ def test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs(self, goldens): np.isnan(b) for b in inf_i.conf_int ), f"NaN guard must produce NaN conf_int at index {i}; got {inf_i.conf_int}" # SE and coefficient remain valid (vcov matches at machine precision). - assert np.isfinite(inf_i.se) and inf_i.se > 0 + # These guarded high-leverage FE dummies have a genuinely-~0 CR2 + # variance (~1e-32); the SE is therefore ~0 and may clamp to exactly 0 + # when the tiny diagonal lands just-negative under BLAS-dependent + # rounding (get_se/get_inference clamp at 0 so the SE is finite, never + # NaN — previously this produced a nondeterministic NaN that failed + # only under the parallel pure-Python full-suite run). + assert np.isfinite(inf_i.se) and inf_i.se >= 0 assert np.isfinite(inf_i.coefficient) # Non-guarded coefficients still emit finite inference. @@ -789,6 +795,36 @@ def test_did_absorbed_fe_lr_inference_nan_for_guarded_coefs(self, goldens): assert np.isfinite(inf_i.p_value) assert all(np.isfinite(b) for b in inf_i.conf_int) + def test_negative_variance_artifact_yields_finite_se_not_nan(self): + """A tiny-negative vcov diagonal (numerical artifact of a ~0 variance for a + degenerate/high-leverage coefficient) must clamp to a finite SE, never NaN. + + Regression for the pure-Python full-suite flake: the guarded FE-dummy CR2 + variances are ~1e-32 and tip just-below-zero under BLAS-dependent rounding, + so `np.sqrt(vcov[i,i])` returned NaN nondeterministically (passed single- + threaded, failed under the parallel `-n auto` run). get_se / get_inference + now clamp the diagonal at 0 so the SE is finite (0 for a genuinely-0 variance). + """ + from diff_diff.linalg import LinearRegression + + rng = np.random.default_rng(7) + n = 40 + X = np.column_stack([np.ones(n), rng.normal(size=n)]) + y = X @ np.array([1.0, 2.0]) + rng.normal(size=n) + lr = LinearRegression(include_intercept=False) + lr.fit(X, y) + assert lr.vcov_ is not None + + # Sanity: the unperturbed SE is finite and positive. + assert np.isfinite(lr.get_se(1)) and lr.get_se(1) > 0 + + # Inject a tiny-negative diagonal artifact (as parallel-load rounding can). + lr.vcov_[1, 1] = -1e-30 + se = lr.get_se(1) + assert np.isfinite(se) and se == 0.0, f"expected finite 0 SE, got {se}" + inf = lr.get_inference(1) + assert np.isfinite(inf.se) and inf.se == 0.0, f"expected finite 0 SE, got {inf.se}" + class TestUnweightedRegressionStillBitEqual: """Regression safety: existing unweighted goldens must still match bit-equal."""