From 9313df452ee0e7241dd7b1e082364b71120cc757 Mon Sep 17 00:00:00 2001
From: Rohan Borkar
Date: Thu, 4 Jun 2026 11:07:52 -0700
Subject: [PATCH 1/3] Add Zstd GPU CI test infrastructure (Phase 1)

Add zstdgpu_ci_tests: a thin GTest wrapper that shells out to
zstdgpu_demo.exe for correctness and performance validation of GPU Zstd
decompression. Tests are parameterized over .zst content files discovered
at runtime via --content-path.

Test cases:
- SimulationCheck (--chk-gpu --chk-cpu --sim-gpu)
- D3D12DebugLayer (--d3d-dbg)
- ExternalMemory (--ext-mem)
- GraphicsQueue (--d3d-gfx)
- OverallThroughput (--prf-lvl 0)
- PerStageTiming (--prf-lvl 2)

Also adds scripts/generate_histogram.py for converting performance CSV
output into histogram PNGs suitable for CI reporting.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 zstd/scripts/generate_histogram.py            |  75 ++++
 zstd/zstd.sln                                 |  14 +
 zstd/zstdgpu_ci_tests/main.cpp                | 152 +++++++
 zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp    | 393 ++++++++++++++++++
 zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.h      |  73 ++++
 .../zstdgpu_ci_tests/zstdgpu_ci_tests.vcxproj | 239 +++++++++++
 6 files changed, 946 insertions(+)
 create mode 100644 zstd/scripts/generate_histogram.py
 create mode 100644 zstd/zstdgpu_ci_tests/main.cpp
 create mode 100644 zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp
 create mode 100644 zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.h
 create mode 100644 zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.vcxproj

diff --git a/zstd/scripts/generate_histogram.py b/zstd/scripts/generate_histogram.py
new file mode 100644
index 0000000..59a1295
--- /dev/null
+++ b/zstd/scripts/generate_histogram.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python3
+"""
+Generate histogram PNGs from zstdgpu_demo performance CSV output.
+
+Usage:
+    python generate_histogram.py --input <csv> --output <png> [--title <text>]
+
+Supports two CSV formats based on profiling level:
+  - prf-lvl 0 (OverallThroughput): column "Throughput_GBs"
+  - prf-lvl 2 (PerStageTiming): column "Microseconds"
+"""
+
+import argparse
+import csv
+import sys
+
+# Recognized value columns, in lookup order, mapped to their x-axis labels.
+COLUMN_LABELS = {"Throughput_GBs": "Throughput (GB/s)", "Microseconds": "Time (us)"}
+
+
+def try_import_matplotlib():
+    """Return matplotlib.pyplot using the Agg backend, or None if matplotlib is not installed."""
+    try:
+        import matplotlib
+        matplotlib.use("Agg")  # Non-interactive backend for CI
+        import matplotlib.pyplot as plt
+        return plt
+    except ImportError:
+        print(
+            "WARNING: matplotlib not installed. Install with: pip install matplotlib",
+            file=sys.stderr,
+        )
+        return None
+
+
+def main():
+    """Read the CSV named by --input, write a histogram PNG to --output, return the exit code."""
+    parser = argparse.ArgumentParser(description="Generate histogram from zstdgpu perf CSV")
+    parser.add_argument("--input", required=True, help="Path to input CSV file")
+    parser.add_argument("--output", required=True, help="Path to output PNG file")
+    parser.add_argument("--title", default="Throughput", help="Chart title")
+    args = parser.parse_args()
+
+    plt = try_import_matplotlib()
+    if plt is None:
+        return 1
+
+    with open(args.input, newline="") as f:
+        reader = csv.DictReader(f)
+        # Pick the value column from the CSV header; the file name is not a reliable indicator.
+        column = next((c for c in COLUMN_LABELS if c in (reader.fieldnames or [])), None)
+        try:
+            # Blank cells (e.g. short rows) are skipped instead of crashing float().
+            data = [float(row[column]) for row in reader if column and (row[column] or "").strip()]
+        except ValueError as err:
+            print(f"Malformed {column} value in {args.input}: {err}", file=sys.stderr)
+            return 1
+
+    if not data:
+        print(f"No Throughput_GBs or Microseconds data found in {args.input}", file=sys.stderr)
+        return 1
+
+    plt.figure()
+    plt.hist(data, bins=20)
+    plt.xlabel(COLUMN_LABELS[column])  # label follows the column that was actually plotted
+    plt.ylabel("Count")
+    plt.title(args.title)
+    plt.savefig(args.output)
+    plt.close()
+    print(f"Generated: {args.output}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/zstd/zstd.sln b/zstd/zstd.sln
index 7c5888c..8c30eb9 100644
--- a/zstd/zstd.sln
+++ b/zstd/zstd.sln
@@ -11,6 +11,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdgpu_tests", "zstdgpu_te
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"googletest_static", "ThirdParty\googletest_static.vcxproj", "{49811F10-3D14-403E-859D-40DFCBB35C7B}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "zstdgpu_ci_tests", "zstdgpu_ci_tests\zstdgpu_ci_tests.vcxproj", "{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM64 = Debug|ARM64 @@ -69,6 +71,18 @@ Global {49811F10-3D14-403E-859D-40DFCBB35C7B}.Release|x64.Build.0 = Release|x64 {49811F10-3D14-403E-859D-40DFCBB35C7B}.Release|x86.ActiveCfg = Release|Win32 {49811F10-3D14-403E-859D-40DFCBB35C7B}.Release|x86.Build.0 = Release|Win32 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|ARM64.Build.0 = Debug|ARM64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.ActiveCfg = Debug|x64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x64.Build.0 = Debug|x64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x86.ActiveCfg = Debug|Win32 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Debug|x86.Build.0 = Debug|Win32 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|ARM64.ActiveCfg = Release|ARM64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|ARM64.Build.0 = Release|ARM64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.ActiveCfg = Release|x64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x64.Build.0 = Release|x64 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x86.ActiveCfg = Release|Win32 + {A1B2C3D4-E5F6-7890-ABCD-EF1234567890}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/zstd/zstdgpu_ci_tests/main.cpp b/zstd/zstdgpu_ci_tests/main.cpp new file mode 100644 index 0000000..3a52744 --- /dev/null +++ b/zstd/zstdgpu_ci_tests/main.cpp @@ -0,0 +1,152 @@ +/** + * Copyright (c) Microsoft. All rights reserved. + * This code is licensed under the MIT License (MIT). 
+ * THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+ * ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+ * IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+ * PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+ */
+
+// Entry point for the Zstd GPU CI tests. This is a thin GTest wrapper
+// that shells out to zstdgpu_demo.exe to validate Zstd GPU decompression shaders.
+//
+// - parses custom CLI flags (--content-path, --demo-path, etc.), prepares the
+// log directory, then hands off to GTest which runs parameterized tests defined
+// in zstdgpu_ci_tests.cpp. Each test spawns the demo as a child process.
+//
+// If no .zst content files are found, zero tests are instantiated and the test
+// binary exits 0 (success). If --demo-path is not given, tests are skipped (not failed).
+//
+// This file also implements the TestConfig singleton and file discovery helpers
+// declared in zstdgpu_ci_tests.h.
+
+#include "zstdgpu_ci_tests.h"
+#include <gtest/gtest.h>
+#include <algorithm>
+#include <cstdlib>
+#include <cstring>
+#include <filesystem>
+#include <iostream>
+#include <string>
+#include <utility>
+
+// TestConfig singleton: storage and accessors for the configuration declared in zstdgpu_ci_tests.h.
+
+static TestConfig g_testConfig;
+
+const TestConfig& GetTestConfig()
+{
+    return g_testConfig;
+}
+
+void SetTestConfig(TestConfig config)
+{
+    g_testConfig = std::move(config);
+}
+
+// File discovery
+
+std::vector<std::string> DiscoverZstFiles(const std::string& contentPath)
+{
+    namespace fs = std::filesystem;
+    std::vector<std::string> files;
+    std::error_code ec;
+
+    // Non-throwing check: a missing or unreadable content directory simply yields no tests.
+    if (contentPath.empty() || !fs::is_directory(contentPath, ec))
+        return files;
+    // Called while GTest registers the parameterized tests, so walk without throwing filesystem_error.
+    fs::recursive_directory_iterator it(contentPath, fs::directory_options::skip_permission_denied, ec), end;
+    for (; !ec && it != end; it.increment(ec))
+    {
+        if (it->is_regular_file(ec) && it->path().extension() == ".zst")
+            files.push_back(it->path().string());
+    }
+
+    std::sort(files.begin(), files.end());
+    return files;
+}
+
+// CLI and entry point
+
+// QOL for diagnostics. For running manually
+// Activate with --help-ci to avoid conflicting with GTest's own --help output.
+static void PrintUsage(const char* exe)
+{
+    std::cout << "Usage: " << exe << " [gtest_options] [options]\n"
+              << "\n"
+              << "Options:\n"
+              << " --content-path <dir> Directory containing .zst test files\n"
+              << " --demo-path <path> Path to zstdgpu_demo.exe\n"
+              << " --log-dir <dir> Directory for logs and CSV output\n"
+              << " --log-file <path> Consolidated text log file\n"
+              << " --run-count <N> Perf test iteration count (default: 40)\n"
+              << " --timeout <seconds> Per-test process timeout (default: no timeout)\n"
+              << std::endl;
+}
+
+int main(int argc, char** argv)
+{
+    // Parse custom flags before handing off to GTest. GTest's InitGoogleTest()
+    // is called later and will consume its own flags (e.g. --gtest_filter).
+    TestConfig config;
+
+    for (int i = 1; i < argc; ++i)
+    {
+        if (std::strcmp(argv[i], "--content-path") == 0 && i + 1 < argc)
+        {
+            config.contentPath = argv[++i];
+        }
+        else if (std::strcmp(argv[i], "--demo-path") == 0 && i + 1 < argc)
+        {
+            config.demoPath = argv[++i];
+        }
+        else if (std::strcmp(argv[i], "--log-dir") == 0 && i + 1 < argc)
+        {
+            config.logDir = argv[++i];
+        }
+        else if (std::strcmp(argv[i], "--log-file") == 0 && i + 1 < argc)
+        {
+            config.logFile = argv[++i];
+        }
+        else if (std::strcmp(argv[i], "--run-count") == 0 && i + 1 < argc)
+        {
+            config.runCount = std::atoi(argv[++i]);
+            if (config.runCount <= 0)
+                config.runCount = 40;
+        }
+        else if (std::strcmp(argv[i], "--timeout") == 0 && i + 1 < argc)
+        {
+            config.timeoutSeconds = std::atoi(argv[++i]);
+            if (config.timeoutSeconds < 0)
+                config.timeoutSeconds = 0;
+        }
+        else if (std::strcmp(argv[i], "--help-ci") == 0)
+        {
+            PrintUsage(argv[0]);
+            return 0;
+        }
+    }
+
+    if (config.demoPath.empty())
+    {
+        std::cerr << "Warning: --demo-path not set. Tests will skip.\n";
+    }
+
+    // Default log dir to current directory.
+    if (config.logDir.empty())
+    {
+        config.logDir = std::filesystem::current_path().string();
+    }
+
+    // Ensure log directory exists (no-op when present); warn instead of throwing on failure.
+    std::error_code ec;
+    if (!std::filesystem::create_directories(config.logDir, ec) && ec)
+        std::cerr << "Warning: cannot create log dir '" << config.logDir << "': " << ec.message() << "\n";
+
+    SetTestConfig(std::move(config));
+
+    testing::InitGoogleTest(&argc, argv);
+    testing::GTEST_FLAG(catch_exceptions) = false;
+    return RUN_ALL_TESTS();
+}
diff --git a/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp
new file mode 100644
index 0000000..7d60c86
--- /dev/null
+++ b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp
@@ -0,0 +1,393 @@
+/**
+ * Copyright (c) Microsoft. All rights reserved.
+ * This code is licensed under the MIT License (MIT).
+ * THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
+ * ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
+ * IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
+ * PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
+ */
+
+// Test definitions and demo runner for the Zstd GPU CI tests.
+//
+// Contains a single parameterized test suite (ZstdGpuDemoTests) instantiated
+// once per .zst file found in the content directory. Each file gets 6 test
+// scenarios:
+//
+// Correctness tests (4 scenarios per file):
+// - SimulationCheck: Software GPU simulation (--sim-gpu) with CPU+GPU validation
+// - D3D12DebugLayer: Hardware GPU with D3D12 debug layer (--d3d-dbg)
+// - ExternalMemory: External memory mode (--ext-mem)
+// - GraphicsQueue: Graphics queue instead of compute (--d3d-gfx)
+//
+// Performance tests (2 scenarios per file):
+// - OverallThroughput: Profiling level 0 — CSV: results/throughput_<stem>.csv
+// - PerStageTiming: Profiling level 2 — CSV: results/stages_<stem>.csv
+// Performance tests use EXPECT (not ASSERT) — they fail on infrastructure
+// errors but do not check performance values against thresholds.
+//
+// If no .zst files are found, GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST
+// prevents GTest from reporting an error — zero tests run, exit code 0.
+//
+// The demo runner at the bottom of this file spawns zstdgpu_demo.exe as a child
+// process using Win32 CreateProcess with anonymous pipes. A background thread
+// drains stdout to avoid pipe-buffer deadlocks. If the process exceeds the
+// configured timeout, it is terminated. This avoids any D3D12/GPU dependency
+// in the test binary itself — all GPU work happens inside the demo process.
+
+#include "zstdgpu_ci_tests.h"
+#include <gtest/gtest.h>
+#include <array>
+#include <cctype>
+#include <chrono>
+#include <cstdio>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <thread>
+#include <Windows.h>
+
+// Helpers
+
+// Returns the list of .zst files to parameterize over.
+// GTest evaluates this lazily when the test suite is instantiated (after main has parsed CLI args and set TestConfig), so contentPath is available here.
+static std::vector<std::string> GetTestFiles()
+{
+    const auto& config = GetTestConfig();
+    return DiscoverZstFiles(config.contentPath);
+}
+
+// Converts a .zst path to a valid GTest parameter name (alphanumeric + underscore, no leading
+// digit). The name is built from the path relative to --content-path, because discovery is
+// recursive and same-named files in different subdirectories would otherwise yield duplicate
+// parameter names. Files directly inside the content directory keep their plain stem.
+static std::string SanitizeTestName(const testing::TestParamInfo<std::string>& info)
+{
+    const std::filesystem::path full(info.param);
+    std::filesystem::path rel = full.lexically_relative(GetTestConfig().contentPath);
+    const std::string name = rel.empty() ? full.stem().string() : rel.replace_extension().string();
+    std::string result;
+    result.reserve(name.size());
+    for (char c : name)
+        result += std::isalnum(static_cast<unsigned char>(c)) ? c : '_';
+    if (!result.empty() && std::isdigit(static_cast<unsigned char>(result[0])))
+        result = "_" + result;
+    return result.empty() ? "Unknown" : result;
+}
+
+// Appends per-test output to the consolidated log file (--log-file).
+static void WriteToLogFile(const std::string& zstFile, const DemoResult& result)
+{
+    const auto& config = GetTestConfig();
+    if (config.logFile.empty())
+        return;
+
+    std::ofstream log(config.logFile, std::ios::app);
+    auto* testInfo = ::testing::UnitTest::GetInstance()->current_test_info();
+    log << "=== " << testInfo->test_suite_name() << "." << testInfo->name() << " ===\n";
+    log << "File: " << zstFile << "\n";
+    log << "Exit code: " << result.exitCode << "\n";
+    log << result.stdOut << "\n";
+}
+
+// Test runners
+
+// Run a correctness scenario. Spawns zstdgpu_demo.exe with the given .zst file and scenario flags, then asserts exit code == 0.
+// Failures include the full command line and demo stdout for diagnostic output.
+static void RunCorrectnessTest(const std::string& zstFile, const std::vector<std::string>& scenarioFlags)
+{
+    const auto& config = GetTestConfig();
+
+    if (config.demoPath.empty())
+    {
+        GTEST_SKIP() << "zstdgpu_demo.exe not found. Set --demo-path.";
+    }
+
+    auto args = BuildCorrectnessArgs(zstFile, scenarioFlags);
+    auto result = RunDemo(config.demoPath, args, config.timeoutSeconds);
+
+    // Write to log file before assertions so logs are captured even if an ASSERT aborts early.
+    WriteToLogFile(zstFile, result);
+
+    // Log the output regardless of pass/fail.
+    std::cout << "[DEMO CMD] " << result.commandLine << "\n";
+    if (!result.stdOut.empty())
+    {
+        std::cout << "[DEMO OUT] " << result.stdOut << "\n";
+    }
+
+    ASSERT_FALSE(result.timedOut)
+        << "Demo process timed out after " << config.timeoutSeconds << " seconds.\n"
+        << "Command: " << result.commandLine;
+
+    ASSERT_TRUE(result.launchError.empty())
+        << "Failed to launch demo: " << result.launchError << "\n"
+        << "Command: " << result.commandLine;
+
+    ASSERT_EQ(result.exitCode, 0)
+        << "Demo process returned non-zero exit code: " << result.exitCode << "\n"
+        << "Command: " << result.commandLine << "\n"
+        << "Output:\n"
+        << result.stdOut;
+}
+
+// Run a performance scenario. Spawns zstdgpu_demo.exe with profiling flags and requests CSV output. Uses EXPECT (not ASSERT) to verify the demo executed successfully and produced CSV output.
+static void RunPerformanceTest(const std::string& zstFile, int profilingLevel)
+{
+    const auto& config = GetTestConfig();
+
+    if (config.demoPath.empty())
+    {
+        GTEST_SKIP() << "zstdgpu_demo.exe not found. Set --demo-path.";
+    }
+
+    // Build CSV output path matching spec convention:
+    // prf-lvl 0 → results/throughput_<stem>.csv
+    // prf-lvl 2 → results/stages_<stem>.csv
+    std::string stem = std::filesystem::path(zstFile).stem().string();
+    std::string prefix = (profilingLevel == 0) ? "throughput" : "stages";
+    std::filesystem::path resultsDir = std::filesystem::path(config.logDir) / "results";
+    std::error_code ec; // best effort: a directory failure surfaces through the CSV check below
+    std::filesystem::create_directories(resultsDir, ec);
+    std::string csvPath = (resultsDir / (prefix + "_" + stem + ".csv")).string();
+    // Delete any CSV left by an earlier run so the existence check proves this run wrote it.
+    std::filesystem::remove(csvPath, ec);
+
+    auto args = BuildPerformanceArgs(zstFile, profilingLevel, config.runCount, csvPath);
+    auto result = RunDemo(config.demoPath, args, config.timeoutSeconds);
+
+    // Write to log file before assertions so logs are captured even if a check fails.
+    WriteToLogFile(zstFile, result);
+
+    std::cout << "[DEMO CMD] " << result.commandLine << "\n";
+    if (!result.stdOut.empty())
+    {
+        std::cout << "[DEMO OUT] " << result.stdOut << "\n";
+    }
+
+    EXPECT_FALSE(result.timedOut)
+        << "Demo process timed out after " << config.timeoutSeconds << " seconds.\n"
+        << "Command: " << result.commandLine;
+
+    EXPECT_TRUE(result.launchError.empty())
+        << "Failed to launch demo: " << result.launchError << "\n"
+        << "Command: " << result.commandLine;
+
+    EXPECT_EQ(result.exitCode, 0)
+        << "Demo process returned non-zero exit code: " << result.exitCode << "\n"
+        << "Command: " << result.commandLine << "\n"
+        << "Output:\n"
+        << result.stdOut;
+
+    const bool csvWritten = std::filesystem::exists(csvPath);
+    EXPECT_TRUE(csvWritten) << "CSV not created: " << csvPath;
+
+    if (csvWritten)
+        std::cout << "[PERF CSV] Written to: " << csvPath << "\n";
+}
+
+// Test fixture and test cases
+
+// Test fixture parameterized over .zst file paths (spec: ZstdGpuDemoTests).
+// Both correctness and performance tests share this fixture — correctness tests
+// use ASSERT (hard fail), performance tests use EXPECT (soft fail).
+class ZstdGpuDemoTests : public ::testing::TestWithParam<std::string>
+{
+};
+
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(ZstdGpuDemoTests);
+
+// --- Correctness tests ---
+
+TEST_P(ZstdGpuDemoTests, SimulationCheck)
+{
+    RunCorrectnessTest(GetParam(), {"--chk-gpu", "--chk-cpu", "--sim-gpu"});
+}
+
+TEST_P(ZstdGpuDemoTests, D3D12DebugLayer)
+{
+    RunCorrectnessTest(GetParam(), {"--chk-gpu", "--d3d-dbg"});
+}
+
+TEST_P(ZstdGpuDemoTests, ExternalMemory)
+{
+    RunCorrectnessTest(GetParam(), {"--chk-gpu", "--ext-mem"});
+}
+
+TEST_P(ZstdGpuDemoTests, GraphicsQueue)
+{
+    RunCorrectnessTest(GetParam(), {"--chk-gpu", "--d3d-gfx"});
+}
+
+// --- Performance tests ---
+
+TEST_P(ZstdGpuDemoTests, OverallThroughput)
+{
+    RunPerformanceTest(GetParam(), 0);
+}
+
+TEST_P(ZstdGpuDemoTests, PerStageTiming)
+{
+    RunPerformanceTest(GetParam(), 2);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    ContentTests,
+    ZstdGpuDemoTests,
+    ::testing::ValuesIn(GetTestFiles()),
+    SanitizeTestName);
+
+// Demo runner implementation
+
+// Builds a command line string, quoting arguments that are empty or contain a space or tab.
+static std::string BuildCommandLine(const std::string& exe, const std::vector<std::string>& args)
+{
+    std::ostringstream cmd;
+    cmd << "\"" << exe << "\"";
+    for (const auto& arg : args)
+    {
+        cmd << " ";
+        if (arg.empty() || arg.find_first_of(" \t") != std::string::npos)
+            cmd << "\"" << arg << "\"";
+        else
+            cmd << arg;
+    }
+    return cmd.str();
+}
+
+DemoResult RunDemo(
+    const std::string& demoPath,
+    const std::vector<std::string>& args,
+    int timeoutSeconds)
+{
+    DemoResult result;
+    result.commandLine = BuildCommandLine(demoPath, args);
+
+    // Create an anonymous pipe for capturing the child process's stdout/stderr.
+    SECURITY_ATTRIBUTES sa{};
+    sa.nLength = sizeof(sa);
+    sa.bInheritHandle = TRUE;
+
+    HANDLE hReadPipe = nullptr;
+    HANDLE hWritePipe = nullptr;
+    if (!CreatePipe(&hReadPipe, &hWritePipe, &sa, 0))
+    {
+        result.launchError = "Failed to create pipe for demo process.";
+        return result;
+    }
+
+    // Prevent the read end from being inherited by the child process.
+    SetHandleInformation(hReadPipe, HANDLE_FLAG_INHERIT, 0);
+
+    // Redirect child's stdout and stderr to the write end of the pipe.
+    STARTUPINFOA si{};
+    si.cb = sizeof(si);
+    si.dwFlags = STARTF_USESTDHANDLES;
+    si.hStdOutput = hWritePipe;
+    si.hStdError = hWritePipe;
+
+    PROCESS_INFORMATION pi{};
+
+    std::vector<char> cmdBuf(result.commandLine.begin(), result.commandLine.end());
+    cmdBuf.push_back('\0');
+
+    if (!CreateProcessA(
+            nullptr,
+            cmdBuf.data(),
+            nullptr,
+            nullptr,
+            TRUE, // inherit handles
+            0,
+            nullptr,
+            nullptr,
+            &si,
+            &pi))
+    {
+        // Capture GetLastError() first: CloseHandle() may overwrite the thread's last-error code.
+        result.launchError = "Failed to launch demo process. Error: " + std::to_string(GetLastError());
+        CloseHandle(hReadPipe);
+        CloseHandle(hWritePipe);
+        return result;
+    }
+
+    // Close the parent's write end so ReadFile on the read end sees EOF once the child exits.
+    CloseHandle(hWritePipe);
+
+    // Read the child's output on a background thread to prevent pipe buffer
+    // deadlocks (the pipe has a finite buffer; if it fills, the child blocks).
+    std::string capturedOutput;
+    std::thread readerThread([&capturedOutput, hReadPipe]() {
+        std::array<char, 4096> buf;
+        DWORD bytesRead = 0;
+        while (ReadFile(hReadPipe, buf.data(), static_cast<DWORD>(buf.size()), &bytesRead, nullptr) && bytesRead > 0)
+        {
+            capturedOutput.append(buf.data(), bytesRead);
+        }
+    });
+
+    // Wait for the child process, enforcing the timeout.
+    DWORD waitMs = (timeoutSeconds > 0) ? static_cast<DWORD>(timeoutSeconds) * 1000 : INFINITE;
+    DWORD waitResult = WaitForSingleObject(pi.hProcess, waitMs);
+
+    if (waitResult == WAIT_TIMEOUT)
+    {
+        result.timedOut = true;
+        TerminateProcess(pi.hProcess, 1);
+        WaitForSingleObject(pi.hProcess, 5000);
+    }
+
+    DWORD exitCode = static_cast<DWORD>(-1); // stays a failure value if GetExitCodeProcess fails
+    GetExitCodeProcess(pi.hProcess, &exitCode);
+    result.exitCode = static_cast<int>(exitCode);
+
+    CloseHandle(pi.hProcess);
+    CloseHandle(pi.hThread);
+
+    // Wait for the reader thread to finish draining the pipe, then clean up.
+    readerThread.join();
+    CloseHandle(hReadPipe);
+
+    result.stdOut = std::move(capturedOutput);
+    return result;
+}
+
+// Builds argument list for correctness tests: decompress once (--run-cnt 1)
+// with GPU and CPU validation enabled, plus scenario-specific flags.
+std::vector<std::string> BuildCorrectnessArgs(
+    const std::string& zstFile,
+    const std::vector<std::string>& scenarioFlags)
+{
+    std::vector<std::string> args;
+    args.push_back("--zst");
+    args.push_back(zstFile);
+    args.push_back("--run-cnt");
+    args.push_back("1");
+    for (const auto& flag : scenarioFlags)
+    {
+        args.push_back(flag);
+    }
+    return args;
+}
+
+// Builds argument list for performance tests: run N iterations at the specified
+// profiling level, optionally writing per-run timing data to a CSV file.
+std::vector<std::string> BuildPerformanceArgs( + const std::string& zstFile, + int profilingLevel, + int runCount, + const std::string& csvOutputPath) +{ + std::vector<std::string> args; + args.push_back("--zst"); + args.push_back(zstFile); + args.push_back("--prf-lvl"); + args.push_back(std::to_string(profilingLevel)); + args.push_back("--run-cnt"); + args.push_back(std::to_string(runCount)); + if (!csvOutputPath.empty()) + { + args.push_back("--out-csv"); + args.push_back(csvOutputPath); + } + return args; +} diff --git a/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.h b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.h new file mode 100644 index 0000000..66c3c88 --- /dev/null +++ b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.h @@ -0,0 +1,73 @@ +/** + * Copyright (c) Microsoft. All rights reserved. + * This code is licensed under the MIT License (MIT). + * THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF + * ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY + * IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR + * PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT. + */ + +// Shared header for the Zstd GPU CI tests. Defines the runtime configuration, +// demo process result type, and declarations for the demo runner and file +// discovery helpers. Both main.cpp and zstdgpu_ci_tests.cpp include this. + +#pragma once + +#include <string> +#include <vector> + +// Test configuration — parsed from CLI in main(), read by tests. + +struct TestConfig +{ + std::string contentPath; // Directory scanned recursively for .zst test files (--content-path) + std::string demoPath; // Path to zstdgpu_demo.exe (--demo-path); tests skip when empty + std::string logDir; // Directory for logs, CSVs (under results/), and GTest XML output + std::string logFile; // Consolidated text log file path (--log-file) + int runCount = 40; // Iterations for performance tests (--run-count; sent to the demo as --run-cnt) + int timeoutSeconds = 0; // Max seconds before killing a demo process (0 = no timeout) +}; + +// Singleton access — SetTestConfig called once from main(), GetTestConfig +// called from test helpers. 
+// Spec implies a global but doesn't show accessor pattern. +// Needed for GTest's TEST_P bodies to access config without passing it through parameters. +const TestConfig& GetTestConfig(); +void SetTestConfig(TestConfig config); + +// Demo runner — spawns zstdgpu_demo.exe and captures output. + +// Captures the outcome of a single demo process invocation. +struct DemoResult +{ + int exitCode = -1; // Process exit code (0 = success; stays -1 if no process was run) + std::string stdOut; // Captured stdout + stderr (both are redirected into one pipe) + std::string launchError; // Error message if the process failed to launch; empty otherwise + std::string commandLine; // The exact command line that was executed + bool timedOut = false; // True if the process was killed due to timeout +}; + +// Spawns zstdgpu_demo.exe with the given arguments, captures output, and +// returns the result. timeoutSeconds=0 means no timeout. +DemoResult RunDemo( + const std::string& demoPath, + const std::vector<std::string>& args, + int timeoutSeconds = 0); + +// Convenience: builds the full argument list for a correctness scenario. +// Spec inlines the args in each test. Extracting them avoids repeating --zst, --run-cnt 1, etc. +std::vector<std::string> BuildCorrectnessArgs( + const std::string& zstFile, + const std::vector<std::string>& scenarioFlags); + +// Convenience: builds the full argument list for a performance scenario. +std::vector<std::string> BuildPerformanceArgs( + const std::string& zstFile, + int profilingLevel, + int runCount, + const std::string& csvOutputPath); + +// File discovery — scans directories for .zst test content. + +// Recursively scans a directory for *.zst files. Returns sorted full paths. 
+std::vector<std::string> DiscoverZstFiles(const std::string& contentPath); diff --git a/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.vcxproj b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.vcxproj new file mode 100644 index 0000000..f756ce4 --- /dev/null +++ b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.vcxproj @@ -0,0 +1,239 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|ARM64"> + <Configuration>Debug</Configuration> + <Platform>ARM64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Debug|x64"> + <Configuration>Debug</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|ARM64"> + <Configuration>Release</Configuration> + <Platform>ARM64</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|x64"> + <Configuration>Release</Configuration> + <Platform>x64</Platform> + </ProjectConfiguration> + </ItemGroup> + <PropertyGroup Label="Globals"> + <VCProjectVersion>16.0</VCProjectVersion> + <Keyword>Win32Proj</Keyword> + <ProjectGuid>{A1B2C3D4-E5F6-7890-ABCD-EF1234567890}</ProjectGuid> + <RootNamespace>zstdgpu_ci_tests</RootNamespace> + <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + 
<CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <PlatformToolset>v143</PlatformToolset> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>Unicode</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="Shared"> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <Import 
Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <!-- Output to same directory structure as other projects --> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <PropertyGroup 
Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'"> + <OutDir>$(SolutionDir)$(Platform)\$(Configuration)\</OutDir> + <IntDir>$(Platform)\$(Configuration)\</IntDir> + </PropertyGroup> + <!-- Compiler settings --> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>_DEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + 
<ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>_DEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>NDEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + 
<FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + <SDLCheck>true</SDLCheck> + <PreprocessorDefinitions>NDEBUG;_CONSOLE;WIN32_LEAN_AND_MEAN;NOMINMAX;%(PreprocessorDefinitions)</PreprocessorDefinitions> + <ConformanceMode>true</ConformanceMode> + <LanguageStandard>stdcpp17</LanguageStandard> + <AdditionalIncludeDirectories>..\ThirdParty\googletest\googletest\include</AdditionalIncludeDirectories> + </ClCompile> + <Link> + <SubSystem>Console</SubSystem> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <!-- Source files --> + <ItemGroup> + <ClCompile Include="main.cpp" /> + <ClCompile Include="zstdgpu_ci_tests.cpp" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="zstdgpu_ci_tests.h" /> + </ItemGroup> + <!-- Project references: only googletest --> + <ItemGroup> + <ProjectReference Include="..\ThirdParty\googletest_static.vcxproj"> + <Project>{49811f10-3d14-403e-859d-40dfcbb35c7b}</Project> + </ProjectReference> + </ItemGroup> + <Import 
Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> From 9266056e4738ae14d7ebe1259b9519ff01346ad1 Mon Sep 17 00:00:00 2001 From: Rohan Borkar <rohanborkar@microsoft.com> Date: Tue, 23 Jun 2026 20:59:14 -0700 Subject: [PATCH 2/3] zstdgpu_demo: non-fatal CPU-sim break handler + non-zero exit on correctness failure Adds a translation-unit-only override of the ZSTDGPU_BREAK macro for the demo's CPU-sim shader path: instead of the library's __debugbreak() -- which kills the demo with STATUS_BREAKPOINT (0x80000003) on bad input before any [FAIL] message can flush -- breaks now log a [ZGBRK] line and increment a correctness-failure counter. VALIDATE / VALIDATE_CND also increment the counter, the per-frame validation failure block and uncompressed-size early-out increment it, and wmain returns 2 if the counter is non-zero. The validate-on-CPU path early-returns before DecompressSequences if any prior validation failed -- avoids feeding sentinel FSE table indices into the OOB-prone shader code. All changes are local to zstdgpu_demo/main.cpp. zstdgpu library is untouched. Performance CSV emission is provided upstream by PR #118 (zstdgpu_demo's built-in --out-csv flag); this PR no longer adds a separate CSV writer. --- zstd/zstdgpu_ci_tests/main.cpp | 30 +++++++++++ zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp | 36 +++++++++++++- zstd/zstdgpu_demo/main.cpp | 58 ++++++++++++++++++++++ 3 files changed, 122 insertions(+), 2 deletions(-) diff --git a/zstd/zstdgpu_ci_tests/main.cpp b/zstd/zstdgpu_ci_tests/main.cpp index 3a52744..33bb333 100644 --- a/zstd/zstdgpu_ci_tests/main.cpp +++ b/zstd/zstdgpu_ci_tests/main.cpp @@ -132,6 +132,36 @@ int main(int argc, char** argv) std::cerr << "Warning: --demo-path not set. Tests will skip.\n"; } + if (config.contentPath.empty()) + { + std::cerr << "Warning: --content-path not set. 
Zero tests will be discovered " + "(gtest will print 'This test program does NOT link in any test case').\n"; + } + else if (!std::filesystem::exists(config.contentPath)) + { + std::cerr << "Warning: --content-path '" << config.contentPath + << "' does not exist. Zero tests will be discovered.\n"; + } + else if (!std::filesystem::is_directory(config.contentPath)) + { + std::cerr << "Warning: --content-path '" << config.contentPath + << "' is not a directory. Zero tests will be discovered.\n"; + } + else + { + const size_t fileCount = DiscoverZstFiles(config.contentPath).size(); + if (fileCount == 0) + { + std::cerr << "Warning: --content-path '" << config.contentPath + << "' contains no .zst files. Zero tests will be discovered.\n"; + } + else + { + std::cout << "Discovered " << fileCount << " .zst file(s) at '" + << config.contentPath << "'.\n"; + } + } + // Default log dir to current directory. if (config.logDir.empty()) { diff --git a/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp index 7d60c86..e836463 100644 --- a/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp +++ b/zstd/zstdgpu_ci_tests/zstdgpu_ci_tests.cpp @@ -34,6 +34,8 @@ // configured timeout, it is terminated. This avoids any D3D12/GPU dependency // in the test binary itself — all GPU work happens inside the demo process. +// NOTES: IF THE PATH IS EMPTY, FAIL THE TEST + #include "zstdgpu_ci_tests.h" #include <gtest/gtest.h> #include <array> @@ -57,10 +59,40 @@ static std::vector<std::string> GetTestFiles() } // Converts a full file path to a valid GTest parameter name. -// GTest names must be alphanumeric + underscore, no leading digits. +// GTest names must be alphanumeric + underscore, no leading digits, AND UNIQUE +// across the full INSTANTIATE_TEST_SUITE_P set. Using just the filename stem +// collides when the same leaf name appears across different subdirectories +// (e.g. 
firefly_albedo.DDS.zst exists under BC1/, BC1mip0/, block4K_*, etc.), +// causing a fatal gtest assertion at startup. Use the path relative to +// --content-path so different folders produce different names. static std::string SanitizeTestName(const testing::TestParamInfo<std::string>& info) { - std::string name = std::filesystem::path(info.param).stem().string(); + const auto& config = GetTestConfig(); + std::filesystem::path full(info.param); + std::filesystem::path rel; + if (!config.contentPath.empty()) + { + std::error_code ec; + rel = std::filesystem::relative(full, config.contentPath, ec); + if (ec || rel.empty() || rel.string().rfind("..", 0) == 0) + { + rel = full.filename(); // fallback: out-of-tree, just use leaf + } + } + else + { + rel = full.filename(); + } + + // Drop the trailing .zst extension for readability; everything else stays. + std::string name = rel.string(); + const std::string ext = ".zst"; + if (name.size() >= ext.size() && + name.compare(name.size() - ext.size(), ext.size(), ext) == 0) + { + name.resize(name.size() - ext.size()); + } + std::string result; result.reserve(name.size()); for (char c : name) diff --git a/zstd/zstdgpu_demo/main.cpp b/zstd/zstdgpu_demo/main.cpp index 421cb4f..f6e951d 100644 --- a/zstd/zstdgpu_demo/main.cpp +++ b/zstd/zstdgpu_demo/main.cpp @@ -52,6 +52,16 @@ extern "C" { #include <assert.h> +// Forward-declare the demo's non-fatal ZSTDGPU_BREAK handler so we can redefine +// the macro BEFORE the shader header expands it inline in this translation unit. +// Library's default __debugbreak() would kill the demo with STATUS_BREAKPOINT +// (0x80000003) on bad input before g_correctnessFailureCount can propagate a +// clean non-zero exit. This override is local to main.cpp's TU — library +// binary is unchanged, other consumers see the original behavior. The function +// itself is defined below alongside g_correctnessFailureCount. See spec § Phase 1. 
+static void zstdgpu_DemoOnBreak(const char* file, int line); +#define ZSTDGPU_BREAK() zstdgpu_DemoOnBreak(__FILE__, __LINE__) + #include "zstdgpu_reference_store.h" #include "zstdgpu_shaders.h" #include "zstdgpu.h" @@ -133,6 +143,30 @@ static void saveFile(const wchar_t *fileName, const void *data, uint32_t dataSiz } } +/*********************************************************************************************************************** + * + * + * Diagnostic state: correctness-failure counter so wmain can exit non-zero on any check failure. + * Per-frame CSV emission is handled by the demo's own writer (see --out-csv in argv parsing below). + * + * + **********************************************************************************************************************/ + +static uint32_t g_correctnessFailureCount = 0; + +/* Non-fatal ZSTDGPU_BREAK handler for the demo's CPU-sim shader path. The + * `#define ZSTDGPU_BREAK()` at the top of this file redirects every break-site + * in zstdgpu_shaders.h to this function. Without it, library's __debugbreak() + * crashes the process with STATUS_BREAKPOINT (0x80000003) on bad input before + * any [FAIL] message can flush. With it, breaks become loggable failures and + * the demo exits cleanly via the g_correctnessFailureCount path at end of wmain. 
*/ +static void zstdgpu_DemoOnBreak(const char* file, int line) +{ + debugPrint(L"[ZGBRK] %hs:%d\n", file, line); + ++g_correctnessFailureCount; +} + + /*********************************************************************************************************************** * * @@ -230,7 +264,10 @@ static void zstdgpu_Init_FinaliseSequenceOffsets_SRT(zstdgpu_FinaliseSequenceOff do \ { \ if (ZSTDGPU_ENUM_CONST(Validate_Success) != zstdgpu_ReferenceStore_Validate_##name) \ + { \ debugPrint(L"[FAIL] Validation of '"#name"' failed in function: " __FUNCTION__ ", file: " __FILE__ ", line: " STRINGIZE(__LINE__) "\n");\ + ++g_correctnessFailureCount; \ + } \ } \ while(0) @@ -238,7 +275,10 @@ static void zstdgpu_Init_FinaliseSequenceOffsets_SRT(zstdgpu_FinaliseSequenceOff do \ { \ if (!(cnd)) \ + { \ debugPrint(L"[FAIL] Validation of '"#cnd"' failed in function: " __FUNCTION__ ", file: " __FILE__ ", line: " STRINGIZE(__LINE__) "\n");\ + ++g_correctnessFailureCount; \ + } \ } \ while(0) @@ -837,6 +877,16 @@ static void zstdgpu_Validate_GpuDecompressOnCpu(zstdgpu_ResourceDataCpu & zstdCp VALIDATE(DecompressedLiterals(&zstdCpu)); } + // [e006/e007] guard: if any upstream VALIDATE failed (parse, FseTables, etc.), + // sentinel FSE-table indices (0x3FFFFFFE / 0x3FFFFFFF) may have propagated to + // inoutSeqRefs. Feeding them into DecompressSequences would either OOB-index + // inFseInfos (AV) or trip an internal ZSTDGPU_BREAK. Stop here instead. + if (g_correctnessFailureCount > 0) + { + debugPrint(L"[FAIL] Skipping DecompressSequences and downstream stages: %u prior validation failure(s).\n", g_correctnessFailureCount); + return; + } + { zstdgpu_DecompressSequences_SRT srt; zstdgpu_Init_DecompressSequences_SRT(srt, zstdCpu); @@ -1215,6 +1265,7 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp if (fbInfo.frameCount != vcnt) { debugPrint(L"[FAIL] Some frames don't carry uncompressed size. 
Early Out.\n"); + ++g_correctnessFailureCount; free(zstdOutFrameRefs); free(zstdInFrameRefs); @@ -1585,6 +1636,7 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp if (failedFrameCount > 0) { + g_correctnessFailureCount += failedFrameCount; const char *ref = (char*)zstdReferenceUncompressedData; const char *tst = (char*)zstdUnCompressedFramesMemory.bufMem[0]; @@ -1840,5 +1892,11 @@ int WINAPI wWinMain(_In_ HINSTANCE hInstance, _In_opt_ HINSTANCE, _In_ LPWSTR lp device->SetStablePowerState(FALSE); zstdgpu_Demo_PlatformTerm(device); debugPrint(L"Finished.\n"); + + if (g_correctnessFailureCount > 0) + { + debugPrint(L"[FAIL] %u correctness check failure(s) detected. Exiting with non-zero status.\n", g_correctnessFailureCount); + return 2; + } return 0; } From 14569087103dba5bfed3c7d1d221c3fcaf08196e Mon Sep 17 00:00:00 2001 From: Rohan Borkar <rohanborkar@microsoft.com> Date: Thu, 25 Jun 2026 07:02:13 -0700 Subject: [PATCH 3/3] histogram script handles inf values --- zstd/scripts/generate_histogram.py | 68 +++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/zstd/scripts/generate_histogram.py b/zstd/scripts/generate_histogram.py index 59a1295..b335b0a 100644 --- a/zstd/scripts/generate_histogram.py +++ b/zstd/scripts/generate_histogram.py @@ -5,15 +5,21 @@ Usage: python generate_histogram.py --input <csv_file> --output <png_file> [--title <title>] -Supports two CSV formats based on profiling level: - - prf-lvl 0 (OverallThroughput): column "Throughput_GBs" - - prf-lvl 2 (PerStageTiming): column "Microseconds" +Consumes the wide-format CSV emitted by zstdgpu_demo's --out-csv flag: + RunIdx, Stage 0 (us), Stage 0 :: <scope> (us), ..., Readback 0 (us), + Stage 1 (us), ..., Stage 2 (us), Bandwidth (GB/s) + +Plots a histogram of the 'Bandwidth (GB/s)' column. 
Per-stage timing columns +are preserved in the CSV but not plotted here (the histogram is the summary +view; users wanting per-stage detail can read the CSV directly). """ import argparse import csv +import math import sys + + def try_import_matplotlib(): try: import matplotlib @@ -28,11 +34,18 @@ def try_import_matplotlib(): return None +# Identify the bandwidth column written by zstdgpu_demo's --out-csv flag +# (header "Bandwidth (GB/s)"). The match is a case-insensitive substring test +# for "bandwidth", so minor upstream changes to the header spelling still match. +def _is_bandwidth_column(col_name: str) -> bool: + return col_name is not None and "bandwidth" in col_name.strip().lower() + + def main(): parser = argparse.ArgumentParser(description="Generate histogram from zstdgpu perf CSV") parser.add_argument("--input", required=True, help="Path to input CSV file") parser.add_argument("--output", required=True, help="Path to output PNG file") - parser.add_argument("--title", default="Throughput", help="Chart title") + parser.add_argument("--title", default="Bandwidth", help="Chart title") args = parser.parse_args() plt = try_import_matplotlib() @@ -40,21 +53,56 @@ def main(): return 1 data = [] + skipped_non_finite = 0 + bandwidth_col = None with open(args.input, newline="") as f: reader = csv.DictReader(f) + if reader.fieldnames is None: + print(f"CSV has no header row: {args.input}", file=sys.stderr) + return 1 + # Pick the bandwidth column by name (resilient to header drift).
+ for col in reader.fieldnames: + if _is_bandwidth_column(col): + bandwidth_col = col + break + if bandwidth_col is None: + print( + f"No Bandwidth column found in {args.input} " + f"(headers: {reader.fieldnames})", + file=sys.stderr, + ) + return 1 + for row in reader: - if "Throughput_GBs" in row: - data.append(float(row["Throughput_GBs"])) - elif "Microseconds" in row: - data.append(float(row["Microseconds"])) + raw = row.get(bandwidth_col, "") + if raw is None or raw == "": + continue + try: + val = float(raw) + except ValueError: + # Non-numeric cell (empty cells were already skipped above); ignore. + continue + if math.isfinite(val): + data.append(val) + else: + skipped_non_finite += 1 + + if skipped_non_finite > 0: + print( + f"WARNING: Skipped {skipped_non_finite} non-finite (Inf/NaN) value(s) from {args.input}", + file=sys.stderr, + ) if not data: - print(f"No Throughput_GBs or Microseconds data found in {args.input}", file=sys.stderr) + print( + f"No finite Bandwidth (GB/s) data found in {args.input}", + file=sys.stderr, + ) return 1 plt.figure() plt.hist(data, bins=20) - plt.xlabel("Throughput (GB/s)" if "throughput" in args.input.lower() else "Time (us)") + plt.xlabel("Bandwidth (GB/s)") plt.ylabel("Count") plt.title(args.title) plt.savefig(args.output)