From bb9f0cc0ba65352f43091e57907fda9eed010ea8 Mon Sep 17 00:00:00 2001 From: Felix Weiglhofer Date: Mon, 22 Jun 2026 12:41:23 +0200 Subject: [PATCH] GPU/TPC: Increase assumed cacheline size to 128 byte in cluster finder --- GPU/GPUTracking/TPCClusterFinder/CfArray2D.h | 10 ++++++++-- .../TPCClusterFinder/GPUTPCCFCheckPadBaseline.h | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h index e61ec532bf7e0..358cc682c7982 100644 --- a/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h +++ b/GPU/GPUTracking/TPCClusterFinder/CfArray2D.h @@ -86,10 +86,13 @@ class LinearLayout template struct GridSize; +// GridSize for 1 byte and 2 byte elements are adjusted for 128 byte cachelines, +// as these are prevalent on modern GPUs. + template <> struct GridSize<1> { enum { - Width = 8, + Width = 16, Height = 8, }; }; @@ -98,10 +101,13 @@ template <> struct GridSize<2> { enum { Width = 8, - Height = 4, + Height = 8, }; }; +// GridSize for 4 bytes is only used for MC indexing on CPU. +// So assume 64 byte cachelines here instead. + template <> struct GridSize<4> { enum { diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h index c2c5a1e339256..b96dfaeaada9a 100644 --- a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h +++ b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFCheckPadBaseline.h @@ -53,7 +53,6 @@ class GPUTPCCFCheckPadBaseline : public GPUKernelTemplate TimebinsPerCacheline = TPCMapMemoryLayout::Height, EntriesPerCacheline = PadsPerCacheline * TimebinsPerCacheline, NumOfCachedPads = GPUCA_WARP_SIZE / TimebinsPerCacheline, - NumCLsPerWarp = GPUCA_WARP_SIZE / EntriesPerCacheline, NumOfCachedTBs = TimebinsPerCacheline * 8, // Threads index shared memory as [iThread / MaxNPadsPerRow][iThread % MaxNPadsPerRow]. 
// Rounding up to a multiple of PadsPerCacheline ensures iThread / MaxNPadsPerRow < NumOfCachedTBs