Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions ops/matmul.cc
Original file line number Diff line number Diff line change
Expand Up @@ -274,8 +274,14 @@ class GenerateCandidates {
}

// We know `order` is multiple MC, where more/smaller values of `mc` are
// helpful, especially for two B, hence add iterations.
const size_t reps = 2 + num_B_;
// helpful, especially for two B. For SFC, smaller tile sizes ensure that
// a larger cluster of adjacent tiles along the space-filling curve path
// fits concurrently in L2/L3 cache, maximizing boundary data reuse (rows
// of A or cols of B) as the curve moves. Hence add more iterations.
size_t reps = 2 + num_B_;
if (IsSFC(order)) {
reps += 2;
}
for (size_t rep = 0; rep < reps; ++rep) {
prev = PrevDivisor(mr, prev, rounded_M, mr);
if (prev == 0) break; // none found
Expand Down Expand Up @@ -326,6 +332,9 @@ class GenerateCandidates {
// Large L3, but its behavior and characteristics vary across platforms,
// hence autotune a wider range of nc than the other dimensions.
size_t reps = 9 + num_B_;
if (IsSFC(order)) {
reps += 2;
}
// For small M, we can afford larger NC, hence allow fewer small options.
if (max_M_ <= 2 * mr) reps -= 1;

Expand Down
4 changes: 4 additions & 0 deletions ops/matmul.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,10 @@ static inline bool IsOneKC(MMOrder order) {
order == MMOrder::kSFC;
}

// Returns true iff `order` is one of the two SFC (space-filling curve)
// orders, i.e. `MMOrder::kSFC_K` or `MMOrder::kSFC`; false for all others.
static inline bool IsSFC(MMOrder order) {
  switch (order) {
    case MMOrder::kSFC_K:
    case MMOrder::kSFC:
      return true;
    default:
      return false;
  }
}

static inline const char* StringFromOrder(MMOrder order) {
switch (order) {
case MMOrder::kNT_K:
Expand Down
Loading