Skip to content

Commit 73984c2

Browse files
committed
xe: gemm: add Xe2 f16 kernels
1 parent 7f0a3e0 commit 73984c2

1 file changed

Lines changed: 10 additions & 0 deletions

File tree

src/gpu/intel/gemm/jit/selector/db/kernel.db

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,16 @@ auto _CATALOG_ = kcatalog::toArray({
12161216
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 128, -1}, {16, 16, 1}, "ABI"}, "am32+C32@64 at32 aS wg 1x1x16 ikr af vav sr sb256 bk0 bm0 sys rr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {4, 16, 32}, {1, 1, 16}, 1, (WGType) 1, 4357, 0, 256, {16, 16, 4}, {true, true, true}}, {'E', 17, {-164022, 454944, 5845.92, 1045.76, 0, 0, 0.771733, 1.70851, 273.177, 272.825, 0.111732, 0.0896612, 0.00579836, 0.491637, 1.14958, 0.953248, 1.54584e-12}}},
12171217
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, 128, -1}, {16, 16, 1}, "ABI"}, "at16x2+m32@48 am32+m16@64 aB wg 4x2x4 kr xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {262144, 262144, 16777216}, {262144, 262144, 32}, {16, 16, 32}, {4, 2, 4}, 1, (WGType) 1, 445, 0, 8192, {16, 16, 4}, {true, true, true}}, {'E', 17, {241719, 28050.5, 9262.5, 2137.5, 552672, 876888, 1.91538, 1.82334, 3.84174, 8.3282, 0.015781, 0.00988075, 0.00545694, 0.959871, 1.33252, 0.965141, 5.50087e-12}}},
12181218
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, 1, 3968}, {-1, 1, 4096}, {-1, 1, 3968}, {-1, 1, 4096}, {16, 16, 1}, "ABIH"}, "am32+C32@64 at32 aS wg 1x1x16 ikr af vav sr sb256 bk0 bm0 sys rr", {16, (LoopType) 255, 128, {(LoopType) 0, (LoopType) 1, (LoopType) 2}, {16777216, 8192, 16777216}, {8192, 8192, 16777216}, {4, 16, 32}, {1, 1, 16}, 1, (WGType) 1, 268439813, 0, 256, {16, 16, 4}, {true, true, true}}, {'W', 1, {64}}},
1219+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16+m64@64 am64+m64@24 aB wg 2x8 ca3 ks32 nb 2x0 sys af vav bo sr br sm sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 16777216}, {64, 32, 64}, {2, 8, 1}, 1, (WGType) 1, 268435713, 24576, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {346248, 107172, 0, 0, 0, 0, 2.02435, 2.03545, 3.93522, 9.42995, 0.0115694, 0.015832, 0, 1, 1, 1, -0}}},
1220+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABIs"}, "at16+m64@48 am32+m32@56 aB wg 4x8 xaf st vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {1048576, 655360, 16777216}, {1048576, 655360, 32}, {64, 40, 32}, {4, 8, 1}, 1, (WGType) 1, 268435897, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {366623, 144994, 0, 0, 3.69474e+06, 3.69968e+06, 2.20167, 2.12657, 4.04879, 9.80462, 0.00844333, 0.00844333, 0, 1, 1, 1, -0}}},
1221+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16x2+m64@48 am32+m32@64 aB wg 8x4 xaf vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 4, 1}, 1, (WGType) 1, 268435897, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {219626, 142188, 0, 0, 3.33012e+06, 3.15246e+06, 2.23529, 2.42349, 4.06066, 10.0474, 0.00836672, 0.00836672, 0, 1, 1, 1, -0}}},
1222+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16x2+m64@48 am32+m32@64 aB wg 8x4 xaf vav hi pt sr br sb64 bk0 sm sn grf256 sys afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 4, 1}, 1, (WGType) 1, 268435897, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {219626, 142188, 0, 0, 3.33012e+06, 3.15246e+06, 2.23529, 2.42349, 4.06066, 10.0474, 0.00836672, 0.00836672, 0, 1, 1, 1, -0}}},
1223+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at32+m32@48 am32+m32@48 aB wg 4x2x4 kr xaf vav hi pt sr br sb64 bk0 sm sn grf256 kv afb sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 524288, 16777216}, {524288, 524288, 32}, {32, 32, 32}, {4, 2, 4}, 1, (WGType) 1, 268435901, 0, 32768, {16, 16, 4}, {true, true, true}}, {'E', 17, {297614, 25389.8, -11475.2, 2689.3, 1.20951e+06, 2.12198e+06, 2.1711, 2.18535, 4.04665, 9.6884, 0.0138323, 0.00666052, 0.0028099, 0.118314, 1, 1, -0}}},
1224+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16+m32@32 am32+m32@48 aB wg 8x2x2 kr af vav hi pt sr br sb64 bk0 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 1048576, 16777216}, {524288, 1048576, 16777216}, {32, 64, 32}, {8, 2, 2}, 1, (WGType) 1, 268435717, 0, 65536, {16, 16, 4}, {true, true, true}}, {'E', 17, {273526, 70212.8, -6741.24, 4041.51, 0, 0, 2.37113, 2.86083, 4.08727, 10.0874, 0.0197238, 0.0197238, 0, 0.277839, 1, 1, -0}}},
1225+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16+m32@32 am32+m32@48 aB wg 8x2x2 kr af vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 1048576, 16777216}, {524288, 1048576, 32}, {32, 64, 32}, {8, 2, 2}, 1, (WGType) 1, 268435901, 0, 65536, {16, 16, 4}, {true, true, true}}, {'E', 17, {243121, 57298.4, 8304.07, 11672.1, 1.85905e+06, 3.29591e+06, 2.14183, 2.93917, 4.06822, 10.0741, 0.00988893, 0.00939896, 0.00193675, 1, 1, 1, -0}}},
1226+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16+m64@32 am32+m16@32 aB wg 4x4x2 kr xaf vav hi pt sr br sb64 bk0 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 2}, {524288, 524288, 16777216}, {524288, 524288, 16777216}, {32, 32, 32}, {4, 4, 2}, 1, (WGType) 1, 268435717, 0, 65536, {16, 16, 4}, {true, true, true}}, {'E', 17, {265520, 61102.5, 6503.46, 2559, 0, 0, 2.15584, 2.1281, 3.99382, 9.64953, 0.0210726, 0.0210726, 0, 0.513432, 1, 1, -0}}},
1227+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABIs"}, "at16x2+m32@32 am32+m64@48 aB wg 8x4 xaf vav hi pt sr br sb64 bk0 sm sn grf256 sys kv afb", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {524288, 786432, 16777216}, {524288, 786432, 32}, {32, 48, 32}, {8, 4, 1}, 1, (WGType) 1, 268435897, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {237448, 133657, 0, 0, 2.95803e+06, 2.74846e+06, 2.10086, 2.28064, 4.02922, 10.1686, 0.00852193, 0.00852193, 0, 1, 1, 1, -0}}},
1228+
{{'G', "gemm", {"H", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16+m64@64 am32+m16@64 aB wg 8x4 af vav hi pt sr br sb64 bk0 sm sn grf256 sys", {16, (LoopType) 255, 256, {(LoopType) 208, (LoopType) 255, (LoopType) 255}, {262144, 524288, 16777216}, {262144, 524288, 16777216}, {16, 32, 32}, {8, 4, 1}, 1, (WGType) 1, 268435713, 0, 0, {16, 16, 4}, {true, true, true}}, {'E', 17, {208756, 112902, 0, 0, 0, 0, 1.99211, 1.99408, 3.84064, 10.0943, 0.0108245, 0.0108245, 0, 1, 1, 1, -0}}},
12191229
{{'G', "gemm", {"N", "H", "S"}, {"N", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "am32+m64@8 am32+m16@64 aB wg 4x8 ca3 ks32 nb 4x0 sys xaf rr vav bo ar br sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {1048576, 524288, 16777216}, {1048576, 524288, 16777216}, {64, 32, 32}, {4, 8, 1}, 1, (WGType) 1, 257, 49152, 0, {16, 16, 4}, {true, true, true}}, {'W', 1, {2048}}},
12201230
{{'G', "gemm", {"N", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {-1, -1, -1}, {16, 16, 1}, "ABI"}, "at16x2+m32@40 am16+m16@128 aB wg 32x1 sys xaf rr fx vav bo ar br sm sn dm grf256", {16, (LoopType) 255, 256, {(LoopType) 128, (LoopType) 255, (LoopType) 255}, {262144, 2097152, 16777216}, {262144, 2097152, 16777216}, {16, 128, 16}, {32, 1, 1}, 1, (WGType) 1, 257, 0, 0, {16, 16, 4}, {true, true, true}}, {'W', 1, {2048}}},
12211231
{{'G', "gemm", {"N", "H", "S"}, {"T", "N", "N"}}, {-1, -1, {-1, -1, -1}, {-1, 8, -1}, {-1, -1, -1}, {-1, 8, -1}, {16, 16, 1}, "IAB"}, "at32 am128 aB wg 2x1x8 ikr xaf st vav hi pt sr br sb128 bk0 bm0 nmk sys", {16, (LoopType) 255, 128, {(LoopType) 209, (LoopType) 255, (LoopType) 2}, {16777216, 131072, 16777216}, {262144, 131072, 16777216}, {16, 8, 128}, {2, 1, 8}, 1, (WGType) 0, 4357, 0, 1024, {16, 16, 4}, {true, true, true}}, {'W', 1, {128}}},

0 commit comments

Comments
 (0)