|
20 | 20 | #define DT_UNDEF 1 |
21 | 21 | #include "gpu/intel/include/types.h" |
22 | 22 |
|
23 | | -uint4 philox_4x32_s64_vec4(ulong idx, ulong seed, ulong offset) { |
| 23 | +uint4 philox_4x32_vec4_w_offset(ulong idx, ulong seed, ulong offset) { |
24 | 24 | #define PHILOX_4UINT_ROUND(mul, ctr, key) \ |
25 | 25 | as_uint4(convert_ulong2(ctr.s02) * mul).s3210 \ |
26 | 26 | ^ (uint4)(ctr.s1 ^ key.s0, 0, ctr.s3 ^ key.s1, 0) |
@@ -53,40 +53,45 @@ uint4 philox_4x32_s64_vec4(ulong idx, ulong seed, ulong offset) { |
53 | 53 | return ctr; |
54 | 54 | } |
55 | 55 |
|
56 | | -uint philox_4x32_u64_w_offset(ulong idx, ulong seed, ulong offset) { |
57 | | - return philox_4x32_s64_vec4(idx, seed, offset)[idx & 3L]; |
| 56 | +uint philox_4x32_w_offset(ulong idx, ulong seed, ulong offset) { |
| 57 | + return philox_4x32_vec4_w_offset(idx, seed, offset)[idx & 3L]; |
58 | 58 | } |
59 | 59 |
|
60 | | -uint philox_4x32_u64(ulong idx, ulong seed) { |
| 60 | +uint __attribute__((overloadable)) philox_4x32(ulong idx, ulong seed) { |
61 | 61 | // Note: this is for compatibility with impls that don't support s64 rand |
62 | 62 | ulong x = idx & ~3L; |
63 | 63 | ulong idx_64 = ((x + 3) << 32) + (x + 2); |
64 | 64 | ulong offset_64 = ((x + 1) << 32) + x; |
65 | 65 | ulong seed_64 = (seed << 32) + seed; |
66 | | - return philox_4x32_s64(idx_64, seed_64, offset_64); |
| 66 | + return philox_4x32_w_offset(idx_64, seed_64, offset_64); |
67 | 67 | } |
68 | 68 |
|
69 | | -uint philox_4x32(uint idx, uint seed) { |
70 | | - // Note: preserve old signature for compatibility |
71 | | - return philox_4x32_u64((ulong)idx, (ulong)seed); |
| 69 | +uint __attribute__((overloadable)) philox_4x32(long idx, long seed) { |
| 70 | + // Convert long to ulong and call the existing function |
| 71 | + return philox_4x32((ulong)idx, (ulong)seed); |
| 72 | +} |
| 73 | + |
| 74 | +uint __attribute__((overloadable)) philox_4x32(int idx, int seed) { |
| 75 | + // Convert int to ulong and call the existing overloadable function |
| 76 | + return philox_4x32((ulong)idx, (ulong)seed); |
72 | 77 | } |
73 | 78 |
|
74 | 79 | uint4 philox_4x32_vec4(uint idx, uint seed) { |
75 | 80 | ulong x = idx & ~3L; |
76 | 81 | ulong idx_64 = ((x + 3) << 32) + (x + 2); |
77 | 82 | ulong offset_64 = ((x + 1) << 32) + x; |
78 | 83 | ulong seed_64 = ((ulong)(seed) << 32) + seed; |
79 | | - return philox_4x32_s64_vec4(idx_64, seed_64, offset_64); |
| 84 | + return philox_4x32_vec4_w_offset(idx_64, seed_64, offset_64); |
80 | 85 | } |
81 | 86 |
|
82 | 87 | ushort philox_8x16(long idx, uint seed) { |
83 | 88 | ulong idx_ = (ulong)idx; |
84 | | - return as_ushort2(philox_4x32_u64(idx_ >> 1, (ulong)seed))[idx_ & 1]; |
| 89 | + return as_ushort2(philox_4x32(idx_ >> 1, (ulong)seed))[idx_ & 1]; |
85 | 90 | } |
86 | 91 |
|
87 | 92 | uchar philox_16x8(long idx, uint seed) { |
88 | 93 | ulong idx_ = (ulong)idx; |
89 | | - return as_uchar4(philox_4x32_u64(idx_ >> 2, (ulong)seed))[idx_ & 3]; |
| 94 | + return as_uchar4(philox_4x32(idx_ >> 2, (ulong)seed))[idx_ & 3]; |
90 | 95 | } |
91 | 96 |
|
92 | 97 | #if WITH_SROUND |
@@ -120,4 +125,7 @@ uint get_dropout_threshold(float p) { |
120 | 125 | + !!(mantissa & ((1u << exponent) - 1u)); |
121 | 126 | } |
122 | 127 | #endif |
| 128 | + |
| 129 | + |
| 130 | + |
123 | 131 | #endif |
0 commit comments