2121static inline void typed_simple_zero_pad (__global void * a , ulong type_size ,
2222 ulong step_nelems , ulong nelems_block , ulong step_block , ulong nsteps ,
2323 ulong step_size , zero_pad_mask_t step_bitmask , ulong mode ) {
24- const int i0 = get_global_id (0 );
25- const int istep = get_global_id (1 ) * step_block ;
26- const int iblock = get_global_id (2 );
27- int offset = iblock * step_size + (step_size - nsteps * step_nelems )
24+ const ulong i0 = get_global_id (0 );
25+ const ulong istep = get_global_id (1 ) * step_block ;
26+ const ulong iblock = get_global_id (2 );
27+ ulong offset = iblock * step_size + (step_size - nsteps * step_nelems )
2828 + istep * step_nelems ;
2929
3030 const int step = ZERO_PAD_MASK_DT_BITS ;
@@ -46,7 +46,7 @@ static inline void typed_simple_zero_pad(__global void *a, ulong type_size,
4646
4747 for (int k = 0 ; k < step_block ; k ++ ) {
4848 __attribute__((opencl_unroll_hint )) // attr:no-format
49- for (int i = i0 ; i < step_nelems ; i += nelems_block ) {
49+ for (ulong i = i0 ; i < step_nelems ; i += nelems_block ) {
5050 if (step_bitmask .mask [i / step ] & (1 << (i % step ))) {
5151 switch (type_size ) {
5252 case 8 : a8 [offset + i ] = 0 ; break ;
@@ -137,16 +137,16 @@ simple_zero_pad_subg_16(__global char *a, const uint type_size,
137137 const ulong d1_stride , const ulong d2_stride , const ulong d3_stride ,
138138 const unsigned d0_size , const unsigned d1_size , const unsigned d2_size ,
139139 const unsigned d3_size , const uint b_multiplier ) {
140- const unsigned a_block_id = get_global_id (0 ) / 16 ;
141- const unsigned b_block_id = get_global_id (1 );
142- unsigned mixed_dims = get_global_id (2 );
140+ const ulong a_block_id = get_global_id (0 ) / 16 ;
141+ const ulong b_block_id = get_global_id (1 );
142+ ulong mixed_dims = get_global_id (2 );
143143
144- const unsigned d3_dim = mixed_dims % d3_size ;
144+ const ulong d3_dim = mixed_dims % d3_size ;
145145 mixed_dims /= d3_size ;
146- const unsigned d2_dim = mixed_dims % d2_size ;
146+ const ulong d2_dim = mixed_dims % d2_size ;
147147 mixed_dims /= d2_size ;
148- const unsigned d1_dim = mixed_dims % d1_size ;
149- const unsigned d0_dim = mixed_dims / d1_size ;
148+ const ulong d1_dim = mixed_dims % d1_size ;
149+ const ulong d0_dim = mixed_dims / d1_size ;
150150
151151 __global char * p = a + base_offset ;
152152 p += a_block_id * b_block_size ;
0 commit comments