diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index ae611a5..2712374 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -30,6 +30,7 @@ #include #include #include +#include /* 6.11.2 Math Functions */ #include diff --git a/generic/include/clc/workitem/get_global_offset.h b/generic/include/clc/workitem/get_global_offset.h new file mode 100644 index 0000000..efd21ca --- /dev/null +++ b/generic/include/clc/workitem/get_global_offset.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_global_offset(uint dimidx); diff --git a/generic/lib/workitem/get_global_id.cl b/generic/lib/workitem/get_global_id.cl index fdd83d2..a2df1f0 100644 --- a/generic/lib/workitem/get_global_id.cl +++ b/generic/lib/workitem/get_global_id.cl @@ -1,5 +1,5 @@ #include _CLC_DEF size_t get_global_id(uint dim) { - return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); + return get_group_id(dim)*get_local_size(dim) + get_local_id(dim) + get_global_offset(dim); } diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES index 7cdbd85..6fd519f 100644 --- a/ptx-nvidiacl/lib/SOURCES +++ b/ptx-nvidiacl/lib/SOURCES @@ -3,3 +3,4 @@ workitem/get_group_id.cl workitem/get_local_id.cl workitem/get_local_size.cl workitem/get_num_groups.cl +workitem/get_global_offset.cl diff --git a/ptx-nvidiacl/lib/workitem/get_global_offset.cl b/ptx-nvidiacl/lib/workitem/get_global_offset.cl new file mode 100644 index 0000000..bdce541 --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_global_offset.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_global_offset(uint dimidx) { + switch (dimidx) { +/* case 0: return __builtin_ptx_read_global_offset_x(); + case 1: return __builtin_ptx_read_global_offset_y(); + case 2: return __builtin_ptx_read_global_offset_z();*/ + default: return 0; /* per-dimension reads are disabled above, so every dimidx reports offset 0 */ + } +} diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES index ef23d83..d0645b7 100644 --- a/r600/lib/SOURCES +++ b/r600/lib/SOURCES @@ -4,6 +4,7 @@ workitem/get_num_groups.ll workitem/get_group_id.ll
workitem/get_local_size.ll workitem/get_local_id.ll +workitem/get_global_offset.ll workitem/get_global_size.ll workitem/get_work_dim.ll synchronization/barrier.cl diff --git a/r600/lib/workitem/get_global_offset.ll b/r600/lib/workitem/get_global_offset.ll new file mode 100644 index 0000000..b380d43 --- /dev/null +++ b/r600/lib/workitem/get_global_offset.ll @@ -0,0 +1,18 @@ +declare i32 @llvm.AMDGPU.read.global.offset.x() nounwind readnone +declare i32 @llvm.AMDGPU.read.global.offset.y() nounwind readnone +declare i32 @llvm.AMDGPU.read.global.offset.z() nounwind readnone + +define i32 @get_global_offset(i32 %dimidx) nounwind readnone alwaysinline { + switch i32 %dimidx, label %out_of_range [i32 0, label %dim0 i32 1, label %dim1 i32 2, label %dim2] +dim0: + %offset_x = call i32 @llvm.AMDGPU.read.global.offset.x() nounwind readnone + ret i32 %offset_x +dim1: + %offset_y = call i32 @llvm.AMDGPU.read.global.offset.y() nounwind readnone + ret i32 %offset_y +dim2: + %offset_z = call i32 @llvm.AMDGPU.read.global.offset.z() nounwind readnone + ret i32 %offset_z +out_of_range: + ret i32 0 +}