diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h index ae611a5..2712374 100644 --- a/generic/include/clc/clc.h +++ b/generic/include/clc/clc.h @@ -30,6 +30,7 @@ #include #include #include +#include /* 6.11.2 Math Functions */ #include diff --git a/generic/include/clc/workitem/get_global_offset.h b/generic/include/clc/workitem/get_global_offset.h new file mode 100644 index 0000000..efd21ca --- /dev/null +++ b/generic/include/clc/workitem/get_global_offset.h @@ -0,0 +1 @@ +_CLC_DECL size_t get_global_offset(uint dimidx); diff --git a/generic/lib/workitem/get_global_id.cl b/generic/lib/workitem/get_global_id.cl index fdd83d2..a2df1f0 100644 --- a/generic/lib/workitem/get_global_id.cl +++ b/generic/lib/workitem/get_global_id.cl @@ -1,5 +1,5 @@ #include _CLC_DEF size_t get_global_id(uint dim) { - return get_group_id(dim)*get_local_size(dim) + get_local_id(dim); + return get_group_id(dim)*get_local_size(dim) + get_local_id(dim) + get_global_offset(dim); } diff --git a/ptx-nvidiacl/lib/SOURCES b/ptx-nvidiacl/lib/SOURCES index 7cdbd85..6fd519f 100644 --- a/ptx-nvidiacl/lib/SOURCES +++ b/ptx-nvidiacl/lib/SOURCES @@ -3,3 +3,4 @@ workitem/get_group_id.cl workitem/get_local_id.cl workitem/get_local_size.cl workitem/get_num_groups.cl +workitem/get_global_offset.cl diff --git a/ptx-nvidiacl/lib/workitem/get_global_offset.cl b/ptx-nvidiacl/lib/workitem/get_global_offset.cl new file mode 100644 index 0000000..18f7c93 --- /dev/null +++ b/ptx-nvidiacl/lib/workitem/get_global_offset.cl @@ -0,0 +1,10 @@ +#include + +_CLC_DEF size_t get_global_offset(uint dimidx) { + switch (dimidx) { + case 0: return __builtin_ptx_read_global_offset_x(); + case 1: return __builtin_ptx_read_global_offset_y(); + case 2: return __builtin_ptx_read_global_offset_z(); + default: return 0; + } +} diff --git a/r600/lib/OVERRIDES b/r600/lib/OVERRIDES index 3f941d8..e69de29 100644 --- a/r600/lib/OVERRIDES +++ b/r600/lib/OVERRIDES @@ -1,2 +0,0 @@ -workitem/get_group_id.cl -workitem/get_global_size.cl diff --git a/r600/lib/SOURCES b/r600/lib/SOURCES index ef23d83..06b0038 100644 --- a/r600/lib/SOURCES +++ b/r600/lib/SOURCES @@ -1,10 +1,11 @@ atomic/atomic.cl math/nextafter.cl -workitem/get_num_groups.ll -workitem/get_group_id.ll -workitem/get_local_size.ll -workitem/get_local_id.ll -workitem/get_global_size.ll -workitem/get_work_dim.ll +workitem/get_num_groups.cl +workitem/get_group_id.cl +workitem/get_local_size.cl +workitem/get_local_id.cl +workitem/get_global_offset.cl +workitem/get_global_size.cl +workitem/get_work_dim.cl synchronization/barrier.cl synchronization/barrier_impl.ll diff --git a/r600/lib/workitem/get_global_offset.cl b/r600/lib/workitem/get_global_offset.cl new file mode 100644 index 0000000..bc6a43a --- /dev/null +++ b/r600/lib/workitem/get_global_offset.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_global_offset(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_global_offset_x(); + case 1: return __builtin_amdgpu_read_global_offset_y(); + case 2: return __builtin_amdgpu_read_global_offset_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_global_size.cl b/r600/lib/workitem/get_global_size.cl new file mode 100644 index 0000000..f01fd49 --- /dev/null +++ b/r600/lib/workitem/get_global_size.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_global_size(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_global_size_x(); + case 1: return __builtin_amdgpu_read_global_size_y(); + case 2: return __builtin_amdgpu_read_global_size_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_global_size.ll b/r600/lib/workitem/get_global_size.ll deleted file mode 100644 index ac2d08d..0000000 --- a/r600/lib/workitem/get_global_size.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.global.size.x() nounwind readnone -declare i32 @llvm.r600.read.global.size.y() nounwind readnone -declare i32 @llvm.r600.read.global.size.z() nounwind readnone - -define i32 @get_global_size(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.global.size.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.global.size.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.global.size.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_group_id.cl b/r600/lib/workitem/get_group_id.cl new file mode 100644 index 0000000..98bce5e --- /dev/null +++ b/r600/lib/workitem/get_group_id.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_group_id(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_tgid_x(); + case 1: return __builtin_amdgpu_read_tgid_y(); + case 2: return __builtin_amdgpu_read_tgid_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_group_id.ll b/r600/lib/workitem/get_group_id.ll deleted file mode 100644 index 0dc86e5..0000000 --- a/r600/lib/workitem/get_group_id.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.tgid.x() nounwind readnone -declare i32 @llvm.r600.read.tgid.y() nounwind readnone -declare i32 @llvm.r600.read.tgid.z() nounwind readnone - -define i32 @get_group_id(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.tgid.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.tgid.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.tgid.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_local_id.cl b/r600/lib/workitem/get_local_id.cl new file mode 100644 index 0000000..1bceac5 --- /dev/null +++ b/r600/lib/workitem/get_local_id.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_local_id(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_tidig_x(); + case 1: return __builtin_amdgpu_read_tidig_y(); + case 2: return __builtin_amdgpu_read_tidig_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_local_id.ll b/r600/lib/workitem/get_local_id.ll deleted file mode 100644 index ac5522a..0000000 --- a/r600/lib/workitem/get_local_id.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare i32 @llvm.r600.read.tidig.y() nounwind readnone -declare i32 @llvm.r600.read.tidig.z() nounwind readnone - -define i32 @get_local_id(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.tidig.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.tidig.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.tidig.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_local_size.cl b/r600/lib/workitem/get_local_size.cl new file mode 100644 index 0000000..18c8d1a --- /dev/null +++ b/r600/lib/workitem/get_local_size.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_local_size(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_local_size_x(); + case 1: return __builtin_amdgpu_read_local_size_y(); + case 2: return __builtin_amdgpu_read_local_size_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_local_size.ll b/r600/lib/workitem/get_local_size.ll deleted file mode 100644 index 0a98de6..0000000 --- a/r600/lib/workitem/get_local_size.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.local.size.x() nounwind readnone -declare i32 @llvm.r600.read.local.size.y() nounwind readnone -declare i32 @llvm.r600.read.local.size.z() nounwind readnone - -define i32 @get_local_size(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.local.size.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.local.size.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.local.size.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_num_groups.cl b/r600/lib/workitem/get_num_groups.cl new file mode 100644 index 0000000..80dab69 --- /dev/null +++ b/r600/lib/workitem/get_num_groups.cl @@ -0,0 +1,11 @@ +#include + +_CLC_DEF size_t get_num_groups(uint dimindx) { + switch (dimindx) { + case 0: return __builtin_amdgpu_read_ngroups_x(); + case 1: return __builtin_amdgpu_read_ngroups_y(); + case 2: return __builtin_amdgpu_read_ngroups_z(); + default: return 0; + } +} + diff --git a/r600/lib/workitem/get_num_groups.ll b/r600/lib/workitem/get_num_groups.ll deleted file mode 100644 index a708f42..0000000 --- a/r600/lib/workitem/get_num_groups.ll +++ /dev/null @@ -1,18 +0,0 @@ -declare i32 @llvm.r600.read.ngroups.x() nounwind readnone -declare i32 @llvm.r600.read.ngroups.y() nounwind readnone -declare i32 @llvm.r600.read.ngroups.z() nounwind readnone - -define i32 @get_num_groups(i32 %dim) nounwind readnone alwaysinline { - switch i32 %dim, label %default [i32 0, label %x_dim i32 1, label %y_dim i32 2, label %z_dim] -x_dim: - %x = call i32 @llvm.r600.read.ngroups.x() nounwind readnone - ret i32 %x -y_dim: - %y = call i32 @llvm.r600.read.ngroups.y() nounwind readnone - ret i32 %y -z_dim: - %z = call i32 @llvm.r600.read.ngroups.z() nounwind readnone - ret i32 %z -default: - ret i32 0 -} diff --git a/r600/lib/workitem/get_work_dim.cl b/r600/lib/workitem/get_work_dim.cl new file mode 100644 index 0000000..1a29d7d --- /dev/null +++ b/r600/lib/workitem/get_work_dim.cl @@ -0,0 +1,6 @@ +#include + +_CLC_DEF uint get_work_dim() { + return __builtin_amdgpu_read_workdim(); +} + diff --git a/r600/lib/workitem/get_work_dim.ll b/r600/lib/workitem/get_work_dim.ll deleted file mode 100644 index 1f86b5e..0000000 --- a/r600/lib/workitem/get_work_dim.ll +++ /dev/null @@ -1,8 +0,0 @@ -declare i32 @llvm.AMDGPU.read.workdim() nounwind readnone - -define i32 @get_work_dim() nounwind readnone alwaysinline { - %x = call i32 @llvm.AMDGPU.read.workdim() nounwind readnone , !range !0 - ret i32 %x -} - -!0 = !{ i32 1, i32 4 }