From 4be08b68166c8554775ecd0d265fe10ce467d853 Mon Sep 17 00:00:00 2001
From: Bas Vermeulen
Date: Mon, 9 Apr 2018 12:38:23 +0200
Subject: [PATCH 2/2] radeonsi: convert dispatch packet to little endian

The parameters for the compute engine are wrong when using an E8860 on a
big endian machine. To fix this, convert the contents of struct
dispatch_packet to little endian.

This ensures that get_global_id(0) and similar functions in the OpenCL
code get the correct endian values, and makes my simple OpenCL program
work correctly.

Signed-off-by: Bas Vermeulen
---
 src/gallium/drivers/radeonsi/si_compute.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index dfede47605..8ac5b262c4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -564,18 +564,18 @@ static void si_setup_user_sgprs_co_v2(struct si_context *sctx,
 	/* Upload dispatch ptr */
 	memset(&dispatch, 0, sizeof(dispatch));
 
-	dispatch.workgroup_size_x = info->block[0];
-	dispatch.workgroup_size_y = info->block[1];
-	dispatch.workgroup_size_z = info->block[2];
+	dispatch.workgroup_size_x = util_cpu_to_le16(info->block[0]);
+	dispatch.workgroup_size_y = util_cpu_to_le16(info->block[1]);
+	dispatch.workgroup_size_z = util_cpu_to_le16(info->block[2]);
 
-	dispatch.grid_size_x = info->grid[0] * info->block[0];
-	dispatch.grid_size_y = info->grid[1] * info->block[1];
-	dispatch.grid_size_z = info->grid[2] * info->block[2];
+	dispatch.grid_size_x = util_cpu_to_le32(info->grid[0] * info->block[0]);
+	dispatch.grid_size_y = util_cpu_to_le32(info->grid[1] * info->block[1]);
+	dispatch.grid_size_z = util_cpu_to_le32(info->grid[2] * info->block[2]);
 
-	dispatch.private_segment_size = program->private_size;
-	dispatch.group_segment_size = program->local_size;
+	dispatch.private_segment_size = util_cpu_to_le32(program->private_size);
+	dispatch.group_segment_size = util_cpu_to_le32(program->local_size);
 
-	dispatch.kernarg_address = kernel_args_va;
+	dispatch.kernarg_address = util_cpu_to_le64(kernel_args_va);
 
 	u_upload_data(sctx->b.const_uploader, 0, sizeof(dispatch),
 		      256, &dispatch, &dispatch_offset,
@@ -652,9 +652,9 @@ static bool si_upload_compute_input(struct si_context *sctx,
 
 	if (!code_object) {
 		for (i = 0; i < 3; i++) {
-			kernel_args[i] = info->grid[i];
-			kernel_args[i + 3] = info->grid[i] * info->block[i];
-			kernel_args[i + 6] = info->block[i];
+			kernel_args[i] = util_cpu_to_le32(info->grid[i]);
+			kernel_args[i + 3] = util_cpu_to_le32(info->grid[i] * info->block[i]);
+			kernel_args[i + 6] = util_cpu_to_le32(info->block[i]);
 		}
 	}
 
-- 
2.14.1