From 6f55fe626f96ceede3b783958e8248d6ba33cbe6 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 16:47:37 -0400 Subject: [PATCH 2/2] radeonsi: Use pointers rather than resource descriptors for shader constants The TGSI->LLVM pass for radeonsi preloads constants and relies on LLVM's sinking pass to reduce SGPR usage by lowering constant reads to an optimal place in the code. However, LLVM's machine sink pass will not lower instructions that have been selected from llvm.SI.load.const intrinsics, because these instructions do not have a MachineMemOperand, which LLVM needs in order to determine whether or not it is safe to sink a load. Replacing this intrinsic with a real load instruction will enable the sinking optimization and probably a few others. The other advantages of using pointers are: + Reduced register usage (pointers take 2 registers, descriptors take 4) + More code sharing with compute This should also fix some crashes due to the compiler running out of registers like in this bug: https://bugs.freedesktop.org/show_bug.cgi?id=66805 --- src/gallium/drivers/radeonsi/radeonsi_shader.c | 26 ++++++++++++++++++++------ src/gallium/drivers/radeonsi/si_state_draw.c | 4 ++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 4d8a479..eb63fc9 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -115,20 +115,26 @@ static LLVMValueRef build_indexed_load( return result; } -static LLVMValueRef build_constant_load( +static LLVMValueRef build_load_constant( struct si_shader_context * si_shader_ctx, LLVMValueRef base_ptr, LLVMValueRef offset) { struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; +#if HAVE_LLVM <= 0x0303 LLVMValueRef args[2]; args[0] = base_ptr; args[1] = offset; return build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type, args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - +#else + LLVMValueRef dword_offset = LLVMBuildUDiv(base->gallivm->builder, + offset, + lp_build_const_int32(base->gallivm, 4), ""); + return build_indexed_load(si_shader_ctx, base_ptr, dword_offset); +#endif } static LLVMValueRef get_instance_index( @@ -450,7 +456,7 @@ static LLVMValueRef fetch_constant( addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]); - result = build_constant_load(si_shader_ctx, args[0], args[1]); + result = build_load_constant(si_shader_ctx, args[0], args[1]); return bitcast(bld_base, type, result); } @@ -609,7 +615,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, args[1] = lp_build_const_int32(base->gallivm, ((reg_index * 4 + chan) * 4 + const_chan) * 4); - base_elt = build_constant_load(si_shader_ctx, args[0], args[1]); + base_elt = build_load_constant(si_shader_ctx, args[0], args[1]); args[5 + chan] = lp_build_add(base, args[5 + chan], lp_build_mul(base, base_elt, @@ -1215,8 +1221,16 @@ static void create_function(struct si_shader_context *si_shader_ctx) v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); - params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE); +#if HAVE_LLVM <= 0x0303 + params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), + CONST_ADDR_SPACE); params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST]; +#else + params[SI_PARAM_CONST] = LLVMPointerType(LLVMPointerType(f32, + CONST_ADDR_SPACE), CONST_ADDR_SPACE); + params[SI_PARAM_SAMPLER] = LLVMPointerType(LLVMVectorType(i8, 16), + CONST_ADDR_SPACE); +#endif params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE); if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { @@ -1298,7 +1312,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx) si_shader_ctx->const_resource, lp_build_const_int32(gallivm, i * 4) }; - si_shader_ctx->constants[i] = build_constant_load(si_shader_ctx, + si_shader_ctx->constants[i] = build_load_constant(si_shader_ctx, args[0], args[1]); } } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 29d960d..efbee0d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -494,6 +494,7 @@ static void si_constant_buffer_update(struct r600_context *rctx) si_pm4_sh_data_add(pm4, va); si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0))); +#if HAVE_LLVM <= 0x0303 si_pm4_sh_data_add(pm4, cb->buffer_size); si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -501,12 +502,15 @@ static void si_constant_buffer_update(struct r600_context *rctx) S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32)); +#endif } else { /* Fill in an empty T# buffer resource description */ si_pm4_sh_data_add(pm4, 0); si_pm4_sh_data_add(pm4, 0); +#if HAVE_LLVM <= 0x0303 si_pm4_sh_data_add(pm4, 0); si_pm4_sh_data_add(pm4, 0); +#endif } } -- 1.8.1.5