From b4b3de44a9f6c6f98b704d4a221f622ffb24447c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 18 Jul 2013 16:47:37 -0400 Subject: [PATCH] radeonsi: Use pointers rather than resource descriptors for shader constants v3 The TGSI->LLVM pass for radeonsi preloads constants and relies on LLVM's sinking pass to reduce SGPR usage by sinking constant reads to an optimal place in the code. However, LLVM's machine sink pass will not sink instructions that have been selected from llvm.SI.load.const intrinsics, because these instructions do not have a MachineMemOperand, which LLVM needs in order to determine whether or not it is safe to sink a load. Replacing this intrinsic with a real load instruction will enable the sinking optimization and probably a few others. The other advantages of using pointers are: + Reduced register usage (pointers take 2 registers, descriptors take 4) + More code sharing with compute This should also fix some crashes due to the compiler running out of registers like in this bug: https://bugs.freedesktop.org/show_bug.cgi?id=66805 v2: - Mark constant loads as invariant, so the machine sink pass will actually sink them. 
v3: - Correctly specify the invariant.load metadata --- src/gallium/drivers/radeonsi/radeonsi_shader.c | 31 +++++++++++++++++++++----- src/gallium/drivers/radeonsi/si_state_draw.c | 4 ++++ 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index 4d8a479..d251252 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -115,20 +115,31 @@ static LLVMValueRef build_indexed_load( return result; } -static LLVMValueRef build_constant_load( +static LLVMValueRef build_load_constant( struct si_shader_context * si_shader_ctx, LLVMValueRef base_ptr, LLVMValueRef offset) { struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; +#if HAVE_LLVM <= 0x0303 LLVMValueRef args[2]; args[0] = base_ptr; args[1] = offset; return build_intrinsic(base->gallivm->builder, "llvm.SI.load.const", base->elem_type, args, 2, LLVMReadNoneAttribute | LLVMNoUnwindAttribute); - +#else + unsigned md_invariant_load = LLVMGetMDKindIDInContext( + base->gallivm->context, "invariant.load", 14); + LLVMValueRef dword_offset = LLVMBuildUDiv(base->gallivm->builder, + offset, + lp_build_const_int32(base->gallivm, 4), ""); + LLVMValueRef load = build_indexed_load(si_shader_ctx, base_ptr, dword_offset); + LLVMValueRef md = LLVMMDNodeInContext(base->gallivm->context, NULL, 0); + LLVMSetMetadata(load, md_invariant_load, md); + return load; +#endif } static LLVMValueRef get_instance_index( @@ -450,7 +461,7 @@ static LLVMValueRef fetch_constant( addr = lp_build_mul_imm(&bld_base->uint_bld, addr, 16); args[1] = lp_build_add(&bld_base->uint_bld, addr, args[1]); - result = build_constant_load(si_shader_ctx, args[0], args[1]); + result = build_load_constant(si_shader_ctx, args[0], args[1]); return bitcast(bld_base, type, result); } @@ -609,7 +620,7 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, args[1] 
= lp_build_const_int32(base->gallivm, ((reg_index * 4 + chan) * 4 + const_chan) * 4); - base_elt = build_constant_load(si_shader_ctx, args[0], args[1]); + base_elt = build_load_constant(si_shader_ctx, args[0], args[1]); args[5 + chan] = lp_build_add(base, args[5 + chan], lp_build_mul(base, base_elt, @@ -1215,8 +1226,16 @@ static void create_function(struct si_shader_context *si_shader_ctx) v2i32 = LLVMVectorType(i32, 2); v3i32 = LLVMVectorType(i32, 3); - params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), CONST_ADDR_SPACE); +#if HAVE_LLVM <= 0x0303 + params[SI_PARAM_CONST] = LLVMPointerType(LLVMVectorType(i8, 16), + CONST_ADDR_SPACE); params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST]; +#else + params[SI_PARAM_CONST] = LLVMPointerType(LLVMPointerType(f32, + CONST_ADDR_SPACE), CONST_ADDR_SPACE); + params[SI_PARAM_SAMPLER] = LLVMPointerType(LLVMVectorType(i8, 16), + CONST_ADDR_SPACE); +#endif params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE); if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { @@ -1298,7 +1317,7 @@ static void preload_constants(struct si_shader_context *si_shader_ctx) si_shader_ctx->const_resource, lp_build_const_int32(gallivm, i * 4) }; - si_shader_ctx->constants[i] = build_constant_load(si_shader_ctx, + si_shader_ctx->constants[i] = build_load_constant(si_shader_ctx, args[0], args[1]); } } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 29d960d..efbee0d 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -494,6 +494,7 @@ static void si_constant_buffer_update(struct r600_context *rctx) si_pm4_sh_data_add(pm4, va); si_pm4_sh_data_add(pm4, (S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0))); +#if HAVE_LLVM <= 0x0303 si_pm4_sh_data_add(pm4, cb->buffer_size); si_pm4_sh_data_add(pm4, S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | @@ -501,12 
+502,15 @@ static void si_constant_buffer_update(struct r600_context *rctx) S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32)); +#endif } else { /* Fill in an empty T# buffer resource description */ si_pm4_sh_data_add(pm4, 0); si_pm4_sh_data_add(pm4, 0); +#if HAVE_LLVM <= 0x0303 si_pm4_sh_data_add(pm4, 0); si_pm4_sh_data_add(pm4, 0); +#endif } } -- 1.8.1.5