From 7e90bda33b5013ac39aa051b738074a5bcd2cec2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Wed, 3 Sep 2014 17:31:33 -0400 Subject: [PATCH] R600/SI: Use S_ADD_U32 and S_SUB_U32 for low half of 64-bit operations https://bugs.freedesktop.org/show_bug.cgi?id=83416 --- lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 2 +- lib/Target/R600/SIInstrInfo.cpp | 8 +++++--- lib/Target/R600/SIInstructions.td | 4 ++-- test/CodeGen/R600/add.ll | 2 +- test/CodeGen/R600/add_i64.ll | 4 ++-- test/CodeGen/R600/ssubo.ll | 2 +- test/CodeGen/R600/sub.ll | 2 +- test/CodeGen/R600/trunc.ll | 2 +- test/CodeGen/R600/uaddo.ll | 2 +- test/CodeGen/R600/usubo.ll | 2 +- 10 files changed, 16 insertions(+), 14 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp index 2509a2e..3c68e45 100644 --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp @@ -701,7 +701,7 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; - unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32; + unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32; unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; if (!isCFDepth0()) { diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index db40e66..dbe1305 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -494,7 +494,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); // Add 32-bit offset from this instruction to the start of the constant data. - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_I32), RegLo) + BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) .addReg(RegLo) .addTargetIndex(AMDGPU::TI_CONSTDATA_START) .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); @@ -917,9 +917,11 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_MOV_B32: return MI.getOperand(1).isReg() ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; - case AMDGPU::S_ADD_I32: return AMDGPU::V_ADD_I32_e32; + case AMDGPU::S_ADD_I32: + case AMDGPU::S_ADD_U32: return AMDGPU::V_ADD_I32_e32; case AMDGPU::S_ADDC_U32: return AMDGPU::V_ADDC_U32_e32; - case AMDGPU::S_SUB_I32: return AMDGPU::V_SUB_I32_e32; + case AMDGPU::S_SUB_I32: + case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32; case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32; case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32; case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 212b325..003885f 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1856,11 +1856,11 @@ def : Pat < // SOP2 Patterns //===----------------------------------------------------------------------===// -// V_ADD_I32_e32/S_ADD_I32 produces carry in VCC/SCC. For the vector +// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector // case, the sgpr-copies pass will fix this to use the vector version. def : Pat < (i32 (addc i32:$src0, i32:$src1)), - (S_ADD_I32 $src0, $src1) + (S_ADD_U32 $src0, $src1) >; } // Predicates = [isSI, isCFDepth0] diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll index 711a2bc..f62c9d6 100644 --- a/test/CodeGen/R600/add.ll +++ b/test/CodeGen/R600/add.ll @@ -117,7 +117,7 @@ entry: } ; FUNC-LABEL: @add64 -; SI-CHECK: S_ADD_I32 +; SI-CHECK: S_ADD_U32 ; SI-CHECK: S_ADDC_U32 define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { entry: diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll index dac4f17..5be969c 100644 --- a/test/CodeGen/R600/add_i64.ll +++ b/test/CodeGen/R600/add_i64.ll @@ -43,9 +43,9 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace ; SI-LABEL: @test_v2i64_sreg: -; SI: S_ADD_I32 +; SI: S_ADD_U32 ; SI: S_ADDC_U32 -; SI: S_ADD_I32 +; SI: S_ADD_U32 ; SI: S_ADDC_U32 define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) { %result = add <2 x i64> %a, %b diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll index b330276..066cdf5 100644 --- a/test/CodeGen/R600/ssubo.ll +++ b/test/CodeGen/R600/ssubo.ll @@ -38,7 +38,7 @@ define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 } ; FUNC-LABEL: @s_ssubo_i64 -; SI: S_SUB_I32 +; SI: S_SUB_U32 ; SI: S_SUBB_U32 define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll index 8e64148..a8196a0 100644 --- a/test/CodeGen/R600/sub.ll +++ b/test/CodeGen/R600/sub.ll @@ -40,7 +40,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { } ; FUNC-LABEL: @s_sub_i64: -; SI: S_SUB_I32 +; SI: S_SUB_U32 ; SI: S_SUBB_U32 ; EG-DAG: SETGE_UINT diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll index 4a278fb..f635ac2 100644 --- a/test/CodeGen/R600/trunc.ll +++ b/test/CodeGen/R600/trunc.ll @@ -31,7 +31,7 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) { ; SI-LABEL: @trunc_shl_i64: ; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd -; SI: S_ADD_I32 s[[LO_SREG2:[0-9]+]], s[[LO_SREG]], +; SI: S_ADD_U32 s[[LO_SREG2:[0-9]+]], s[[LO_SREG]], ; SI: S_ADDC_U32 ; SI: S_LSHL_B64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG2]]:{{[0-9]+\]}}, 2 ; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SHL]] diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll index a80e502..0b854b5 100644 --- a/test/CodeGen/R600/uaddo.ll +++ b/test/CodeGen/R600/uaddo.ll @@ -43,7 +43,7 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 } ; FUNC-LABEL: @s_uaddo_i64 -; SI: S_ADD_I32 +; SI: S_ADD_U32 ; SI: S_ADDC_U32 define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll index d57a2c7..c293ad7 100644 --- a/test/CodeGen/R600/usubo.ll +++ b/test/CodeGen/R600/usubo.ll @@ -40,7 +40,7 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 } ; FUNC-LABEL: @s_usubo_i64 -; SI: S_SUB_I32 +; SI: S_SUB_U32 ; SI: S_SUBB_U32 define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind { %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind -- 1.8.5.5