Running Steam on arch rolling 32-bit
STEAM_RUNTIME is enabled automatically
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
[2013-07-20 12:18:48] Startup - updater built Jul 19 2013 10:21:47
[2013-07-20 12:18:48] Opted in to client beta 'publicbeta' via beta file
FRAG
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  %15 = extractelement <4 x float> %11, i32 3
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float %21, float %22)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float %15)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800704
bf8c007f
e00c2000
80000100
bf8c0770
f800020f
04030201
c0800700
bf8c000f
e00c2000
80000000
bf8c0770
f80008cf
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
7e000280
f8001800
00000000
bf810000
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
unlinked 0 orphaned pipes
removing stale semaphore last operated on by process 4245 with name 0eBlobRegistryMutex_C1576BC245881240C7079614AD3241B3
removing stale semaphore last operated on by process 4245 with name 0eBlobRegistrySignal_C1576BC245881240C7079614AD3241B3
removing stale semaphore last operated on by process 4245 with name 0emSteamEngineInstance
removing stale semaphore last operated on by process 4245 with name 0eSteamEngineLock

(steam:4791): Gtk-WARNING **: Загружаемый модуль тем не найден в module_path: «qtcurve»,
/home/behem0th/.gtkrc-2.0:12: error: scanner: unterminated string constant
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element
Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element
Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number
Looks like steam didn't shutdown cleanly, scheduling immediate update check
[2013-07-20 12:18:49] Checking for update on startup
[2013-07-20 12:18:49] Проверка на наличие обновлений...
[2013-07-20 12:18:50] Download skipped: /client/steam_client_publicbeta_ubuntu12 version 1374261093, installed version 1374261093
[2013-07-20 12:18:50] Nothing to do
[2013-07-20 12:18:50] Проверка установки...
[2013-07-20 12:18:50] Performing checksum verification of executable files
[2013-07-20 12:18:50] Verification complete
FRAG
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
[0720/121851:WARNING:proxy_service.cc(958)] PAC support disabled because there is no system implementation
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  %15 = extractelement <4 x float> %11, i32 3
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float %21, float %22)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float %15)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800704
bf8c007f
e00c2000
80000100
bf8c0770
f800020f
04030201
c0800700
bf8c000f
e00c2000
80000000
bf8c0770
f80008cf
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
7e000280
f8001800
00000000
bf810000
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
PulseAudio connect failed (used only for Mic Volume Control) with error: Access denied
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Errors in resource/styles/steam.styles:
Unknown key 'visible' set in style 'html-findbar' in file 'resource/styles/steam.styles'
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %31, float %32, float %33, float %34)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
f800180f
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  %35 = call i32 @llvm.SI.packf16(float %31, float %32)
  %36 = bitcast i32 %35 to float
  %37 = call i32 @llvm.SI.packf16(float %33, float %34)
  %38 = bitcast i32 %37 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %36, float %38, float %36, float %38)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
5e080702
5e000300
f8001c0f
04000400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL CONST[0..3]
DCL TEMP[0]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: MOV_SAT OUT[1], IN[1]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = extractelement <4 x float> %29, i32 3
  %34 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %35 = load <16 x i8> addrspace(2)* %34, !tbaa !0
  %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %5)
  %37 = extractelement <4 x float> %36, i32 0
  %38 = extractelement <4 x float> %36, i32 1
  %39 = extractelement <4 x float> %36, i32 2
  %40 = extractelement <4 x float> %36, i32 3
  %41 = fmul float %30, %11
  %42 = fmul float %30, %12
  %43 = fmul float %30, %13
  %44 = fmul float %30, %14
  %45 = fmul float %31, %15
  %46 = fadd float %45, %41
  %47 = fmul float %31, %16
  %48 = fadd float %47, %42
  %49 = fmul float %31, %17
  %50 = fadd float %49, %43
  %51 = fmul float %31, %18
  %52 = fadd float %51, %44
  %53 = fmul float %32, %19
  %54 = fadd float %53, %46
  %55 = fmul float %32, %20
  %56 = fadd float %55, %48
  %57 = fmul float %32, %21
  %58 = fadd float %57, %50
  %59 = fmul float %32, %22
  %60 = fadd float %59, %52
  %61 = fmul float %33, %23
  %62 = fadd float %61, %54
  %63 = fmul float %33, %24
  %64 = fadd float %63, %56
  %65 = fmul float %33, %25
  %66 = fadd float %65, %58
  %67 = fmul float %33, %26
  %68 = fadd float %67, %60
  %69 = call float @llvm.AMDIL.clamp.(float %37, float 0,000000e+00, float 0x3FF0000000000000)
  %70 = call float @llvm.AMDIL.clamp.(float %38, float 0,000000e+00, float 0x3FF0000000000000)
  %71 = call float @llvm.AMDIL.clamp.(float %39, float 0,000000e+00, float 0x3FF0000000000000)
  %72 = call float @llvm.AMDIL.clamp.(float %40, float 0,000000e+00, float 0x3FF0000000000000)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %69, float %70, float %71, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %64, float %66, float %68)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
bf8c0770
d2060805
02010104
d2060806
02010103
d2060807
02010102
d2060801
02010101
f800020f
05060701
c0820700
bf8c000f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04100901
c202010b
bf8c007f
d2820004
04100902
c202010f
bf8c007f
d2820004
04100903
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04140901
c202010a
bf8c007f
d2820005
04140902
c202010e
bf8c007f
d2820005
04140903
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
04180901
c2020109
bf8c007f
d2820006
04180902
c202010d
bf8c007f
d2820006
04180903
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041c0901
c2020108
bf8c007f
d2820007
041c0902
c200010c
bf8c007f
d2820000
041c0103
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
5e000101
c8060102
c80a0002
5e020302
f8001c0f
00010001
bf810000
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..4]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: MOV TEMP[0].w, IN[0].wwww
  2: TXP TEMP[0], TEMP[0], SAMP[0], 2D
  3: MUL TEMP[0], TEMP[0], CONST[4]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %26 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0
  %28 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0
  %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = fdiv float %30, %32
  %34 = fdiv float %31, %32
  %35 = bitcast float %33 to i32
  %36 = bitcast float %34 to i32
  %37 = insertelement <2 x i32> undef, i32 %35, i32 0
  %38 = insertelement <2 x i32> %37, i32 %36, i32 1
  %39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %27, <16 x i8> %29, i32 2)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = extractelement <4 x float> %39, i32 3
  %44 = fmul float %40, %22
  %45 = fmul float %41, %23
  %46 = fmul float %42, %24
  %47 = fmul float %43, %25
  %48 = call i32 @llvm.SI.packf16(float %44, float %45)
  %49 = bitcast i32 %48 to float
  %50 = call i32 @llvm.SI.packf16(float %46, float %47)
  %51 = bitcast i32 %50 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080100
c8090101
c80c0300
c80d0301
7e085503
10060902
c8140000
c8150001
10040905
c0840300
c0c60500
bf8c007f
f0800f00
00430002
c0800100
bf8c0070
c2020113
bf8c007f
10080604
c2020112
bf8c007f
100a0404
5e080905
c2020111
bf8c007f
100a0204
c2000110
bf8c007f
10000000
5e000b00
f8001c0f
04000400
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: MOV OUT[1], IN[1]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = extractelement <4 x float> %29, i32 3
  %34 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %35 = load <16 x i8> addrspace(2)* %34, !tbaa !0
  %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %5)
  %37 = extractelement <4 x float> %36, i32 0
  %38 = extractelement <4 x float> %36, i32 1
  %39 = extractelement <4 x float> %36, i32 2
  %40 = extractelement <4 x float> %36, i32 3
  %41 = fmul float %30, %11
  %42 = fmul float %30, %12
  %43 = fmul float %30, %13
  %44 = fmul float %30, %14
  %45 = fmul float %31, %15
  %46 = fadd float %45, %41
  %47 = fmul float %31, %16
  %48 = fadd float %47, %42
  %49 = fmul float %31, %17
  %50 = fadd float %49, %43
  %51 = fmul float %31, %18
  %52 = fadd float %51, %44
  %53 = fmul float %32, %19
  %54 = fadd float %53, %46
  %55 = fmul float %32, %20
  %56 = fadd float %55, %48
  %57 = fmul float %32, %21
  %58 = fadd float %57, %50
  %59 = fmul float %32, %22
  %60 = fadd float %59, %52
  %61 = fmul float %33, %23
  %62 = fadd float %61, %54
  %63 = fmul float %33, %24
  %64 = fadd float %63, %56
  %65 = fmul float %33, %25
  %66 = fadd float %65, %58
  %67 = fmul float %33, %26
  %68 = fadd float %67, %60
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %37, float %38, float %39, float %40)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %64, float %66, float %68)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
bf8c0770
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04100901
c202010b
bf8c007f
d2820004
04100902
c202010f
bf8c007f
d2820004
04100903
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04140901
c202010a
bf8c007f
d2820005
04140902
c202010e
bf8c007f
d2820005
04140903
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
04180901
c2020109
bf8c007f
d2820006
04180902
c202010d
bf8c007f
d2820006
04180903
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041c0901
c2020108
bf8c007f
d2820007
041c0902
c200010c
bf8c007f
d2820000
041c0103
f80008cf
04050600
bf810000
Generating new string page texture 6: 48x256, total string texture memory is 49,15 KB
Generating new string page texture 7: 48x256, total string texture memory is 49,15 KB
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL IN[1], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: MOV TEMP[0].w, IN[1].wwww
  2: TXP TEMP[0], TEMP[0], SAMP[0], 2D
  3: MUL TEMP[0], TEMP[0], IN[0]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %31 = fdiv float %28, %30
  %32 = fdiv float %29, %30
  %33 = bitcast float %31 to i32
  %34 = bitcast float %32 to i32
  %35 = insertelement <2 x i32> undef, i32 %33, i32 0
  %36 = insertelement <2 x i32> %35, i32 %34, i32 1
  %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %21, <16 x i8> %23, i32 2)
  %38 = extractelement <4 x float> %37, i32 0
  %39 = extractelement <4 x float> %37, i32 1
  %40 = extractelement <4 x float> %37, i32 2
  %41 = extractelement <4 x float> %37, i32 3
  %42 = fmul float %38, %24
  %43 = fmul float %39, %25
  %44 = fmul float %40, %26
  %45 = fmul float %41, %27
  %46 = call i32 @llvm.SI.packf16(float %42, float %43)
  %47 = bitcast i32 %46 to float
  %48 = call i32 @llvm.SI.packf16(float %44, float %45)
  %49 = bitcast i32 %48 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %47, float %49, float %47, float %49)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080500
c8090501
c80c0700
c80d0701
7e085503
10060902
c8140400
c8150401
10040905
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0d05
c81c0200
c81d0201
100e0f04
5e0c0d07
c81c0100
c81d0101
100e0f03
c8200000
c8210001
10001102
5e000f00
f8001c0f
06000600
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL OUT[2], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: MOV_SAT OUT[1], IN[1]
  5: MOV OUT[2], IN[2]
  6: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = extractelement <4 x float> %29, i32 3
  %34 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %35 = load <16 x i8> addrspace(2)* %34, !tbaa !0
  %36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %35, i32 0, i32 %5)
  %37 = extractelement <4 x float> %36, i32 0
  %38 = extractelement <4 x float> %36, i32 1
  %39 = extractelement <4 x float> %36, i32 2
  %40 = extractelement <4 x float> %36, i32 3
  %41 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0
  %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %5)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = extractelement <4 x float> %43, i32 3
  %48 = fmul float %30, %11
  %49 = fmul float %30, %12
  %50 = fmul float %30, %13
  %51 = fmul float %30, %14
  %52 = fmul float %31, %15
  %53 = fadd float %52, %48
  %54 = fmul float %31, %16
  %55 = fadd float %54, %49
  %56 = fmul float %31, %17
  %57 = fadd float %56, %50
  %58 = fmul float %31, %18
  %59 = fadd float %58, %51
  %60 = fmul float %32, %19
  %61 = fadd float %60, %53
  %62 = fmul float %32, %20
  %63 = fadd float %62, %55
  %64 = fmul float %32, %21
  %65 = fadd float %64, %57
  %66 = fmul float %32, %22
  %67 = fadd float %66, %59
  %68 = fmul float %33, %23
  %69 = fadd float %68, %61
  %70 = fmul float %33, %24
  %71 = fadd float %70, %63
  %72 = fmul float %33, %25
  %73 = fadd float %72, %65
  %74 = fmul float %33, %26
  %75 = fadd float %74, %67
  %76 = call float @llvm.AMDIL.clamp.(float %37, float 0,000000e+00, float 0x3FF0000000000000)
  %77 = call float @llvm.AMDIL.clamp.(float %38, float 0,000000e+00, float 0x3FF0000000000000)
  %78 = call float @llvm.AMDIL.clamp.(float %39, float 0,000000e+00, float 0x3FF0000000000000)
  %79 = call float @llvm.AMDIL.clamp.(float %40, float 0,000000e+00, float 0x3FF0000000000000)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %77, float %78, float %79)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %44, float %45, float %46, float %47)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %71, float %73, float %75)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
bf8c0770
d2060805
02010104
d2060806
02010103
d2060807
02010102
d2060801
02010101
f800020f
05060701
c0840708
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0820700
bf8c000f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04100901
c202010b
bf8c007f
d2820004
04100902
c202010f
bf8c007f
d2820004
04100903
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04140901
c202010a
bf8c007f
d2820005
04140902
c202010e
bf8c007f
d2820005
04140903
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
04180901
c2020109
bf8c007f
d2820006
04180902
c202010d
bf8c007f
d2820006
04180903
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041c0901
c2020108
bf8c007f
d2820007
041c0902
c200010c
bf8c007f
d2820000
041c0103
f80008cf
04050600
bf810000
Generating new string page texture 8: 384x256, total string texture memory is 442,37 KB
Installing breakpad exception handler for appid(steam)/version(1374261093_client)

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.
roaming config store loaded successfully - 8533 bytes.
migrating temporary roaming config store
Installing breakpad exception handler for appid(steam)/version(1374261093_client)

** (steam:4791): WARNING **: replace_settings: error updating connection /org/freedesktop/NetworkManager/Settings/0 settings: (1) type
Adding license for package 0
Adding license for package 34
Adding license for package 783
Adding license for package 995
Adding license for package 1259
Adding license for package 1290
Adding license for package 1333
Adding license for package 1507
Adding license for package 1774
Adding license for package 2008
Adding license for package 2075
Adding license for package 2377
Adding license for package 2481
Adding license for package 2832
Adding license for package 3052
Adding license for package 4066
Adding license for package 4097
Adding license for package 4912
Adding license for package 6098
Adding license for package 6146
Adding license for package 6232
Adding license for package 6428
Adding license for package 6443
Adding license for package 6660
Adding license for package 6749
Adding license for package 7150
Adding license for package 7166
Adding license for package 7350
Adding license for package 7431
Adding license for package 7802
Adding license for package 7877
Adding license for package 8009
Adding license for package 8372
Adding license for package 8386
Adding license for package 8535
Adding license for package 8731
Adding license for package 11010
Adding license for package 11072
Adding license for package 11274
Adding license for package 11542
Adding license for package 11591
Adding license for package 11732
Adding license for package 11793
Adding license for package 11850
Adding license for package 11984
Adding license for package 12225
Adding license for package 12288
Adding license for package 12361
Adding license for package 12456
Adding license for package 12544
Adding license for package 12897
Adding license for package 12985
Adding license for package 13054
Adding license for package 13086
Adding license for package 13314
Adding license for package 14188
Adding license for package 14712
Adding license for package 15123
Adding license for package 15349
Adding license for package 15371
Adding license for package 15376
Adding license for package 15479
Adding license for package 16547
Adding license for package 16549
Adding license for package 16699
Adding license for package 17496
Adding license for package 17638
Adding license for package 17919
Adding license for package 17922
Adding license for package 17968
Adding license for package 18207
Adding license for package 18265
Adding license for package 18613
Adding license for package 18777
Adding license for package 25835
Adding license for package 26331
Adding license for package 26375
Adding license for package 27195
Adding license for package 27235
Adding license for package 27531
Adding license for package 27644
Adding license for package 28596
ExecCommandLine: "/home/behem0th/Steam/ubuntu12_32/steam"
System startup time: 9,39 seconds
FRAG
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  %15 = extractelement <4 x float> %11, i32 3
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float %21, float %22)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float %15)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800704
bf8c007f
e00c2000
80000100
bf8c0770
f800020f
04030201
c0800700
bf8c000f
e00c2000
80000000
bf8c0770
f80008cf
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
7e000280
f8001800
00000000
bf810000
Running Steam on arch rolling 32-bit
STEAM_RUNTIME has been set by the user to: /home/behem0th/Steam/ubuntu12_32/steam-runtime
Generating new string page texture 91: 1024x256, total string texture memory is 1,49 MB
Generating new string page texture 92: 128x256, total string texture memory is 1,62 MB
Generating new string page texture 93: 256x256, total string texture memory is 311,30 KB
Generating new string page texture 94: 256x256, total string texture memory is 1,88 MB
Generating new string page texture 95: 128x256, total string texture memory is 442,37 KB
Generating new string page texture 96: 24x256, total string texture memory is 1,91 MB
Generating new string page texture 97: 32x256, total string texture memory is 1,94 MB
Generating new string page texture 98: 64x256, total string texture memory is 507,90 KB
Generating new string page texture 99: 64x256, total string texture memory is 2,01 MB
Generating new string page texture 100: 8x256, total string texture memory is 516,10 KB
Generating new string page texture 101: 8x256, total string texture memory is 2,02 MB
ExecCommandLine: "/home/behem0th/.steam/root/ubuntu12_32/steam steam://open/driverhelperready"
ExecSteamURL: "steam://open/driverhelperready"
Generating new string page texture 105: 16x256, total string texture memory is 2,03 MB
Generating new string page texture 106: 128x256, total string texture memory is 2,16 MB
Installing breakpad exception handler for appid(steam)/version(1374261093_client)
Game update: AppID 91200 "Anomaly Warzone Earth", ProcID 4947, IP 0.0.0.0:0

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.

(steam:4791): LIBDBUSMENU-GLIB-WARNING **: Trying to remove a child that doesn't believe we're it's parent.
Setting breakpad minidump AppID = 91200
Steam_SetMinidumpSteamID:  Caching Steam ID:  76561198026030947 [API loaded no]
GLX_EXT_swap_control not supported, unable to set vertical sync.
FRAG
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  %15 = extractelement <4 x float> %11, i32 3
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float %21, float %22)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float %15)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800704
bf8c007f
e00c2000
80000100
bf8c0770
f800020f
04030201
c0800700
bf8c000f
e00c2000
80000000
bf8c0770
f80008cf
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %31, float %32, float %33, float %34)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
f800180f
03020100
bf810000
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  %35 = call i32 @llvm.SI.packf16(float %31, float %32)
  %36 = bitcast i32 %35 to float
  %37 = call i32 @llvm.SI.packf16(float %33, float %34)
  %38 = bitcast i32 %37 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %36, float %38, float %36, float %38)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
5e080702
5e000300
f8001c0f
04000400
bf810000
AL lib: pulseaudio.c:612: Context did not connect: Access denied
OpenAL sound device name: ALSA Default

FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %31, float %32, float %33, float %34)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
f800180f
03020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[8..9]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy
  2: DP4 TEMP[1].x, IMM[0].xxxx, CONST[6]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[4].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].xxxx
  5: MOV TEMP[2].w, TEMP[1].wwww
  6: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[1].xyyy
  7: MOV TEMP[3].xy, TEMP[3].xyyy
  8: TEX TEMP[3], TEMP[3], SAMP[0], 2D
  9: DP4 TEMP[4].x, IMM[0].xxxx, CONST[5]
 10: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx
 11: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 12: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[0].yzww
 13: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx
 14: RCP TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 17: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[1].xyzz
 18: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 19: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 20: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 21: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[2].xyzz
 22: MOV OUT[0], TEMP[2]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %49 = fmul float %13, %38
  %50 = fadd float %49, %39
  %51 = fmul float 1.000000e+00, %34
  %52 = fmul float 1.000000e+00, %35
  %53 = fadd float %51, %52
  %54 = fmul float 1.000000e+00, %36
  %55 = fadd float %53, %54
  %56 = fmul float 1.000000e+00, %37
  %57 = fadd float %55, %56
  %58 = fadd float %57, %29
  %59 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %59, float %44, float 1.000000e+00)
  %61 = call float @llvm.AMDGPU.lrp(float %59, float %45, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %59, float %46, float 1.000000e+00)
  %63 = call float @llvm.AMDGPU.lrp(float %59, float %47, float 1.000000e+00)
  %64 = fmul float %12, %22
  %65 = fmul float %50, %23
  %66 = bitcast float %64 to i32
  %67 = bitcast float %65 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %41, <16 x i8> %43, i32 2)
  %71 = extractelement <4 x float> %70, i32 0
  %72 = extractelement <4 x float> %70, i32 1
  %73 = extractelement <4 x float> %70, i32 2
  %74 = extractelement <4 x float> %70, i32 3
  %75 = fmul float 1.000000e+00, %30
  %76 = fmul float 1.000000e+00, %31
  %77 = fadd float %75, %76
  %78 = fmul float 1.000000e+00, %32
  %79 = fadd float %77, %78
  %80 = fmul float 1.000000e+00, %33
  %81 = fadd float %79, %80
  %82 = fadd float %81, %28
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = fmul float %83, %74
  %85 = fmul float %71, 0x3FCB367A00000000
  %86 = fmul float %72, 0x3FE6E2EB20000000
  %87 = fadd float %86, %85
  %88 = fmul float %73, 0x3FB27BB300000000
  %89 = fadd float %87, %88
  %90 = fcmp uge float %89, 0x3F50624DE0000000
  %91 = select i1 %90, float %89, float 0x3F50624DE0000000
  %92 = fdiv float 1.000000e+00, %91
  %93 = fmul float %71, %92
  %94 = fmul float %72, %92
  %95 = fmul float %73, %92
  %96 = fmul float %60, %71
  %97 = fmul float %61, %72
  %98 = fmul float %62, %73
  %99 = fmul float %84, %93
  %100 = fadd float %99, %96
  %101 = fmul float %84, %94
  %102 = fadd float %101, %97
  %103 = fmul float %84, %95
  %104 = fadd float %103, %98
  %105 = fmul float %100, 4.000000e+00
  %106 = fmul float %102, 4.000000e+00
  %107 = fmul float %104, 4.000000e+00
  %108 = fcmp uge float %48, %27
  %109 = select i1 %108, float %48, float %27
  %110 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %110, float %105, float %24)
  %112 = call float @llvm.AMDGPU.lrp(float %110, float %106, float %25)
  %113 = call float @llvm.AMDGPU.lrp(float %110, float %107, float %26)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %111, float %112, float %113, float %63)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
c0840100
bf8c007f
c2000924
c2008925
bf8c007f
7e080201
d2820003
04100103
c2000905
bf8c007f
10080600
c2000904
bf8c007f
10060400
c0800300
c0c60500
bf8c007f
f0800f00
00030203
bf8c0770
100c04ff
3e59b3d0
7e0e02ff
3f371759
d2820006
041a0f03
7e0e02ff
3d93dd98
d2820006
041a0f04
7e0e02ff
3a83126f
d00c0000
02020f06
d2000006
00020d07
7e0c5506
10120d04
befc0306
c8280200
c8290201
c2000918
c2008919
bf8c007f
7e0e0201
d2060007
02020e00
c200091a
bf8c007f
060e0e00
c200091b
bf8c007f
060e0e00
c2000911
bf8c007f
060e0e00
d2060807
02010107
08100ef2
d282000a
04221507
1016090a
c2000914
c2008915
bf8c007f
7e140201
d206000a
02021400
c2000916
bf8c007f
06141400
c2000917
bf8c007f
06141400
c2000910
bf8c007f
06141400
d206080a
0201010a
10140b0a
d2820009
042e130a
101212f6
c82c0700
c82d0701
c200090b
bf8c007f
d00c0002
0200010b
7e180200
d200000b
000a170c
d206080b
0201010b
081816f2
c200090a
bf8c007f
101a1800
d2820009
0436130b
101a0d03
c8380100
c8390101
d282000e
04221d07
101c070e
d282000d
043a1b0a
101a1af6
c2000909
bf8c007f
101c1800
d282000d
043a1b0b
100c0d02
c8380000
c8390001
d282000e
04221d07
1004050e
d2820002
040a0d0a
100404f6
c2000908
bf8c007f
10061800
d2820002
040e050b
c80c0300
c80d0301
d2820000
04220707
f800180f
00090d02
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..8]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[6].xyzz, CONST[5].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MUL TEMP[3], CONST[0], TEMP[1].xxxx
  8: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 10: ADD TEMP[0], TEMP[0], CONST[3]
 11: MOV TEMP[2].w, IMM[0].xxxx
 12: MOV TEMP[2].xyz, CONST[4].xyzx
 13: MUL TEMP[2], TEMP[2], IN[3]
 14: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[8].xyzz
 15: MAD TEMP[3].x, TEMP[0].zzzz, CONST[7].xxxx, CONST[7].yyyy
 16: MOV TEMP[1].w, TEMP[3].xxxx
 17: MOV OUT[1], TEMP[2]
 18: MOV OUT[2], TEMP[1]
 19: MOV OUT[0], TEMP[0]
 20: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0
  %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %5)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = extractelement <4 x float> %43, i32 3
  %48 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %5)
  %51 = extractelement <4 x float> %50, i32 0
  %52 = extractelement <4 x float> %50, i32 1
  %53 = extractelement <4 x float> %50, i32 2
  %54 = extractelement <4 x float> %50, i32 3
  %55 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %5)
  %58 = extractelement <4 x float> %57, i32 0
  %59 = extractelement <4 x float> %57, i32 1
  %60 = extractelement <4 x float> %57, i32 2
  %61 = extractelement <4 x float> %57, i32 3
  %62 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %5)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = extractelement <4 x float> %64, i32 1
  %67 = extractelement <4 x float> %64, i32 2
  %68 = extractelement <4 x float> %64, i32 3
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = fmul float %72, %33
  %76 = fadd float %75, %30
  %77 = fmul float %73, %34
  %78 = fadd float %77, %31
  %79 = fmul float %74, %35
  %80 = fadd float %79, %32
  %81 = fmul float %76, %44
  %82 = fmul float %78, %45
  %83 = fadd float %81, %82
  %84 = fmul float %80, %46
  %85 = fadd float %83, %84
  %86 = fmul float 1.000000e+00, %47
  %87 = fadd float %85, %86
  %88 = fmul float %76, %51
  %89 = fmul float %78, %52
  %90 = fadd float %88, %89
  %91 = fmul float %80, %53
  %92 = fadd float %90, %91
  %93 = fmul float 1.000000e+00, %54
  %94 = fadd float %92, %93
  %95 = fmul float %76, %58
  %96 = fmul float %78, %59
  %97 = fadd float %95, %96
  %98 = fmul float %80, %60
  %99 = fadd float %97, %98
  %100 = fmul float 1.000000e+00, %61
  %101 = fadd float %99, %100
  %102 = fmul float %11, %87
  %103 = fmul float %12, %87
  %104 = fmul float %13, %87
  %105 = fmul float %14, %87
  %106 = fmul float %15, %94
  %107 = fadd float %106, %102
  %108 = fmul float %16, %94
  %109 = fadd float %108, %103
  %110 = fmul float %17, %94
  %111 = fadd float %110, %104
  %112 = fmul float %18, %94
  %113 = fadd float %112, %105
  %114 = fmul float %19, %101
  %115 = fadd float %114, %107
  %116 = fmul float %20, %101
  %117 = fadd float %116, %109
  %118 = fmul float %21, %101
  %119 = fadd float %118, %111
  %120 = fmul float %22, %101
  %121 = fadd float %120, %113
  %122 = fadd float %115, %23
  %123 = fadd float %117, %24
  %124 = fadd float %119, %25
  %125 = fadd float %121, %26
  %126 = fmul float %27, %65
  %127 = fmul float %28, %66
  %128 = fmul float %29, %67
  %129 = fmul float 1.000000e+00, %68
  %130 = fsub float -0.000000e+00, %38
  %131 = fadd float %87, %130
  %132 = fsub float -0.000000e+00, %39
  %133 = fadd float %94, %132
  %134 = fsub float -0.000000e+00, %40
  %135 = fadd float %101, %134
  %136 = fmul float %124, %36
  %137 = fadd float %136, %37
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %126, float %127, float %128, float %129)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %133, float %135, float %137)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %122, float %123, float %124, float %125)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020500
c2020119
c2028115
bf8c0070
7e020205
d2820003
04040906
c0840704
bf8c007f
e00c2000
80020900
bf8c0770
10021503
c2020118
c2028114
bf8c007f
7e040205
d2820004
04080905
d2820001
04061304
c202011a
c2028116
bf8c007f
7e040205
d2820005
04080907
d2820001
04061705
06021901
c0840700
bf8c007f
e00c2000
80020600
bf8c0770
10040f03
d2820002
040a0d04
d2820002
040a1105
06041302
c2020102
bf8c007f
100c0404
c2020106
bf8c007f
d2820006
041a0204
c0820708
bf8c007f
e00c2000
80010700
bf8c0770
10001103
d2820000
04020f04
d2820000
04021305
06001500
c202010a
bf8c007f
d2820003
041a0004
c202010e
bf8c007f
06060604
c202011c
c202811d
bf8c007f
7e080205
d2820004
04100903
c2020122
bf8c007f
0a0a0004
c2020121
bf8c007f
0a0c0204
c2020120
bf8c007f
0a0e0404
f800021f
04050607
c2020103
bf8c000f
10080404
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120004
c202010f
bf8c007f
06080804
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160004
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820000
04060004
c200010c
bf8c007f
06000000
f80008cf
04030500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
5e000101
c8060102
c80a0002
5e020302
f8001c0f
00010001
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %20, float %21, float %22, float %23)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
c80a0102
c80e0002
f800180f
00010203
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..10]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[8].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MOV OUT[1], TEMP[1]
 13: MOV OUT[0], TEMP[0]
 14: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fmul float %46, %40
  %56 = fadd float %55, %37
  %57 = fmul float %47, %41
  %58 = fadd float %57, %38
  %59 = fmul float %48, %42
  %60 = fadd float %59, %39
  %61 = fmul float %20, %56
  %62 = fmul float %21, %56
  %63 = fmul float %22, %56
  %64 = fmul float %23, %56
  %65 = fmul float %24, %58
  %66 = fadd float %65, %61
  %67 = fmul float %25, %58
  %68 = fadd float %67, %62
  %69 = fmul float %26, %58
  %70 = fadd float %69, %63
  %71 = fmul float %27, %58
  %72 = fadd float %71, %64
  %73 = fmul float %28, %60
  %74 = fadd float %73, %66
  %75 = fmul float %29, %60
  %76 = fadd float %75, %68
  %77 = fmul float %30, %60
  %78 = fadd float %77, %70
  %79 = fmul float %31, %60
  %80 = fadd float %79, %72
  %81 = fadd float %74, %32
  %82 = fadd float %76, %33
  %83 = fadd float %78, %34
  %84 = fadd float %80, %35
  %85 = fmul float %52, %36
  %86 = fmul float %53, %36
  %87 = fmul float %54, %36
  %88 = fmul float %11, %85
  %89 = fmul float %12, %85
  %90 = fmul float %13, %85
  %91 = fmul float %14, %86
  %92 = fadd float %91, %88
  %93 = fmul float %15, %86
  %94 = fadd float %93, %89
  %95 = fmul float %16, %86
  %96 = fadd float %95, %90
  %97 = fmul float %17, %87
  %98 = fadd float %97, %92
  %99 = fmul float %18, %87
  %100 = fadd float %99, %94
  %101 = fmul float %19, %87
  %102 = fadd float %101, %96
  %103 = fmul float %98, %98
  %104 = fmul float %100, %100
  %105 = fadd float %104, %103
  %106 = fmul float %102, %102
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %98, %108
  %110 = fmul float %100, %108
  %111 = fmul float %102, %108
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %111, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020123
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820001
04060804
d2820002
041a0301
7e045b02
100c0501
100e0503
10100505
c0820700
bf8c007f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020113
bf8c007f
10120204
c2020117
bf8c007f
d2820009
04260004
f800020f
09060708
c202012a
c2028126
bf8c000f
7e0c0205
d2820002
04180904
c202011b
bf8c007f
d2820003
04260404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
  0: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e000280
f8001800
00000000
bf810000
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL OUT[1], POSITION
DCL OUT[2], STENCIL
DCL SAMP[0]
DCL SAMP[1]
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MOV OUT[0], IMM[0].xxxy
  1: TEX OUT[1].z, IN[0], SAMP[0], 2D
  2: TEX OUT[2].y, IN[0], SAMP[1], 2D
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !0
  %26 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 2
  %36 = bitcast float %28 to i32
  %37 = bitcast float %29 to i32
  %38 = insertelement <2 x i32> undef, i32 %36, i32 0
  %39 = insertelement <2 x i32> %38, i32 %37, i32 1
  %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %25, <16 x i8> %27, i32 2)
  %41 = extractelement <4 x float> %40, i32 1
  call void @llvm.SI.export(i32 3, i32 0, i32 0, i32 8, i32 0, float %35, float %41, float %41, float %41)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0840304
c0c60508
bf8c007f
f0800200
00430002
c0800300
c0c20500
bf8c0070
f0800400
00010102
bf8c0770
f8000083
00000001
bf8c070f
7e0002f2
7e020280
f800180f
00010101
bf810000
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL OUT[1], POSITION
DCL OUT[2], STENCIL
DCL SAMP[0]
DCL SAMP[1]
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MOV OUT[0], IMM[0].xxxy
  1: TEX OUT[1].z, IN[0], SAMP[0], 2D
  2: TEX OUT[2].y, IN[0], SAMP[1], 2D
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !0
  %26 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %9)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %9)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 2
  %36 = bitcast float %28 to i32
  %37 = bitcast float %29 to i32
  %38 = insertelement <2 x i32> undef, i32 %36, i32 0
  %39 = insertelement <2 x i32> %38, i32 %37, i32 1
  %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %25, <16 x i8> %27, i32 2)
  %41 = extractelement <4 x float> %40, i32 1
  call void @llvm.SI.export(i32 3, i32 0, i32 0, i32 8, i32 0, float %35, float %41, float %41, float %41)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0840304
c0c60508
bf8c007f
f0800200
00430002
c0800300
c0c20500
bf8c0070
f0800400
00010102
bf8c0770
f8000083
00000001
bf8c070f
7e0002f2
7e020280
f800180f
00010101
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL CONST[22..23]
DCL CONST[4..21]
DCL TEMP[0]
DCL TEMP[1..9], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.9748,     0.7565}
IMM[2] FLT32 {   -0.8154,    -0.8791,     0.9456,    -0.7689}
IMM[3] FLT32 {   -0.8141,     0.9144,     4.0000,     0.0625}
IMM[4] FLT32 {   -0.9420,    -0.3991,    -0.0942,    -0.9294}
IMM[5] FLT32 {    0.3450,     0.2939,    -0.9159,     0.4577}
IMM[6] FLT32 {   -0.3828,     0.2768,     0.4432,    -0.9751}
IMM[7] FLT32 {    0.5374,    -0.4737,    -0.2650,    -0.4189}
IMM[8] FLT32 {    0.7920,     0.1909,    -0.2419,     0.9971}
IMM[9] FLT32 {    0.1998,     0.7864,     0.1438,    -0.1410}
IMM[10] FLT32 {    0.2500,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[23].xxxx, CONST[23].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[4], CONST[5]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[3], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[5].xxxx
 14: MOV TEMP[6].z, IMM[0].zzzz
 15: MOV TEMP[6].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[6].zzzz, CONST[6].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx
 21: MUL TEMP[6], CONST[11], TEMP[3].xxxx
 22: MAD TEMP[3], CONST[12], TEMP[3].yyyy, TEMP[6]
 23: MAD TEMP[3].xyz, CONST[13], TEMP[5].xxxx, TEMP[3]
 24: MOV TEMP[3].xyz, TEMP[3].xyzz
 25: TEX TEMP[3].xyz, TEMP[3], SAMP[2], CUBE
 26: DP3 TEMP[5].x, TEMP[4].xyzz, CONST[15].xyzz
 27: ADD TEMP[5].x, TEMP[5].xxxx, CONST[17].wwww
 28: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz
 29: RSQ TEMP[6].x, TEMP[6].xxxx
 30: MUL TEMP[6].xyz, TEMP[1].xyzz, TEMP[6].xxxx
 31: ADD TEMP[6].xyz, CONST[20].xyzz, -TEMP[6].xyzz
 32: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
 33: RSQ TEMP[7].x, TEMP[7].xxxx
 34: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
 35: DP3_SAT TEMP[4].x, TEMP[6].xyzz, TEMP[4].xyzz
 36: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 37: POW TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx
 38: SGE TEMP[4].x, TEMP[5].xxxx, IMM[1].yyyy
 39: F2I TEMP[4].x, -TEMP[4]
 40: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
 41: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
 42: MOV_SAT TEMP[4].x, TEMP[5].xxxx
 43: LRP TEMP[4].xyz, TEMP[4].xxxx, CONST[16].xyzz, CONST[18].xyzz
 44: MOV_SAT TEMP[6].x, -TEMP[5].xxxx
 45: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[17].xyzz, CONST[18].xyzz
 46: SLT TEMP[7].x, TEMP[5].xxxx, IMM[0].wwww
 47: F2I TEMP[7].x, -TEMP[7]
 48: UIF TEMP[7].xxxx :2
 49:   MOV TEMP[6].xyz, TEMP[6].xyzx
 50: ELSE :2
 51:   MOV TEMP[6].xyz, TEMP[4].xyzx
 52: ENDIF
 53: MUL TEMP[2].x, CONST[19].wwww, TEMP[2].xxxx
 54: MUL TEMP[4], CONST[7], TEMP[1].xxxx
 55: MAD TEMP[4], CONST[8], TEMP[1].yyyy, TEMP[4]
 56: MAD TEMP[1], CONST[9], TEMP[1].zzzz, TEMP[4]
 57: ADD TEMP[1].xyz, TEMP[1], CONST[10]
 58: MOV_SAT TEMP[4].x, TEMP[5].xxxx
 59: MAD TEMP[7].xy, IMM[1].zwww, CONST[22].yyyy, TEMP[1].xyyy
 60: MOV TEMP[7].xy, TEMP[7].xyyy
 61: MOV TEMP[7].z, TEMP[1].zzzz
 62: TEX TEMP[7].x, TEMP[7], SAMP[0], SHADOW2D
 63: MAD TEMP[8].xy, IMM[2].xyyy, CONST[22].yyyy, TEMP[1].xyyy
 64: MOV TEMP[8].xy, TEMP[8].xyyy
 65: MOV TEMP[8].z, TEMP[1].zzzz
 66: TEX TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D
 67: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx
 68: MAD TEMP[8].xy, IMM[2].zwww, CONST[22].yyyy, TEMP[1].xyyy
 69: MOV TEMP[8].xy, TEMP[8].xyyy
 70: MOV TEMP[8].z, TEMP[1].zzzz
 71: TEX TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D
 72: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx
 73: MAD TEMP[8].xy, IMM[3].xyyy, CONST[22].yyyy, TEMP[1].xyyy
 74: MOV TEMP[8].xy, TEMP[8].xyyy
 75: MOV TEMP[8].z, TEMP[1].zzzz
 76: TEX TEMP[8].x, TEMP[8], SAMP[0], SHADOW2D
 77: ADD TEMP[8].x, TEMP[7].xxxx, TEMP[8].xxxx
 78: MOV TEMP[7].x, TEMP[8].xxxx
 79: ADD TEMP[9].x, IMM[3].zzzz, -TEMP[8].xxxx
 80: MUL TEMP[9].x, TEMP[8].xxxx, TEMP[9].xxxx
 81: MUL TEMP[4].x, TEMP[9].xxxx, TEMP[4].xxxx
 82: SNE TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww
 83: F2I TEMP[4].x, -TEMP[4]
 84: UIF TEMP[4].xxxx :2
 85:   MAD TEMP[4].xy, IMM[4].xyyy, CONST[22].yyyy, TEMP[1].xyyy
 86:   MOV TEMP[4].xy, TEMP[4].xyyy
 87:   MOV TEMP[4].z, TEMP[1].zzzz
 88:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
 89:   ADD TEMP[7].x, TEMP[8].xxxx, TEMP[4].xxxx
 90:   MAD TEMP[4].xy, IMM[4].zwww, CONST[22].yyyy, TEMP[1].xyyy
 91:   MOV TEMP[4].xy, TEMP[4].xyyy
 92:   MOV TEMP[4].z, TEMP[1].zzzz
 93:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
 94:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
 95:   MAD TEMP[4].xy, IMM[5].xyyy, CONST[22].yyyy, TEMP[1].xyyy
 96:   MOV TEMP[4].xy, TEMP[4].xyyy
 97:   MOV TEMP[4].z, TEMP[1].zzzz
 98:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
 99:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
100:   MAD TEMP[4].xy, IMM[5].zwww, CONST[22].yyyy, TEMP[1].xyyy
101:   MOV TEMP[4].xy, TEMP[4].xyyy
102:   MOV TEMP[4].z, TEMP[1].zzzz
103:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
104:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
105:   MAD TEMP[4].xy, IMM[6].xyyy, CONST[22].yyyy, TEMP[1].xyyy
106:   MOV TEMP[4].xy, TEMP[4].xyyy
107:   MOV TEMP[4].z, TEMP[1].zzzz
108:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
109:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
110:   MAD TEMP[4].xy, IMM[6].zwww, CONST[22].yyyy, TEMP[1].xyyy
111:   MOV TEMP[4].xy, TEMP[4].xyyy
112:   MOV TEMP[4].z, TEMP[1].zzzz
113:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
114:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
115:   MAD TEMP[4].xy, IMM[7].xyyy, CONST[22].yyyy, TEMP[1].xyyy
116:   MOV TEMP[4].xy, TEMP[4].xyyy
117:   MOV TEMP[4].z, TEMP[1].zzzz
118:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
119:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
120:   MAD TEMP[4].xy, IMM[7].zwww, CONST[22].yyyy, TEMP[1].xyyy
121:   MOV TEMP[4].xy, TEMP[4].xyyy
122:   MOV TEMP[4].z, TEMP[1].zzzz
123:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
124:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
125:   MAD TEMP[4].xy, IMM[8].xyyy, CONST[22].yyyy, TEMP[1].xyyy
126:   MOV TEMP[4].xy, TEMP[4].xyyy
127:   MOV TEMP[4].z, TEMP[1].zzzz
128:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
129:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
130:   MAD TEMP[4].xy, IMM[8].zwww, CONST[22].yyyy, TEMP[1].xyyy
131:   MOV TEMP[4].xy, TEMP[4].xyyy
132:   MOV TEMP[4].z, TEMP[1].zzzz
133:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
134:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
135:   MAD TEMP[4].xy, IMM[9].xyyy, CONST[22].yyyy, TEMP[1].xyyy
136:   MOV TEMP[4].xy, TEMP[4].xyyy
137:   MOV TEMP[4].z, TEMP[1].zzzz
138:   TEX TEMP[4].x, TEMP[4], SAMP[0], SHADOW2D
139:   ADD TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx
140:   MAD TEMP[4].xy, IMM[9].zwww, CONST[22].yyyy, TEMP[1].xyyy
141:   MOV TEMP[4].xy, TEMP[4].xyyy
142:   MOV TEMP[4].z, TEMP[1].zzzz
143:   TEX TEMP[1].x, TEMP[4], SAMP[0], SHADOW2D
144:   ADD TEMP[1].x, TEMP[7].xxxx, TEMP[1].xxxx
145:   MUL TEMP[7].x, TEMP[1].xxxx, IMM[3].wwww
146: ELSE :2
147:   MUL TEMP[7].x, TEMP[7].xxxx, IMM[10].xxxx
148: ENDIF
149: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[7].xxxx
150: ADD TEMP[2].x, TEMP[7].xxxx, CONST[22].xxxx
151: MOV_SAT TEMP[4].x, -TEMP[5].xxxx
152: ADD_SAT TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
153: MAD TEMP[2].xyz, TEMP[6].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
154: MOV TEMP[2].w, TEMP[1].xxxx
155: MOV OUT[0], TEMP[2]
156: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 264)
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 272)
  %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 276)
  %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 280)
  %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 284)
  %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 288)
  %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 292)
  %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 296)
  %68 = call float @llvm.SI.load.const(<16 x i8> %21, i32 316)
  %69 = call float @llvm.SI.load.const(<16 x i8> %21, i32 320)
  %70 = call float @llvm.SI.load.const(<16 x i8> %21, i32 324)
  %71 = call float @llvm.SI.load.const(<16 x i8> %21, i32 328)
  %72 = call float @llvm.SI.load.const(<16 x i8> %21, i32 352)
  %73 = call float @llvm.SI.load.const(<16 x i8> %21, i32 356)
  %74 = call float @llvm.SI.load.const(<16 x i8> %21, i32 368)
  %75 = call float @llvm.SI.load.const(<16 x i8> %21, i32 372)
  %76 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %77 = load <32 x i8> addrspace(2)* %76, !tbaa !0
  %78 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %81 = load <32 x i8> addrspace(2)* %80, !tbaa !0
  %82 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %83 = load <16 x i8> addrspace(2)* %82, !tbaa !0
  %84 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %85 = load <32 x i8> addrspace(2)* %84, !tbaa !0
  %86 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %89 = load <32 x i8> addrspace(2)* %88, !tbaa !0
  %90 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %91 = load <16 x i8> addrspace(2)* %90, !tbaa !0
  %92 = fmul float %13, %74
  %93 = fadd float %92, %75
  %94 = fmul float %12, %22
  %95 = fadd float %94, %26
  %96 = fmul float %93, %23
  %97 = fadd float %96, %27
  %98 = fmul float %12, %24
  %99 = fadd float %98, %28
  %100 = fmul float %93, %25
  %101 = fadd float %100, %29
  %102 = bitcast float %95 to i32
  %103 = bitcast float %97 to i32
  %104 = insertelement <2 x i32> undef, i32 %102, i32 0
  %105 = insertelement <2 x i32> %104, i32 %103, i32 1
  %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %105, <32 x i8> %89, <16 x i8> %91, i32 2)
  %107 = extractelement <4 x float> %106, i32 0
  %108 = extractelement <4 x float> %106, i32 1
  %109 = extractelement <4 x float> %106, i32 2
  %110 = extractelement <4 x float> %106, i32 3
  %111 = fmul float %108, 2.000000e+00
  %112 = fadd float %111, -1.000000e+00
  %113 = fmul float %109, 2.000000e+00
  %114 = fadd float %113, -1.000000e+00
  %115 = fmul float %110, 2.000000e+00
  %116 = fadd float %115, -1.000000e+00
  %117 = fmul float %112, %112
  %118 = fmul float %114, %114
  %119 = fadd float %117, %118
  %120 = fsub float -0.000000e+00, %119
  %121 = fadd float 1.000000e+00, %120
  %122 = call float @llvm.AMDIL.clamp.(float %121, float 0.000000e+00, float 1.000000e+00)
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %123, %122
  %125 = fsub float -0.000000e+00, %122
  %126 = call float @llvm.AMDGPU.cndlt(float %125, float %124, float 0.000000e+00)
  %127 = fmul float %126, %116
  %128 = bitcast float %95 to i32
  %129 = bitcast float %97 to i32
  %130 = insertelement <2 x i32> undef, i32 %128, i32 0
  %131 = insertelement <2 x i32> %130, i32 %129, i32 1
  %132 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %81, <16 x i8> %83, i32 2)
  %133 = extractelement <4 x float> %132, i32 0
  %134 = fmul float %133, %30
  %135 = fadd float %134, %31
  %136 = fdiv float 1.000000e+00, %135
  %137 = fmul float %99, %136
  %138 = fmul float %101, %136
  %139 = fmul float 1.000000e+00, %136
  %140 = fmul float %44, %112
  %141 = fmul float %45, %112
  %142 = fmul float %46, %112
  %143 = fmul float %47, %112
  %144 = fmul float %48, %114
  %145 = fadd float %144, %140
  %146 = fmul float %49, %114
  %147 = fadd float %146, %141
  %148 = fmul float %50, %114
  %149 = fadd float %148, %142
  %150 = fmul float %51, %114
  %151 = fadd float %150, %143
  %152 = fmul float %52, %127
  %153 = fadd float %152, %145
  %154 = fmul float %53, %127
  %155 = fadd float %154, %147
  %156 = fmul float %54, %127
  %157 = fadd float %156, %149
  %158 = insertelement <4 x float> undef, float %153, i32 0
  %159 = insertelement <4 x float> %158, float %155, i32 1
  %160 = insertelement <4 x float> %159, float %157, i32 2
  %161 = insertelement <4 x float> %160, float %151, i32 3
  %162 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %161)
  %163 = extractelement <4 x float> %162, i32 0
  %164 = extractelement <4 x float> %162, i32 1
  %165 = extractelement <4 x float> %162, i32 2
  %166 = extractelement <4 x float> %162, i32 3
  %167 = call float @fabs(float %165)
  %168 = fdiv float 1.000000e+00, %167
  %169 = fmul float %163, %168
  %170 = fadd float %169, 1.500000e+00
  %171 = fmul float %164, %168
  %172 = fadd float %171, 1.500000e+00
  %173 = bitcast float %172 to i32
  %174 = bitcast float %170 to i32
  %175 = bitcast float %166 to i32
  %176 = insertelement <4 x i32> undef, i32 %173, i32 0
  %177 = insertelement <4 x i32> %176, i32 %174, i32 1
  %178 = insertelement <4 x i32> %177, i32 %175, i32 2
  %179 = insertelement <4 x i32> %178, i32 undef, i32 3
  %180 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %179, <32 x i8> %85, <16 x i8> %87, i32 4)
  %181 = extractelement <4 x float> %180, i32 0
  %182 = extractelement <4 x float> %180, i32 1
  %183 = extractelement <4 x float> %180, i32 2
  %184 = fmul float %112, %55
  %185 = fmul float %114, %56
  %186 = fadd float %185, %184
  %187 = fmul float %127, %57
  %188 = fadd float %186, %187
  %189 = fadd float %188, %64
  %190 = fmul float %137, %137
  %191 = fmul float %138, %138
  %192 = fadd float %191, %190
  %193 = fmul float %139, %139
  %194 = fadd float %192, %193
  %195 = call float @llvm.AMDGPU.rsq(float %194)
  %196 = fmul float %137, %195
  %197 = fmul float %138, %195
  %198 = fmul float %139, %195
  %199 = fsub float -0.000000e+00, %196
  %200 = fadd float %69, %199
  %201 = fsub float -0.000000e+00, %197
  %202 = fadd float %70, %201
  %203 = fsub float -0.000000e+00, %198
  %204 = fadd float %71, %203
  %205 = fmul float %200, %200
  %206 = fmul float %202, %202
  %207 = fadd float %206, %205
  %208 = fmul float %204, %204
  %209 = fadd float %207, %208
  %210 = call float @llvm.AMDGPU.rsq(float %209)
  %211 = fmul float %200, %210
  %212 = fmul float %202, %210
  %213 = fmul float %204, %210
  %214 = fmul float %211, %112
  %215 = fmul float %212, %114
  %216 = fadd float %215, %214
  %217 = fmul float %213, %127
  %218 = fadd float %216, %217
  %219 = call float @llvm.AMDIL.clamp.(float %218, float 0.000000e+00, float 1.000000e+00)
  %220 = fmul float %107, 1.024000e+03
  %221 = call float @llvm.pow.f32(float %219, float %220)
  %222 = fcmp uge float %189, 0x3F847AE140000000
  %223 = select i1 %222, float 1.000000e+00, float 0.000000e+00
  %224 = fsub float -0.000000e+00, %223
  %225 = fptosi float %224 to i32
  %226 = bitcast i32 %225 to float
  %227 = bitcast float %226 to i32
  %228 = and i32 %227, 1065353216
  %229 = bitcast i32 %228 to float
  %230 = fmul float %221, %229
  %231 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00)
  %232 = call float @llvm.AMDGPU.lrp(float %231, float %58, float %65)
  %233 = call float @llvm.AMDGPU.lrp(float %231, float %59, float %66)
  %234 = call float @llvm.AMDGPU.lrp(float %231, float %60, float %67)
  %235 = fsub float -0.000000e+00, %189
  %236 = call float @llvm.AMDIL.clamp.(float %235, float 0.000000e+00, float 1.000000e+00)
  %237 = call float @llvm.AMDGPU.lrp(float %236, float %61, float %65)
  %238 = call float @llvm.AMDGPU.lrp(float %236, float %62, float %66)
  %239 = call float @llvm.AMDGPU.lrp(float %236, float %63, float %67)
  %240 = fcmp ult float %189, 0.000000e+00
  %241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
  %242 = fsub float -0.000000e+00, %241
  %243 = fptosi float %242 to i32
  %244 = bitcast i32 %243 to float
  %245 = bitcast float %244 to i32
  %246 = icmp ne i32 %245, 0
  %. = select i1 %246, float %237, float %232
  %.43 = select i1 %246, float %238, float %233
  %.44 = select i1 %246, float %239, float %234
  %247 = fmul float %68, %230
  %248 = fmul float %32, %137
  %249 = fmul float %33, %137
  %250 = fmul float %34, %137
  %251 = fmul float %35, %138
  %252 = fadd float %251, %248
  %253 = fmul float %36, %138
  %254 = fadd float %253, %249
  %255 = fmul float %37, %138
  %256 = fadd float %255, %250
  %257 = fmul float %38, %139
  %258 = fadd float %257, %252
  %259 = fmul float %39, %139
  %260 = fadd float %259, %254
  %261 = fmul float %40, %139
  %262 = fadd float %261, %256
  %263 = fadd float %258, %41
  %264 = fadd float %260, %42
  %265 = fadd float %262, %43
  %266 = call float @llvm.AMDIL.clamp.(float %189, float 0.000000e+00, float 1.000000e+00)
  %267 = fmul float 0x3FEF31EC00000000, %73
  %268 = fadd float %267, %263
  %269 = fmul float 0x3FE8351D80000000, %73
  %270 = fadd float %269, %264
  %271 = bitcast float %265 to i32
  %272 = bitcast float %268 to i32
  %273 = bitcast float %270 to i32
  %274 = insertelement <4 x i32> undef, i32 %271, i32 0
  %275 = insertelement <4 x i32> %274, i32 %272, i32 1
  %276 = insertelement <4 x i32> %275, i32 %273, i32 2
  %277 = insertelement <4 x i32> %276, i32 undef, i32 3
  %278 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %277, <32 x i8> %77, <16 x i8> %79, i32 7)
  %279 = extractelement <4 x float> %278, i32 0
  %280 = fmul float 0xBFEA181A80000000, %73
  %281 = fadd float %280, %263
  %282 = fmul float 0xBFEC21CA00000000, %73
  %283 = fadd float %282, %264
  %284 = bitcast float %265 to i32
  %285 = bitcast float %281 to i32
  %286 = bitcast float %283 to i32
  %287 = insertelement <4 x i32> undef, i32 %284, i32 0
  %288 = insertelement <4 x i32> %287, i32 %285, i32 1
  %289 = insertelement <4 x i32> %288, i32 %286, i32 2
  %290 = insertelement <4 x i32> %289, i32 undef, i32 3
  %291 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %290, <32 x i8> %77, <16 x i8> %79, i32 7)
  %292 = extractelement <4 x float> %291, i32 0
  %293 = fadd float %279, %292
  %294 = fmul float 0x3FEE423DC0000000, %73
  %295 = fadd float %294, %263
  %296 = fmul float 0xBFE89AE360000000, %73
  %297 = fadd float %296, %264
  %298 = bitcast float %265 to i32
  %299 = bitcast float %295 to i32
  %300 = bitcast float %297 to i32
  %301 = insertelement <4 x i32> undef, i32 %298, i32 0
  %302 = insertelement <4 x i32> %301, i32 %299, i32 1
  %303 = insertelement <4 x i32> %302, i32 %300, i32 2
  %304 = insertelement <4 x i32> %303, i32 undef, i32 3
  %305 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %304, <32 x i8> %77, <16 x i8> %79, i32 7)
  %306 = extractelement <4 x float> %305, i32 0
  %307 = fadd float %293, %306
  %308 = fmul float 0xBFEA0D1A80000000, %73
  %309 = fadd float %308, %263
  %310 = fmul float 0x3FED429140000000, %73
  %311 = fadd float %310, %264
  %312 = bitcast float %265 to i32
  %313 = bitcast float %309 to i32
  %314 = bitcast float %311 to i32
  %315 = insertelement <4 x i32> undef, i32 %312, i32 0
  %316 = insertelement <4 x i32> %315, i32 %313, i32 1
  %317 = insertelement <4 x i32> %316, i32 %314, i32 2
  %318 = insertelement <4 x i32> %317, i32 undef, i32 3
  %319 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %318, <32 x i8> %77, <16 x i8> %79, i32 7)
  %320 = extractelement <4 x float> %319, i32 0
  %321 = fadd float %307, %320
  %322 = fsub float -0.000000e+00, %321
  %323 = fadd float 4.000000e+00, %322
  %324 = fmul float %321, %323
  %325 = fmul float %324, %266
  %326 = fcmp une float %325, 0.000000e+00
  %327 = select i1 %326, float 1.000000e+00, float 0.000000e+00
  %328 = fsub float -0.000000e+00, %327
  %329 = fptosi float %328 to i32
  %330 = bitcast i32 %329 to float
  %331 = bitcast float %330 to i32
  %332 = icmp ne i32 %331, 0
  br i1 %332, label %IF41, label %ELSE42

IF41:                                             ; preds = %main_body
  %333 = fmul float 0xBFEE24FF40000000, %73
  %334 = fadd float %333, %263
  %335 = fmul float 0xBFD98A3C00000000, %73
  %336 = fadd float %335, %264
  %337 = bitcast float %265 to i32
  %338 = bitcast float %334 to i32
  %339 = bitcast float %336 to i32
  %340 = insertelement <4 x i32> undef, i32 %337, i32 0
  %341 = insertelement <4 x i32> %340, i32 %338, i32 1
  %342 = insertelement <4 x i32> %341, i32 %339, i32 2
  %343 = insertelement <4 x i32> %342, i32 undef, i32 3
  %344 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %343, <32 x i8> %77, <16 x i8> %79, i32 7)
  %345 = extractelement <4 x float> %344, i32 0
  %346 = fadd float %321, %345
  %347 = fmul float 0xBFB81C7300000000, %73
  %348 = fadd float %347, %263
  %349 = fmul float 0xBFEDBD8D60000000, %73
  %350 = fadd float %349, %264
  %351 = bitcast float %265 to i32
  %352 = bitcast float %348 to i32
  %353 = bitcast float %350 to i32
  %354 = insertelement <4 x i32> undef, i32 %351, i32 0
  %355 = insertelement <4 x i32> %354, i32 %352, i32 1
  %356 = insertelement <4 x i32> %355, i32 %353, i32 2
  %357 = insertelement <4 x i32> %356, i32 undef, i32 3
  %358 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %357, <32 x i8> %77, <16 x i8> %79, i32 7)
  %359 = extractelement <4 x float> %358, i32 0
  %360 = fadd float %346, %359
  %361 = fmul float 0x3FD613D080000000, %73
  %362 = fadd float %361, %263
  %363 = fmul float 0x3FD2CEE400000000, %73
  %364 = fadd float %363, %264
  %365 = bitcast float %265 to i32
  %366 = bitcast float %362 to i32
  %367 = bitcast float %364 to i32
  %368 = insertelement <4 x i32> undef, i32 %365, i32 0
  %369 = insertelement <4 x i32> %368, i32 %366, i32 1
  %370 = insertelement <4 x i32> %369, i32 %367, i32 2
  %371 = insertelement <4 x i32> %370, i32 undef, i32 3
  %372 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %371, <32 x i8> %77, <16 x i8> %79, i32 7)
  %373 = extractelement <4 x float> %372, i32 0
  %374 = fadd float %360, %373
  %375 = fmul float 0xBFED4EEFC0000000, %73
  %376 = fadd float %375, %263
  %377 = fmul float 0x3FDD4B3100000000, %73
  %378 = fadd float %377, %264
  %379 = bitcast float %265 to i32
  %380 = bitcast float %376 to i32
  %381 = bitcast float %378 to i32
  %382 = insertelement <4 x i32> undef, i32 %379, i32 0
  %383 = insertelement <4 x i32> %382, i32 %380, i32 1
  %384 = insertelement <4 x i32> %383, i32 %381, i32 2
  %385 = insertelement <4 x i32> %384, i32 undef, i32 3
  %386 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %385, <32 x i8> %77, <16 x i8> %79, i32 7)
  %387 = extractelement <4 x float> %386, i32 0
  %388 = fadd float %374, %387
  %389 = fmul float 0xBFD87F6480000000, %73
  %390 = fadd float %389, %263
  %391 = fmul float 0x3FD1B69300000000, %73
  %392 = fadd float %391, %264
  %393 = bitcast float %265 to i32
  %394 = bitcast float %390 to i32
  %395 = bitcast float %392 to i32
  %396 = insertelement <4 x i32> undef, i32 %393, i32 0
  %397 = insertelement <4 x i32> %396, i32 %394, i32 1
  %398 = insertelement <4 x i32> %397, i32 %395, i32 2
  %399 = insertelement <4 x i32> %398, i32 undef, i32 3
  %400 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %399, <32 x i8> %77, <16 x i8> %79, i32 7)
  %401 = extractelement <4 x float> %400, i32 0
  %402 = fadd float %388, %401
  %403 = fmul float 0x3FDC5DEF00000000, %73
  %404 = fadd float %403, %263
  %405 = fmul float 0xBFEF342580000000, %73
  %406 = fadd float %405, %264
  %407 = bitcast float %265 to i32
  %408 = bitcast float %404 to i32
  %409 = bitcast float %406 to i32
  %410 = insertelement <4 x i32> undef, i32 %407, i32 0
  %411 = insertelement <4 x i32> %410, i32 %408, i32 1
  %412 = insertelement <4 x i32> %411, i32 %409, i32 2
  %413 = insertelement <4 x i32> %412, i32 undef, i32 3
  %414 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %413, <32 x i8> %77, <16 x i8> %79, i32 7)
  %415 = extractelement <4 x float> %414, i32 0
  %416 = fadd float %402, %415
  %417 = fmul float 0x3FE132A000000000, %73
  %418 = fadd float %417, %263
  %419 = fmul float 0xBFDE51A940000000, %73
  %420 = fadd float %419, %264
  %421 = bitcast float %265 to i32
  %422 = bitcast float %418 to i32
  %423 = bitcast float %420 to i32
  %424 = insertelement <4 x i32> undef, i32 %421, i32 0
  %425 = insertelement <4 x i32> %424, i32 %422, i32 1
  %426 = insertelement <4 x i32> %425, i32 %423, i32 2
  %427 = insertelement <4 x i32> %426, i32 undef, i32 3
  %428 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %427, <32 x i8> %77, <16 x i8> %79, i32 7)
  %429 = extractelement <4 x float> %428, i32 0
  %430 = fadd float %416, %429
  %431 = fmul float 0xBFD0F54100000000, %73
  %432 = fadd float %431, %263
  %433 = fmul float 0xBFDACFC0C0000000, %73
  %434 = fadd float %433, %264
  %435 = bitcast float %265 to i32
  %436 = bitcast float %432 to i32
  %437 = bitcast float %434 to i32
  %438 = insertelement <4 x i32> undef, i32 %435, i32 0
  %439 = insertelement <4 x i32> %438, i32 %436, i32 1
  %440 = insertelement <4 x i32> %439, i32 %437, i32 2
  %441 = insertelement <4 x i32> %440, i32 undef, i32 3
  %442 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %441, <32 x i8> %77, <16 x i8> %79, i32 7)
  %443 = extractelement <4 x float> %442, i32 0
  %444 = fadd float %430, %443
  %445 = fmul float 0x3FE957DC40000000, %73
  %446 = fadd float %445, %263
  %447 = fmul float 0x3FC86F7900000000, %73
  %448 = fadd float %447, %264
  %449 = bitcast float %265 to i32
  %450 = bitcast float %446 to i32
  %451 = bitcast float %448 to i32
  %452 = insertelement <4 x i32> undef, i32 %449, i32 0
  %453 = insertelement <4 x i32> %452, i32 %450, i32 1
  %454 = insertelement <4 x i32> %453, i32 %451, i32 2
  %455 = insertelement <4 x i32> %454, i32 undef, i32 3
  %456 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %455, <32 x i8> %77, <16 x i8> %79, i32 7)
  %457 = extractelement <4 x float> %456, i32 0
  %458 = fadd float %444, %457
  %459 = fmul float 0xBFCEF63300000000, %73
  %460 = fadd float %459, %263
  %461 = fmul float 0x3FEFE7F500000000, %73
  %462 = fadd float %461, %264
  %463 = bitcast float %265 to i32
  %464 = bitcast float %460 to i32
  %465 = bitcast float %462 to i32
  %466 = insertelement <4 x i32> undef, i32 %463, i32 0
  %467 = insertelement <4 x i32> %466, i32 %464, i32 1
  %468 = insertelement <4 x i32> %467, i32 %465, i32 2
  %469 = insertelement <4 x i32> %468, i32 undef, i32 3
  %470 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %469, <32 x i8> %77, <16 x i8> %79, i32 7)
  %471 = extractelement <4 x float> %470, i32 0
  %472 = fadd float %458, %471
  %473 = fmul float 0x3FC9946600000000, %73
  %474 = fadd float %473, %263
  %475 = fmul float 0x3FE92A4D00000000, %73
  %476 = fadd float %475, %264
  %477 = bitcast float %265 to i32
  %478 = bitcast float %474 to i32
  %479 = bitcast float %476 to i32
  %480 = insertelement <4 x i32> undef, i32 %477, i32 0
  %481 = insertelement <4 x i32> %480, i32 %478, i32 1
  %482 = insertelement <4 x i32> %481, i32 %479, i32 2
  %483 = insertelement <4 x i32> %482, i32 undef, i32 3
  %484 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %483, <32 x i8> %77, <16 x i8> %79, i32 7)
  %485 = extractelement <4 x float> %484, i32 0
  %486 = fadd float %472, %485
  %487 = fmul float 0x3FC2691300000000, %73
  %488 = fadd float %487, %263
  %489 = fmul float 0xBFC20C8C00000000, %73
  %490 = fadd float %489, %264
  %491 = bitcast float %265 to i32
  %492 = bitcast float %488 to i32
  %493 = bitcast float %490 to i32
  %494 = insertelement <4 x i32> undef, i32 %491, i32 0
  %495 = insertelement <4 x i32> %494, i32 %492, i32 1
  %496 = insertelement <4 x i32> %495, i32 %493, i32 2
  %497 = insertelement <4 x i32> %496, i32 undef, i32 3
  %498 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %497, <32 x i8> %77, <16 x i8> %79, i32 7)
  %499 = extractelement <4 x float> %498, i32 0
  %500 = fadd float %486, %499
  %501 = fmul float %500, 6.250000e-02
  br label %ENDIF40

ELSE42:                                           ; preds = %main_body
  %502 = fmul float %321, 2.500000e-01
  br label %ENDIF40

ENDIF40:                                          ; preds = %ELSE42, %IF41
  %temp28.0 = phi float [ %501, %IF41 ], [ %502, %ELSE42 ]
  %503 = fmul float %247, %temp28.0
  %504 = fadd float %temp28.0, %72
  %505 = fsub float -0.000000e+00, %189
  %506 = call float @llvm.AMDIL.clamp.(float %505, float 0.000000e+00, float 1.000000e+00)
  %507 = fadd float %504, %506
  %508 = call float @llvm.AMDIL.clamp.(float %507, float 0.000000e+00, float 1.000000e+00)
  %509 = fmul float %., %508
  %510 = fadd float %509, %181
  %511 = fmul float %.43, %508
  %512 = fadd float %511, %182
  %513 = fmul float %.44, %508
  %514 = fadd float %513, %183
  %515 = call i32 @llvm.SI.packf16(float %510, float %512)
  %516 = bitcast i32 %515 to float
  %517 = call i32 @llvm.SI.packf16(float %514, float %503)
  %518 = bitcast i32 %517 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %516, float %518, float %516, float %518)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c200095c
c200895d
bf8c007f
7e000201
d2820008
04000103
c2000911
c2008915
bf8c007f
7e000201
d282000a
04000108
c2000910
c2008914
bf8c007f
7e000201
d2820009
04000102
c086030c
c0c80518
bf8c007f
f0800f00
00640c09
bf8c0770
06001b0d
062200f3
06001d0e
062400f3
10002512
d2820000
04022311
080000f2
d2060800
02010100
7e025b00
10020101
d2060000
22010100
d0080000
02020080
d2000000
00020280
06021f0f
060202f3
10260300
c200092d
bf8c007f
10002200
c2000931
bf8c007f
d2820000
04022400
c2000935
bf8c007f
d2820004
04022600
c200092c
bf8c007f
10002200
c2000930
bf8c007f
d2820000
04022400
c2000934
bf8c007f
d2820003
04022600
c200092e
bf8c007f
10002200
c2000932
bf8c007f
d2820000
04022400
c2000936
bf8c007f
d2820005
04022600
c200092f
bf8c007f
10002200
c2000933
bf8c007f
d2820006
04022400
d28a0015
04160903
d28c0014
04160903
d28e0016
04160903
d2880017
04160903
d2060100
02010116
7e005500
7e0202ff
3fc00000
d2820016
04060114
d2820015
04060115
c0860308
c0c80510
bf8c007f
f0800700
00640315
c200093c
bf8c0070
10002200
c200093d
bf8c007f
d2820000
04000112
c200093e
bf8c007f
d2820000
04000113
c2000947
bf8c007f
06000000
d2060814
02010100
080e28f2
c200094a
bf8c007f
10020e00
c2008942
bf8c007f
d2820001
04040314
d2060006
22010100
d206080b
02010106
082016f2
100c2000
c2000946
bf8c007f
d2820006
0418010b
d0020000
02010100
d2000015
0001e480
d2060015
22010115
7e2a1115
d10a0000
02010115
d2000001
00020d01
c2030949
bf8c007f
100c0e06
c2038941
bf8c007f
d2820006
04180f14
102a2006
c2030945
bf8c007f
d2820015
04540d0b
d2000006
00022b06
c2030948
bf8c007f
100e0e06
c2038940
bf8c007f
d2820007
041c0f14
10202006
c2030944
bf8c007f
d282000b
04400d0b
d2000007
00021707
c0860304
c0c80508
bf8c007f
f0800100
00640909
c200091a
c200891b
bf8c0070
7e140201
d2820009
04280109
7e2a5509
c2000913
c2008917
bf8c007f
7e120201
d2820008
04240108
102c2b08
c2000912
c2008916
bf8c007f
7e100201
d2820002
04200102
102e2b02
c200091c
bf8c007f
10042e00
c2000920
bf8c007f
d2820002
040a2c00
c2000924
bf8c007f
d2820002
040a2a00
c2000928
bf8c007f
06040400
c2030959
7e1002ff
bf50c0d4
bf8c007f
d2820009
040a1006
c200091e
bf8c007f
10202e00
c2000922
bf8c007f
d2820010
04422c00
c2000926
bf8c007f
d2820010
04422a00
c200092a
bf8c007f
06102000
c200091d
bf8c007f
10202e00
c2000921
bf8c007f
d2820010
04422c00
c2000925
bf8c007f
d2820010
04422a00
c2000929
bf8c007f
06202000
7e3002ff
bf610e50
d282000a
04423006
c0800300
c0c60500
bf8c007f
f0a00100
00031808
7e3202ff
3f798f60
d282001d
040a3206
7e320308
7e340309
7e36030a
7e38030b
7e34031d
7e3a02ff
3f41a8ec
d282001b
04423a06
f0a00100
00031919
bf8c0770
06303119
7e3202ff
3f7211ee
d282001d
040a3206
7e320308
7e340309
7e36030a
7e38030b
7e34031d
7e3a02ff
bf44d71b
d282001b
04423a06
f0a00100
00031919
bf8c0770
06303318
7e3202ff
bf5068d4
d282001d
040a3206
7e320308
7e340309
7e36030a
7e38030b
7e34031d
7e3a02ff
3f6a148a
d282001b
04423a06
f0a00100
00031919
bf8c0770
06303318
083230f6
10323318
10282919
d00a0004
02010114
d2000014
0011e480
d2060014
22010114
7e281114
d1040004
02010114
10282f17
d2820014
04522d16
d2820014
04522b15
7e325b14
10283316
c2038951
bf8c007f
08282807
102c3317
c2038950
bf8c007f
082c2c07
102e2d16
d2820017
045e2914
102a3315
c2038952
bf8c007f
082a2a07
d2820017
045e2b15
7e2e5b17
10282f14
102c2f16
10222316
d2820011
04462514
10242f15
d2820011
04462712
d2060811
02010111
7e224f11
101818ff
44800000
0e18230c
7e184b0c
7e1a02ff
3c23d70a
d00c0014
02021b00
d200000d
0051e480
d206000d
2201010d
7e1a110d
361a1af2
10181b0c
c203894f
bf8c007f
10181807
c2038958
bf8c007f
be842404
8984047e
101c30ff
3e800000
be842504
7e1a0207
89fe047e
bf880093
7e1c0206
7e1e02ff
becc51e0
d282000a
04421f0e
7e1e02ff
bf7127fa
d2820009
040a1f0e
f0a00100
00030f08
bf8c0770
061e1f18
7e2202ff
bf6dec6b
d282000a
0442230e
7e2202ff
bdc0e398
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3e967720
d282000a
0442230e
7e2202ff
3eb09e84
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3eea5988
d282000a
0442230e
7e2202ff
bf6a777e
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3e8db498
d282000a
0442230e
7e2202ff
bec3fb24
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
bf79a12c
d282000a
0442230e
7e2202ff
3ee2ef78
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
bef28d4a
d282000a
0442230e
7e2202ff
3f099500
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
bed67e06
d282000a
0442230e
7e2202ff
be87aa08
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3e437bc8
d282000a
0442230e
7e2202ff
3f4abee2
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3f7f3fa8
d282000a
0442230e
7e2202ff
be77b198
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
3f495268
d282000a
0442230e
7e2202ff
3e4ca330
d2820009
040a230e
f0a00100
00031108
bf8c0770
061e230f
7e2202ff
be106460
d282000a
0442230e
7e2002ff
3e134898
d2820009
040a210e
f0a00100
00030208
bf8c0770
0604050f
101c04ff
3d800000
88fe047e
06041b0e
d2060000
22010100
d2060800
02010100
06000102
d2060800
02010100
d2820001
04160101
10041d0c
5e020501
d2820002
04120106
d2820000
040e0107
5e000500
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL TEMP[0], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MOV OUT[0], TEMP[0]
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800700
bf8c007f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[8..9]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy
  2: DP4 TEMP[1].x, IMM[0].xxxx, CONST[6]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[4].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].xxxx
  5: MOV TEMP[2].w, TEMP[1].wwww
  6: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[1].xyyy
  7: MOV TEMP[3].xy, TEMP[3].xyyy
  8: TEX TEMP[3], TEMP[3], SAMP[0], 2D
  9: DP4 TEMP[4].x, IMM[0].xxxx, CONST[5]
 10: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx
 11: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 12: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[0].yzww
 13: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx
 14: RCP TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 17: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[1].xyzz
 18: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 19: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 20: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 21: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[2].xyzz
 22: MOV OUT[0], TEMP[2]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %49 = fmul float %13, %38
  %50 = fadd float %49, %39
  %51 = fmul float 1.000000e+00, %34
  %52 = fmul float 1.000000e+00, %35
  %53 = fadd float %51, %52
  %54 = fmul float 1.000000e+00, %36
  %55 = fadd float %53, %54
  %56 = fmul float 1.000000e+00, %37
  %57 = fadd float %55, %56
  %58 = fadd float %57, %29
  %59 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %59, float %44, float 1.000000e+00)
  %61 = call float @llvm.AMDGPU.lrp(float %59, float %45, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %59, float %46, float 1.000000e+00)
  %63 = call float @llvm.AMDGPU.lrp(float %59, float %47, float 1.000000e+00)
  %64 = fmul float %12, %22
  %65 = fmul float %50, %23
  %66 = bitcast float %64 to i32
  %67 = bitcast float %65 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %41, <16 x i8> %43, i32 2)
  %71 = extractelement <4 x float> %70, i32 0
  %72 = extractelement <4 x float> %70, i32 1
  %73 = extractelement <4 x float> %70, i32 2
  %74 = extractelement <4 x float> %70, i32 3
  %75 = fmul float 1.000000e+00, %30
  %76 = fmul float 1.000000e+00, %31
  %77 = fadd float %75, %76
  %78 = fmul float 1.000000e+00, %32
  %79 = fadd float %77, %78
  %80 = fmul float 1.000000e+00, %33
  %81 = fadd float %79, %80
  %82 = fadd float %81, %28
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = fmul float %83, %74
  %85 = fmul float %71, 0x3FCB367A00000000
  %86 = fmul float %72, 0x3FE6E2EB20000000
  %87 = fadd float %86, %85
  %88 = fmul float %73, 0x3FB27BB300000000
  %89 = fadd float %87, %88
  %90 = fcmp uge float %89, 0x3F50624DE0000000
  %91 = select i1 %90, float %89, float 0x3F50624DE0000000
  %92 = fdiv float 1.000000e+00, %91
  %93 = fmul float %71, %92
  %94 = fmul float %72, %92
  %95 = fmul float %73, %92
  %96 = fmul float %60, %71
  %97 = fmul float %61, %72
  %98 = fmul float %62, %73
  %99 = fmul float %84, %93
  %100 = fadd float %99, %96
  %101 = fmul float %84, %94
  %102 = fadd float %101, %97
  %103 = fmul float %84, %95
  %104 = fadd float %103, %98
  %105 = fmul float %100, 4.000000e+00
  %106 = fmul float %102, 4.000000e+00
  %107 = fmul float %104, 4.000000e+00
  %108 = fcmp uge float %48, %27
  %109 = select i1 %108, float %48, float %27
  %110 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %110, float %105, float %24)
  %112 = call float @llvm.AMDGPU.lrp(float %110, float %106, float %25)
  %113 = call float @llvm.AMDGPU.lrp(float %110, float %107, float %26)
  %114 = call i32 @llvm.SI.packf16(float %111, float %112)
  %115 = bitcast i32 %114 to float
  %116 = call i32 @llvm.SI.packf16(float %113, float %63)
  %117 = bitcast i32 %116 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
c0840100
bf8c007f
c2000924
c2008925
bf8c007f
7e080201
d2820003
04100103
c2000905
bf8c007f
10080600
c2000904
bf8c007f
10060400
c0800300
c0c60500
bf8c007f
f0800f00
00030203
bf8c0770
100c04ff
3e59b3d0
7e0e02ff
3f371759
d2820006
041a0f03
7e0e02ff
3d93dd98
d2820006
041a0f04
7e0e02ff
3a83126f
d00c0000
02020f06
d2000006
00020d07
7e0c5506
10140d03
befc0306
c8240100
c8250101
c2000918
c2008919
bf8c007f
7e0e0201
d2060007
02020e00
c200091a
bf8c007f
060e0e00
c200091b
bf8c007f
060e0e00
c2000911
bf8c007f
060e0e00
d2060807
02010107
08100ef2
d2820009
04221307
10160709
c2000914
c2008915
bf8c007f
7e120201
d2060009
02021200
c2000916
bf8c007f
06121200
c2000917
bf8c007f
06121200
c2000910
bf8c007f
06121200
d2060809
02010109
10120b09
d282000a
042e1509
101814f6
c8280700
c8290701
c200090b
bf8c007f
d00c0002
0200010a
7e160200
d200000a
000a150b
d206080a
0201010a
081614f2
c2000909
bf8c007f
101a1600
d282000c
0436190a
101a0d02
c8380000
c8390001
d282000e
04221d07
101c050e
d282000d
043a1b09
101a1af6
c2000908
bf8c007f
101c1600
d282000d
043a1b0a
5e18190d
100c0d04
c8340200
c8350201
d282000d
04221b07
1004090d
d2820002
040a0d09
100404f6
c200090a
bf8c007f
10061600
d2820002
040e050a
c80c0300
c80d0301
d2820000
04220707
5e000102
f8001c0f
000c000c
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..12]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[11].xxxx, CONST[11].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MOV OUT[1], TEMP[2]
 15: MOV OUT[2], TEMP[0]
 16: MOV OUT[0], TEMP[1]
 17: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = fmul float %56, %45
  %60 = fadd float %59, %42
  %61 = fmul float %57, %46
  %62 = fadd float %61, %43
  %63 = fmul float %58, %47
  %64 = fadd float %63, %44
  %65 = fmul float %23, %60
  %66 = fmul float %24, %60
  %67 = fmul float %25, %60
  %68 = fmul float %26, %60
  %69 = fmul float %27, %62
  %70 = fadd float %69, %65
  %71 = fmul float %28, %62
  %72 = fadd float %71, %66
  %73 = fmul float %29, %62
  %74 = fadd float %73, %67
  %75 = fmul float %30, %62
  %76 = fadd float %75, %68
  %77 = fmul float %31, %64
  %78 = fadd float %77, %70
  %79 = fmul float %32, %64
  %80 = fadd float %79, %72
  %81 = fmul float %33, %64
  %82 = fadd float %81, %74
  %83 = fmul float %34, %64
  %84 = fadd float %83, %76
  %85 = fadd float %78, %35
  %86 = fadd float %80, %36
  %87 = fadd float %82, %37
  %88 = fadd float %84, %38
  %89 = fmul float %11, %60
  %90 = fmul float %12, %60
  %91 = fmul float %13, %60
  %92 = fmul float %14, %62
  %93 = fadd float %92, %89
  %94 = fmul float %15, %62
  %95 = fadd float %94, %90
  %96 = fmul float %16, %62
  %97 = fadd float %96, %91
  %98 = fmul float %17, %64
  %99 = fadd float %98, %93
  %100 = fmul float %18, %64
  %101 = fadd float %100, %95
  %102 = fmul float %19, %64
  %103 = fadd float %102, %97
  %104 = fadd float %99, %20
  %105 = fadd float %101, %21
  %106 = fadd float %103, %22
  %107 = fsub float -0.000000e+00, %50
  %108 = fadd float %104, %107
  %109 = fsub float -0.000000e+00, %51
  %110 = fadd float %105, %109
  %111 = fsub float -0.000000e+00, %52
  %112 = fadd float %106, %111
  %113 = fmul float %87, %48
  %114 = fadd float %113, %49
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %108, float %110, float %112, float %114)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %86, float %87, float %88)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0820700
bf8c000f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020112
bf8c007f
100c0204
c2020116
bf8c007f
d2820006
041a0004
c202012a
c2028126
bf8c007f
7e0e0205
d2820002
041c0904
c202011a
bf8c007f
d2820003
041a0404
c202011e
bf8c007f
06060604
c202012c
c202812d
bf8c007f
7e080205
d2820004
04100903
c2020102
bf8c007f
100a0204
c2020106
bf8c007f
d2820005
04160004
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020132
bf8c007f
0a0a0a04
c2020101
bf8c007f
100c0204
c2020105
bf8c007f
d2820006
041a0004
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020131
bf8c007f
0a0c0c04
c2020100
bf8c007f
100e0204
c2020104
bf8c007f
d2820007
041e0004
c2020108
bf8c007f
d2820007
041e0404
c202010c
bf8c007f
060e0e04
c2020130
bf8c007f
0a0e0e04
f800021f
04050607
c2020113
bf8c000f
10080204
c2020117
bf8c007f
d2820004
04120004
c202011b
bf8c007f
d2820004
04120404
c202011f
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
04030500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], IN[0], TEMP[0]
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %24, %35
  %40 = fmul float %25, %36
  %41 = fmul float %26, %37
  %42 = fmul float %27, %38
  %43 = fcmp ugt float %42, 0x3F80101020000000
  %44 = sext i1 %43 to i32
  %45 = trunc i32 %44 to i1
  %46 = select i1 %45, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %46)
  %47 = call i32 @llvm.SI.packf16(float %39, float %40)
  %48 = bitcast i32 %47 to float
  %49 = call i32 @llvm.SI.packf16(float %41, float %42)
  %50 = bitcast i32 %49 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0b06
7e0e02ff
3c008081
d0080000
02020f06
d2000007
0001e4f3
7c260e80
c81c0200
c81d0201
100e0907
5e0c0d07
c81c0100
c81d0101
100e0707
c8200000
c8210001
10000508
5e000f00
f8001c0f
06000600
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..6]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MAD TEMP[1], IN[1].xyxy, CONST[5], CONST[6]
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], CONST[4]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %39 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0
  %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %5)
  %42 = extractelement <4 x float> %41, i32 0
  %43 = extractelement <4 x float> %41, i32 1
  %44 = extractelement <4 x float> %41, i32 2
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = fmul float %11, %42
  %51 = fmul float %12, %42
  %52 = fmul float %13, %42
  %53 = fmul float %14, %42
  %54 = fmul float %15, %43
  %55 = fadd float %54, %50
  %56 = fmul float %16, %43
  %57 = fadd float %56, %51
  %58 = fmul float %17, %43
  %59 = fadd float %58, %52
  %60 = fmul float %18, %43
  %61 = fadd float %60, %53
  %62 = fmul float %19, %44
  %63 = fadd float %62, %55
  %64 = fmul float %20, %44
  %65 = fadd float %64, %57
  %66 = fmul float %21, %44
  %67 = fadd float %66, %59
  %68 = fmul float %22, %44
  %69 = fadd float %68, %61
  %70 = fadd float %63, %23
  %71 = fadd float %65, %24
  %72 = fadd float %67, %25
  %73 = fadd float %69, %26
  %74 = fmul float %48, %31
  %75 = fadd float %74, %35
  %76 = fmul float %49, %32
  %77 = fadd float %76, %36
  %78 = fmul float %48, %33
  %79 = fadd float %78, %37
  %80 = fmul float %49, %34
  %81 = fadd float %80, %38
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %77, float %79, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %71, float %72, float %73)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840704
bf8c000f
e00c2000
80020100
c2020117
c202811b
bf8c0070
7e0a0205
d2820005
04140902
c2020116
c202811a
bf8c007f
7e0c0205
d2820006
04180901
c2020115
c2028119
bf8c007f
7e0e0205
d2820007
041c0902
c2020114
c2028118
bf8c007f
7e100205
d2820001
04200901
f800021f
05060701
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].xy, IN[0].zwww
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: ADD TEMP[0], TEMP[0], TEMP[1]
  5: MOV TEMP[1].xy, IN[1].xyyy
  6: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  7: ADD TEMP[0], TEMP[0], TEMP[1]
  8: MOV TEMP[1].xy, IN[1].zwww
  9: TEX TEMP[1], TEMP[1], SAMP[0], 2D
 10: ADD TEMP[0], TEMP[0], TEMP[1]
 11: SLT TEMP[1].x, IMM[0].xxxx, TEMP[0].xxxx
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :0
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :0
 16:   MOV TEMP[1].x, IMM[0].xxxx
 17: ENDIF
 18: MOV TEMP[1].x, TEMP[1].xxxx
 19: SLT TEMP[2].x, IMM[0].xxxx, TEMP[0].yyyy
 20: F2I TEMP[2].x, -TEMP[2]
 21: UIF TEMP[2].xxxx :0
 22:   MOV TEMP[2].x, IMM[0].yyyy
 23: ELSE :0
 24:   MOV TEMP[2].x, IMM[0].xxxx
 25: ENDIF
 26: MOV TEMP[1].y, TEMP[2].xxxx
 27: SLT TEMP[2].x, IMM[0].xxxx, TEMP[0].zzzz
 28: F2I TEMP[2].x, -TEMP[2]
 29: UIF TEMP[2].xxxx :0
 30:   MOV TEMP[2].x, IMM[0].yyyy
 31: ELSE :0
 32:   MOV TEMP[2].x, IMM[0].xxxx
 33: ENDIF
 34: MOV TEMP[1].z, TEMP[2].xxxx
 35: SLT TEMP[0].x, IMM[0].xxxx, TEMP[0].wwww
 36: F2I TEMP[0].x, -TEMP[0]
 37: UIF TEMP[0].xxxx :0
 38:   MOV TEMP[0].x, IMM[0].yyyy
 39: ELSE :0
 40:   MOV TEMP[0].x, IMM[0].xxxx
 41: ENDIF
 42: MOV TEMP[1].w, TEMP[0].xxxx
 43: MOV TEMP[0].xy, IN[2].xyyy
 44: TEX TEMP[0], TEMP[0], SAMP[0], 2D
 45: ADD TEMP[0], TEMP[1], -TEMP[0]
 46: MOV OUT[0], TEMP[0]
 47: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %34 = bitcast float %24 to i32
  %35 = bitcast float %25 to i32
  %36 = insertelement <2 x i32> undef, i32 %34, i32 0
  %37 = insertelement <2 x i32> %36, i32 %35, i32 1
  %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %21, <16 x i8> %23, i32 2)
  %39 = extractelement <4 x float> %38, i32 0
  %40 = extractelement <4 x float> %38, i32 1
  %41 = extractelement <4 x float> %38, i32 2
  %42 = extractelement <4 x float> %38, i32 3
  %43 = bitcast float %26 to i32
  %44 = bitcast float %27 to i32
  %45 = insertelement <2 x i32> undef, i32 %43, i32 0
  %46 = insertelement <2 x i32> %45, i32 %44, i32 1
  %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %21, <16 x i8> %23, i32 2)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = extractelement <4 x float> %47, i32 3
  %52 = fadd float %39, %48
  %53 = fadd float %40, %49
  %54 = fadd float %41, %50
  %55 = fadd float %42, %51
  %56 = bitcast float %28 to i32
  %57 = bitcast float %29 to i32
  %58 = insertelement <2 x i32> undef, i32 %56, i32 0
  %59 = insertelement <2 x i32> %58, i32 %57, i32 1
  %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %21, <16 x i8> %23, i32 2)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = extractelement <4 x float> %60, i32 3
  %65 = fadd float %52, %61
  %66 = fadd float %53, %62
  %67 = fadd float %54, %63
  %68 = fadd float %55, %64
  %69 = bitcast float %30 to i32
  %70 = bitcast float %31 to i32
  %71 = insertelement <2 x i32> undef, i32 %69, i32 0
  %72 = insertelement <2 x i32> %71, i32 %70, i32 1
  %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %21, <16 x i8> %23, i32 2)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = extractelement <4 x float> %73, i32 3
  %78 = fadd float %65, %74
  %79 = fadd float %66, %75
  %80 = fadd float %67, %76
  %81 = fadd float %68, %77
  %82 = fcmp ult float 0.000000e+00, %78
  %83 = select i1 %82, float 1.000000e+00, float 0.000000e+00
  %84 = fsub float -0.000000e+00, %83
  %85 = fptosi float %84 to i32
  %86 = bitcast i32 %85 to float
  %87 = bitcast float %86 to i32
  %88 = icmp ne i32 %87, 0
  %. = select i1 %88, float 1.000000e+00, float 0.000000e+00
  %89 = fcmp ult float 0.000000e+00, %79
  %90 = select i1 %89, float 1.000000e+00, float 0.000000e+00
  %91 = fsub float -0.000000e+00, %90
  %92 = fptosi float %91 to i32
  %93 = bitcast i32 %92 to float
  %94 = bitcast float %93 to i32
  %95 = icmp ne i32 %94, 0
  %temp8.0 = select i1 %95, float 1.000000e+00, float 0.000000e+00
  %96 = fcmp ult float 0.000000e+00, %80
  %97 = select i1 %96, float 1.000000e+00, float 0.000000e+00
  %98 = fsub float -0.000000e+00, %97
  %99 = fptosi float %98 to i32
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %100 to i32
  %102 = icmp ne i32 %101, 0
  %.21 = select i1 %102, float 1.000000e+00, float 0.000000e+00
  %103 = fcmp ult float 0.000000e+00, %81
  %104 = select i1 %103, float 1.000000e+00, float 0.000000e+00
  %105 = fsub float -0.000000e+00, %104
  %106 = fptosi float %105 to i32
  %107 = bitcast i32 %106 to float
  %108 = bitcast float %107 to i32
  %109 = icmp ne i32 %108, 0
  %temp.0 = select i1 %109, float 1.000000e+00, float 0.000000e+00
  %110 = bitcast float %32 to i32
  %111 = bitcast float %33 to i32
  %112 = insertelement <2 x i32> undef, i32 %110, i32 0
  %113 = insertelement <2 x i32> %112, i32 %111, i32 1
  %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %21, <16 x i8> %23, i32 2)
  %115 = extractelement <4 x float> %114, i32 0
  %116 = extractelement <4 x float> %114, i32 1
  %117 = extractelement <4 x float> %114, i32 2
  %118 = extractelement <4 x float> %114, i32 3
  %119 = fsub float -0.000000e+00, %115
  %120 = fadd float %., %119
  %121 = fsub float -0.000000e+00, %116
  %122 = fadd float %temp8.0, %121
  %123 = fsub float -0.000000e+00, %117
  %124 = fadd float %.21, %123
  %125 = fsub float -0.000000e+00, %118
  %126 = fadd float %temp.0, %125
  %127 = call i32 @llvm.SI.packf16(float %120, float %122)
  %128 = bitcast i32 %127 to float
  %129 = call i32 @llvm.SI.packf16(float %124, float %126)
  %130 = bitcast i32 %129 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %128, float %130, float %128, float %130)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0300
c80d0301
c8080200
c8090201
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c81c0100
c81d0101
c8180000
c8190001
f0800f00
00020606
bf8c0770
061c0b09
c82c0500
c82d0501
c8280400
c8290401
f0800f00
00020a0a
bf8c0770
06241b0e
c83c0700
c83d0701
c8380600
c8390601
f0800f00
00020e0e
bf8c0770
06242312
d0080004
02010112
d2000012
0011e480
d2060012
22010112
7e241112
d10a0004
02010112
d2000016
0011e480
c84c0900
c84d0901
c8480800
c8490801
f0800f00
00021212
bf8c0770
08002b16
06020908
06021901
06022101
d0080000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
d2000001
0001e480
08022901
5e000101
06020707
06021701
06021f01
d0080000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
d2000001
0001e480
08022701
06040506
06041502
06041d02
d0080000
02010102
d2000002
0001e480
d2060002
22010102
7e041102
d10a0000
02010102
d2000002
0001e480
08042502
5e020302
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: ADD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy
  3: ADD TEMP[2].xy, IN[1].xyyy, -CONST[0].xyyy
  4: MOV TEMP[1].zw, TEMP[2].yyxy
  5: ADD TEMP[2].xy, IN[1].xyyy, CONST[0].zwww
  6: ADD TEMP[3].xy, IN[1].xyyy, -CONST[0].zwww
  7: MOV TEMP[2].zw, TEMP[3].yyxy
  8: MOV TEMP[3].xy, IN[1].xyxx
  9: MOV OUT[1], TEMP[1]
 10: MOV OUT[2], TEMP[2]
 11: MOV OUT[3], TEMP[3]
 12: MOV OUT[0], TEMP[0]
 13: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0
  %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %5)
  %18 = extractelement <4 x float> %17, i32 0
  %19 = extractelement <4 x float> %17, i32 1
  %20 = extractelement <4 x float> %17, i32 2
  %21 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
  %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %5)
  %24 = extractelement <4 x float> %23, i32 0
  %25 = extractelement <4 x float> %23, i32 1
  %26 = fadd float %24, %11
  %27 = fadd float %25, %12
  %28 = fsub float -0.000000e+00, %11
  %29 = fadd float %24, %28
  %30 = fsub float -0.000000e+00, %12
  %31 = fadd float %25, %30
  %32 = fadd float %24, %13
  %33 = fadd float %25, %14
  %34 = fsub float -0.000000e+00, %13
  %35 = fadd float %24, %34
  %36 = fsub float -0.000000e+00, %14
  %37 = fadd float %25, %36
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float %29, float %31)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %32, float %33, float %35, float %37)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %24, float %25, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020101
bf8c007f
0a0a0404
c2028100
bf8c007f
0a0c0205
060e0404
06100205
f800020f
05060708
c2020103
bf8c000f
0a0a0404
c2000102
bf8c007f
0a0c0200
060e0404
06100200
f800021f
05060708
bf8c070f
7e0a0280
f800022f
05050201
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    0.2500,     0.0000,     2.0000,     1.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].xy, IN[0].zwww
  3: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  4: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
  5: MOV TEMP[1].xy, IN[1].xyyy
  6: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  7: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
  8: MOV TEMP[1].xy, IN[1].zwww
  9: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 10: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xyzz
 11: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[0].xxxx, -CONST[1].xxxx
 12: MAX TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy
 13: MUL TEMP[0].xyz, TEMP[1].xyzz, IMM[0].zzzz
 14: MOV TEMP[1].w, IMM[0].wwww
 15: MOV TEMP[1].xyz, TEMP[0].xyzx
 16: MOV OUT[0], TEMP[1]
 17: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %35 = bitcast float %27 to i32
  %36 = bitcast float %28 to i32
  %37 = insertelement <2 x i32> undef, i32 %35, i32 0
  %38 = insertelement <2 x i32> %37, i32 %36, i32 1
  %39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %24, <16 x i8> %26, i32 2)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = bitcast float %29 to i32
  %44 = bitcast float %30 to i32
  %45 = insertelement <2 x i32> undef, i32 %43, i32 0
  %46 = insertelement <2 x i32> %45, i32 %44, i32 1
  %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %24, <16 x i8> %26, i32 2)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = fadd float %40, %48
  %52 = fadd float %41, %49
  %53 = fadd float %42, %50
  %54 = bitcast float %31 to i32
  %55 = bitcast float %32 to i32
  %56 = insertelement <2 x i32> undef, i32 %54, i32 0
  %57 = insertelement <2 x i32> %56, i32 %55, i32 1
  %58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %24, <16 x i8> %26, i32 2)
  %59 = extractelement <4 x float> %58, i32 0
  %60 = extractelement <4 x float> %58, i32 1
  %61 = extractelement <4 x float> %58, i32 2
  %62 = fadd float %51, %59
  %63 = fadd float %52, %60
  %64 = fadd float %53, %61
  %65 = bitcast float %33 to i32
  %66 = bitcast float %34 to i32
  %67 = insertelement <2 x i32> undef, i32 %65, i32 0
  %68 = insertelement <2 x i32> %67, i32 %66, i32 1
  %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %24, <16 x i8> %26, i32 2)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = fadd float %62, %70
  %74 = fadd float %63, %71
  %75 = fadd float %64, %72
  %76 = fsub float -0.000000e+00, %22
  %77 = fmul float %73, 2.500000e-01
  %78 = fadd float %77, %76
  %79 = fsub float -0.000000e+00, %22
  %80 = fmul float %74, 2.500000e-01
  %81 = fadd float %80, %79
  %82 = fsub float -0.000000e+00, %22
  %83 = fmul float %75, 2.500000e-01
  %84 = fadd float %83, %82
  %85 = fcmp uge float %78, 0.000000e+00
  %86 = select i1 %85, float %78, float 0.000000e+00
  %87 = fcmp uge float %81, 0.000000e+00
  %88 = select i1 %87, float %81, float 0.000000e+00
  %89 = fcmp uge float %84, 0.000000e+00
  %90 = select i1 %89, float %84, float 0.000000e+00
  %91 = fmul float %86, 2.000000e+00
  %92 = fmul float %88, 2.000000e+00
  %93 = fmul float %90, 2.000000e+00
  %94 = call i32 @llvm.SI.packf16(float %91, float %92)
  %95 = bitcast i32 %94 to float
  %96 = call i32 @llvm.SI.packf16(float %93, float 1.000000e+00)
  %97 = bitcast i32 %96 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0300
c80d0301
c8080200
c8090201
c0840300
c0c60500
bf8c007f
f0800700
00430202
c8180100
c8190101
c8140000
c8150001
f0800700
00430505
bf8c0770
06160706
c8240500
c8250501
c8200400
c8210401
f0800700
00430808
bf8c0770
061c130b
c8300700
c8310701
c82c0600
c82d0601
f0800700
00430b0b
bf8c0770
0600190e
100000ff
3e800000
c0800100
bf8c007f
c2000104
bf8c007f
0a000000
d00c0002
02010100
d2000000
000a0080
06000100
06020505
06021101
06021701
100202ff
3e800000
0a020200
d00c0002
02010101
d2000001
000a0280
06020301
5e000101
06020907
06021501
06021b01
100202ff
3e800000
0a020200
d00c0000
02010101
d2000001
00020280
06020301
d25e0001
0201e501
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..1]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: ADD TEMP[2].xy, TEMP[1].xyyy, CONST[1].xyyy
  4: ADD TEMP[3].xy, TEMP[1].xyyy, -CONST[1].xyyy
  5: MOV TEMP[2].zw, TEMP[3].yyxy
  6: ADD TEMP[3].xy, TEMP[1].xyyy, CONST[1].zwww
  7: ADD TEMP[1].xy, TEMP[1].xyyy, -CONST[1].zwww
  8: MOV TEMP[3].zw, TEMP[1].yyxy
  9: MOV OUT[1], TEMP[2]
 10: MOV OUT[2], TEMP[3]
 11: MOV OUT[0], TEMP[0]
 12: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0
  %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %5)
  %22 = extractelement <4 x float> %21, i32 0
  %23 = extractelement <4 x float> %21, i32 1
  %24 = extractelement <4 x float> %21, i32 2
  %25 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %5)
  %28 = extractelement <4 x float> %27, i32 0
  %29 = extractelement <4 x float> %27, i32 1
  %30 = fmul float %28, %11
  %31 = fadd float %30, %13
  %32 = fmul float %29, %12
  %33 = fadd float %32, %14
  %34 = fadd float %31, %15
  %35 = fadd float %33, %16
  %36 = fsub float -0.000000e+00, %15
  %37 = fadd float %31, %36
  %38 = fsub float -0.000000e+00, %16
  %39 = fadd float %33, %38
  %40 = fadd float %31, %17
  %41 = fadd float %33, %18
  %42 = fsub float -0.000000e+00, %17
  %43 = fadd float %31, %42
  %44 = fsub float -0.000000e+00, %18
  %45 = fadd float %33, %44
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %37, float %39)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float %43, float %45)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %22, float %23, float %24, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020200
c0800100
bf8c0070
c2020101
c2028103
bf8c007f
7e020205
d2820001
04040903
c2020105
bf8c007f
0a0c0204
c2028100
c2040102
bf8c007f
7e0e0208
d2820002
041c0b02
c2028104
bf8c007f
0a060405
06080204
060a0405
f800020f
06030405
c2020107
bf8c000f
0a060204
c2000106
bf8c007f
0a080400
06020204
06040400
f800021f
03040102
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL IN[3], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..2]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MIN TEMP[0].xy, IN[0].xyyy, CONST[1].xyyy
  1: MOV TEMP[0].xy, TEMP[0].xyyy
  2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
  3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[2].xxxx
  4: MIN TEMP[1].xy, IN[0].zwww, CONST[1].xyyy
  5: MOV TEMP[1].xy, TEMP[1].xyyy
  6: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  7: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].yyyy, TEMP[0].xyzz
  8: MIN TEMP[1].xy, IN[1].xyyy, CONST[1].xyyy
  9: MOV TEMP[1].xy, TEMP[1].xyyy
 10: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 11: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].zzzz, TEMP[0].xyzz
 12: MIN TEMP[1].xy, IN[1].zwww, CONST[1].xyyy
 13: MOV TEMP[1].xy, TEMP[1].xyyy
 14: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 15: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].wwww, TEMP[0].xyzz
 16: MIN TEMP[1].xy, IN[2].xyyy, CONST[1].xyyy
 17: MOV TEMP[1].xy, TEMP[1].xyyy
 18: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 19: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].zzzz, TEMP[0].xyzz
 20: MIN TEMP[1].xy, IN[2].zwww, CONST[1].xyyy
 21: MOV TEMP[1].xy, TEMP[1].xyyy
 22: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 23: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].yyyy, TEMP[0].xyzz
 24: MIN TEMP[1].xy, IN[3].xyyy, CONST[1].xyyy
 25: MOV TEMP[1].xy, TEMP[1].xyyy
 26: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 27: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[2].xxxx, TEMP[0].xyzz
 28: MOV TEMP[1].w, IMM[0].xxxx
 29: MOV TEMP[1].xyz, TEMP[0].xyzx
 30: MOV OUT[0], TEMP[1]
 31: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0
  %30 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %46 = fcmp uge float %32, %22
  %47 = select i1 %46, float %22, float %32
  %48 = fcmp uge float %33, %23
  %49 = select i1 %48, float %23, float %33
  %50 = bitcast float %47 to i32
  %51 = bitcast float %49 to i32
  %52 = insertelement <2 x i32> undef, i32 %50, i32 0
  %53 = insertelement <2 x i32> %52, i32 %51, i32 1
  %54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %29, <16 x i8> %31, i32 2)
  %55 = extractelement <4 x float> %54, i32 0
  %56 = extractelement <4 x float> %54, i32 1
  %57 = extractelement <4 x float> %54, i32 2
  %58 = fmul float %55, %24
  %59 = fmul float %56, %24
  %60 = fmul float %57, %24
  %61 = fcmp uge float %34, %22
  %62 = select i1 %61, float %22, float %34
  %63 = fcmp uge float %35, %23
  %64 = select i1 %63, float %23, float %35
  %65 = bitcast float %62 to i32
  %66 = bitcast float %64 to i32
  %67 = insertelement <2 x i32> undef, i32 %65, i32 0
  %68 = insertelement <2 x i32> %67, i32 %66, i32 1
  %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %29, <16 x i8> %31, i32 2)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = fmul float %70, %25
  %74 = fadd float %73, %58
  %75 = fmul float %71, %25
  %76 = fadd float %75, %59
  %77 = fmul float %72, %25
  %78 = fadd float %77, %60
  %79 = fcmp uge float %36, %22
  %80 = select i1 %79, float %22, float %36
  %81 = fcmp uge float %37, %23
  %82 = select i1 %81, float %23, float %37
  %83 = bitcast float %80 to i32
  %84 = bitcast float %82 to i32
  %85 = insertelement <2 x i32> undef, i32 %83, i32 0
  %86 = insertelement <2 x i32> %85, i32 %84, i32 1
  %87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %29, <16 x i8> %31, i32 2)
  %88 = extractelement <4 x float> %87, i32 0
  %89 = extractelement <4 x float> %87, i32 1
  %90 = extractelement <4 x float> %87, i32 2
  %91 = fmul float %88, %26
  %92 = fadd float %91, %74
  %93 = fmul float %89, %26
  %94 = fadd float %93, %76
  %95 = fmul float %90, %26
  %96 = fadd float %95, %78
  %97 = fcmp uge float %38, %22
  %98 = select i1 %97, float %22, float %38
  %99 = fcmp uge float %39, %23
  %100 = select i1 %99, float %23, float %39
  %101 = bitcast float %98 to i32
  %102 = bitcast float %100 to i32
  %103 = insertelement <2 x i32> undef, i32 %101, i32 0
  %104 = insertelement <2 x i32> %103, i32 %102, i32 1
  %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %29, <16 x i8> %31, i32 2)
  %106 = extractelement <4 x float> %105, i32 0
  %107 = extractelement <4 x float> %105, i32 1
  %108 = extractelement <4 x float> %105, i32 2
  %109 = fmul float %106, %27
  %110 = fadd float %109, %92
  %111 = fmul float %107, %27
  %112 = fadd float %111, %94
  %113 = fmul float %108, %27
  %114 = fadd float %113, %96
  %115 = fcmp uge float %40, %22
  %116 = select i1 %115, float %22, float %40
  %117 = fcmp uge float %41, %23
  %118 = select i1 %117, float %23, float %41
  %119 = bitcast float %116 to i32
  %120 = bitcast float %118 to i32
  %121 = insertelement <2 x i32> undef, i32 %119, i32 0
  %122 = insertelement <2 x i32> %121, i32 %120, i32 1
  %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %122, <32 x i8> %29, <16 x i8> %31, i32 2)
  %124 = extractelement <4 x float> %123, i32 0
  %125 = extractelement <4 x float> %123, i32 1
  %126 = extractelement <4 x float> %123, i32 2
  %127 = fmul float %124, %26
  %128 = fadd float %127, %110
  %129 = fmul float %125, %26
  %130 = fadd float %129, %112
  %131 = fmul float %126, %26
  %132 = fadd float %131, %114
  %133 = fcmp uge float %42, %22
  %134 = select i1 %133, float %22, float %42
  %135 = fcmp uge float %43, %23
  %136 = select i1 %135, float %23, float %43
  %137 = bitcast float %134 to i32
  %138 = bitcast float %136 to i32
  %139 = insertelement <2 x i32> undef, i32 %137, i32 0
  %140 = insertelement <2 x i32> %139, i32 %138, i32 1
  %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %140, <32 x i8> %29, <16 x i8> %31, i32 2)
  %142 = extractelement <4 x float> %141, i32 0
  %143 = extractelement <4 x float> %141, i32 1
  %144 = extractelement <4 x float> %141, i32 2
  %145 = fmul float %142, %25
  %146 = fadd float %145, %128
  %147 = fmul float %143, %25
  %148 = fadd float %147, %130
  %149 = fmul float %144, %25
  %150 = fadd float %149, %132
  %151 = fcmp uge float %44, %22
  %152 = select i1 %151, float %22, float %44
  %153 = fcmp uge float %45, %23
  %154 = select i1 %153, float %23, float %45
  %155 = bitcast float %152 to i32
  %156 = bitcast float %154 to i32
  %157 = insertelement <2 x i32> undef, i32 %155, i32 0
  %158 = insertelement <2 x i32> %157, i32 %156, i32 1
  %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %29, <16 x i8> %31, i32 2)
  %160 = extractelement <4 x float> %159, i32 0
  %161 = extractelement <4 x float> %159, i32 1
  %162 = extractelement <4 x float> %159, i32 2
  %163 = fmul float %160, %24
  %164 = fadd float %163, %146
  %165 = fmul float %161, %24
  %166 = fadd float %165, %148
  %167 = fmul float %162, %24
  %168 = fadd float %167, %150
  %169 = call i32 @llvm.SI.packf16(float %164, float %166)
  %170 = bitcast i32 %169 to float
  %171 = call i32 @llvm.SI.packf16(float %168, float 1.000000e+00)
  %172 = bitcast i32 %171 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %170, float %172, float %170, float %172)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080100
c8090101
c08a0100
bf8c007f
c2009505
bf8c007f
d00c0008
02000302
7e0a0201
d2000003
00220b02
c8100000
c8110001
c2039504
bf8c007f
d00c0008
02000f04
7e0c0207
d2000002
00220d04
c0840300
c0c60500
bf8c007f
f0800700
00430202
c2001508
bf8c0070
10140600
c81c0300
c81d0301
d00c0002
02000307
d2000008
000a0b07
c8240200
c8250201
d00c0002
02000f09
d2000007
000a0d09
f0800700
00430707
c2011509
bf8c0070
d282000d
04280508
c8280500
c8290501
d00c0004
0200030a
d200000b
00120b0a
c8300400
c8310401
d00c0004
02000f0c
d200000a
00120d0c
f0800700
00430a0a
c201950a
bf8c0070
d2820010
0434070b
c8340700
c8350701
d00c0004
0200030d
d200000e
00120b0d
c83c0600
c83d0601
d00c0004
02000f0f
d200000d
00120d0f
f0800700
00430d0d
c202150b
bf8c0070
d2820013
0440090e
c8400900
c8410901
d00c0014
02000310
d2000011
00520b10
c8480800
c8490801
d00c0014
02000f12
d2000010
00520d12
f0800700
00431010
bf8c0770
d2820016
044c0711
c84c0b00
c84d0b01
d00c0014
02000313
d2000014
00520b13
c8540a00
c8550a01
d00c0014
02000f15
d2000013
00520d15
f0800700
00431313
bf8c0770
d2820019
04580514
c8580d00
c8590d01
d00c0014
02000316
d2000017
00520b16
c8140c00
c8150c01
d00c0006
02000f05
d2000016
001a0d05
f0800700
00431616
bf8c0770
d2820000
04640117
10020400
d2820001
04040507
d2820001
0404070a
d2820001
0404090d
d2820001
04040710
d2820001
04040513
d2820001
04040116
5e000101
10020800
d2820001
04040509
d2820001
0404070c
d2820001
0404090f
d2820001
04040712
d2820001
04040515
d2820001
04040118
d25e0001
0201e501
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..3]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, CONST[2].xxxx, CONST[1].xyyy, IN[1].xyyy
  3: MAD TEMP[2].xy, CONST[2].yyyy, CONST[1].xyyy, IN[1].xyyy
  4: MOV TEMP[1].zw, TEMP[2].yyxy
  5: MAD TEMP[2].xy, CONST[2].zzzz, CONST[1].xyyy, IN[1].xyyy
  6: MOV TEMP[2].zw, IN[1].yyxy
  7: MAD TEMP[3].xy, CONST[3].xxxx, CONST[1].xyyy, IN[1].xyyy
  8: MAD TEMP[4].xy, CONST[3].yyyy, CONST[1].xyyy, IN[1].xyyy
  9: MOV TEMP[3].zw, TEMP[4].yyxy
 10: MAD TEMP[4].xy, CONST[3].zzzz, CONST[1].xyyy, IN[1].xyyy
 11: MOV OUT[1], TEMP[1]
 12: MOV OUT[2], TEMP[2]
 13: MOV OUT[3], TEMP[3]
 14: MOV OUT[4], TEMP[4]
 15: MOV OUT[0], TEMP[0]
 16: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %19 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0
  %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %5)
  %22 = extractelement <4 x float> %21, i32 0
  %23 = extractelement <4 x float> %21, i32 1
  %24 = extractelement <4 x float> %21, i32 2
  %25 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %5)
  %28 = extractelement <4 x float> %27, i32 0
  %29 = extractelement <4 x float> %27, i32 1
  %30 = fmul float %13, %11
  %31 = fadd float %30, %28
  %32 = fmul float %13, %12
  %33 = fadd float %32, %29
  %34 = fmul float %14, %11
  %35 = fadd float %34, %28
  %36 = fmul float %14, %12
  %37 = fadd float %36, %29
  %38 = fmul float %15, %11
  %39 = fadd float %38, %28
  %40 = fmul float %15, %12
  %41 = fadd float %40, %29
  %42 = fmul float %16, %11
  %43 = fadd float %42, %28
  %44 = fmul float %16, %12
  %45 = fadd float %44, %29
  %46 = fmul float %17, %11
  %47 = fadd float %46, %28
  %48 = fmul float %17, %12
  %49 = fadd float %48, %29
  %50 = fmul float %18, %11
  %51 = fadd float %50, %28
  %52 = fmul float %18, %12
  %53 = fadd float %52, %29
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %31, float %33, float %35, float %37)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %39, float %41, float %28, float %29)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %43, float %45, float %47, float %49)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %51, float %53, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %22, float %23, float %24, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020109
c2028105
bf8c007f
7e0a0205
d2820007
040a0a04
c2028104
bf8c007f
7e0c0205
d2820008
04060c04
c2020108
bf8c007f
d2820009
040a0a04
d282000a
04060c04
f800020f
0708090a
c202010a
bf8c000f
d2820007
040a0a04
d2820008
04060c04
f800021f
02010708
c202010d
bf8c000f
d2820007
040a0a04
d2820008
04060c04
c202010c
bf8c007f
d2820009
040a0a04
d282000a
04060c04
f800022f
0708090a
c200010e
bf8c000f
d2820005
040a0a00
d2820001
04060c00
7e040280
f800023f
02020501
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL IN[3], GENERIC[22], PERSPECTIVE
DCL IN[4], GENERIC[23], PERSPECTIVE
DCL IN[5], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[5..18]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    0.5000,     0.2500,     0.4545,     2.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: ADD TEMP[2].xy, IN[0].xyyy, CONST[16].xyyy
  4: MOV TEMP[2].xy, TEMP[2].xyyy
  5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
  6: LRP TEMP[0].xyz, CONST[16].zzzz, TEMP[2].xyzz, TEMP[0].xyzz
  7: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[16].wwww
  8: MOV TEMP[2].y, IMM[0].xxxx
  9: MUL TEMP[2].x, TEMP[0].xxxx, IMM[0].yyyy
 10: MOV TEMP[2].xy, TEMP[2].xyyy
 11: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 12: MOV TEMP[0].x, TEMP[2].xxxx
 13: MOV TEMP[2].y, IMM[0].xxxx
 14: MUL TEMP[2].x, TEMP[0].yyyy, IMM[0].yyyy
 15: MOV TEMP[2].xy, TEMP[2].xyyy
 16: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 17: MOV TEMP[0].y, TEMP[2].xxxx
 18: MOV TEMP[2].y, IMM[0].xxxx
 19: MUL TEMP[2].x, TEMP[0].zzzz, IMM[0].yyyy
 20: MOV TEMP[2].xy, TEMP[2].xyyy
 21: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 22: MOV TEMP[0].z, TEMP[2].xxxx
 23: MOV TEMP[2].xy, IN[0].zwww
 24: TEX TEMP[2], TEMP[2], SAMP[1], 2D
 25: MUL TEMP[2].xyz, TEMP[2], IMM[0].xxxx
 26: MAD_SAT TEMP[0].xyz, TEMP[2].xyzz, CONST[5].xyzz, TEMP[0].xyzz
 27: POW TEMP[2].x, TEMP[0].xxxx, IMM[0].zzzz
 28: POW TEMP[3].x, TEMP[0].yyyy, IMM[0].zzzz
 29: MOV TEMP[2].y, TEMP[3].xxxx
 30: POW TEMP[3].x, TEMP[0].zzzz, IMM[0].zzzz
 31: MOV TEMP[2].z, TEMP[3].xxxx
 32: MOV TEMP[3].y, IMM[0].xxxx
 33: DP3_SAT TEMP[4].x, TEMP[2].xyzz, IMM[1].xyzz
 34: MOV TEMP[3].x, TEMP[4].xxxx
 35: MOV TEMP[3].xy, TEMP[3].xyyy
 36: TEX TEMP[3], TEMP[3], SAMP[3], 2D
 37: ADD TEMP[4].xyz, CONST[13].xyzz, -TEMP[2].xyzz
 38: ADD TEMP[2].xyz, CONST[14].xyzz, -TEMP[2].xyzz
 39: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz
 40: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
 41: MOV TEMP[4].y, TEMP[2].xxxx
 42: MAD_SAT TEMP[2].xy, TEMP[4].xyyy, CONST[15].xyyy, CONST[15].zwww
 43: MUL TEMP[4].x, TEMP[3].wwww, TEMP[2].xxxx
 44: MUL_SAT TEMP[2].x, TEMP[4].xxxx, TEMP[2].yyyy
 45: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[0].xyzz
 46: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[1].xyzz
 47: LRP TEMP[0].xyz, CONST[6].zzzz, TEMP[1].xyzz, TEMP[0].xxxx
 48: MUL TEMP[1].xyz, TEMP[0].xyzz, IMM[0].wwww
 49: MOV TEMP[0].xy, IN[1].xyyy
 50: TEX TEMP[0], TEMP[0], SAMP[2], 2D
 51: MOV TEMP[2].xy, IN[1].zwww
 52: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 53: ADD TEMP[0], TEMP[0], TEMP[2]
 54: MOV TEMP[2].xy, IN[2].xyyy
 55: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 56: ADD TEMP[0], TEMP[0], TEMP[2]
 57: MOV TEMP[2].xy, IN[2].zwww
 58: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 59: ADD TEMP[0], TEMP[0], TEMP[2]
 60: MOV TEMP[2].xy, IN[4].xyyy
 61: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 62: MUL TEMP[2], TEMP[2], CONST[12].xxxx
 63: MAD TEMP[0].xyz, TEMP[0], CONST[12].yyyy, TEMP[2]
 64: POW TEMP[2].x, TEMP[1].xxxx, CONST[6].wwww
 65: POW TEMP[2].y, TEMP[1].yyyy, CONST[6].wwww
 66: POW TEMP[2].z, TEMP[1].zzzz, CONST[6].wwww
 67: MUL_SAT TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx
 68: MAD TEMP[1].xyz, CONST[8].xyzz, TEMP[0].xxxx, TEMP[2].xyzz
 69: MAD TEMP[1].xyz, CONST[9].xyzz, TEMP[0].yyyy, TEMP[1].xyzz
 70: MAD TEMP[1].xyz, CONST[10].xyzz, TEMP[0].zzzz, TEMP[1].xyzz
 71: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[5].xyzz
 72: DP2 TEMP[0].x, IN[3].xyyy, IN[3].xyyy
 73: MAD_SAT TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx, CONST[6].yyyy
 74: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[0].xxxx
 75: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xxxx
 76: MOV OUT[0], TEMP[1]
 77: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 264)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 268)
  %54 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %55 = load <32 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %59 = load <32 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %63 = load <32 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %67 = load <32 x i8> addrspace(2)* %66, !tbaa !0
  %68 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0
  %70 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %71 = load <32 x i8> addrspace(2)* %70, !tbaa !0
  %72 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %93 = bitcast float %74 to i32
  %94 = bitcast float %75 to i32
  %95 = insertelement <2 x i32> undef, i32 %93, i32 0
  %96 = insertelement <2 x i32> %95, i32 %94, i32 1
  %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %55, <16 x i8> %57, i32 2)
  %98 = extractelement <4 x float> %97, i32 0
  %99 = extractelement <4 x float> %97, i32 1
  %100 = extractelement <4 x float> %97, i32 2
  %101 = extractelement <4 x float> %97, i32 3
  %102 = fadd float %74, %50
  %103 = fadd float %75, %51
  %104 = bitcast float %102 to i32
  %105 = bitcast float %103 to i32
  %106 = insertelement <2 x i32> undef, i32 %104, i32 0
  %107 = insertelement <2 x i32> %106, i32 %105, i32 1
  %108 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %55, <16 x i8> %57, i32 2)
  %109 = extractelement <4 x float> %108, i32 0
  %110 = extractelement <4 x float> %108, i32 1
  %111 = extractelement <4 x float> %108, i32 2
  %112 = call float @llvm.AMDGPU.lrp(float %52, float %109, float %98)
  %113 = call float @llvm.AMDGPU.lrp(float %52, float %110, float %99)
  %114 = call float @llvm.AMDGPU.lrp(float %52, float %111, float %100)
  %115 = fmul float %112, %53
  %116 = fmul float %113, %53
  %117 = fmul float %114, %53
  %118 = fmul float %115, 2.500000e-01
  %119 = bitcast float %118 to i32
  %120 = bitcast float 5.000000e-01 to i32
  %121 = insertelement <2 x i32> undef, i32 %119, i32 0
  %122 = insertelement <2 x i32> %121, i32 %120, i32 1
  %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %122, <32 x i8> %71, <16 x i8> %73, i32 2)
  %124 = extractelement <4 x float> %123, i32 0
  %125 = fmul float %116, 2.500000e-01
  %126 = bitcast float %125 to i32
  %127 = bitcast float 5.000000e-01 to i32
  %128 = insertelement <2 x i32> undef, i32 %126, i32 0
  %129 = insertelement <2 x i32> %128, i32 %127, i32 1
  %130 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %129, <32 x i8> %71, <16 x i8> %73, i32 2)
  %131 = extractelement <4 x float> %130, i32 0
  %132 = fmul float %117, 2.500000e-01
  %133 = bitcast float %132 to i32
  %134 = bitcast float 5.000000e-01 to i32
  %135 = insertelement <2 x i32> undef, i32 %133, i32 0
  %136 = insertelement <2 x i32> %135, i32 %134, i32 1
  %137 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %136, <32 x i8> %71, <16 x i8> %73, i32 2)
  %138 = extractelement <4 x float> %137, i32 0
  %139 = bitcast float %76 to i32
  %140 = bitcast float %77 to i32
  %141 = insertelement <2 x i32> undef, i32 %139, i32 0
  %142 = insertelement <2 x i32> %141, i32 %140, i32 1
  %143 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %142, <32 x i8> %59, <16 x i8> %61, i32 2)
  %144 = extractelement <4 x float> %143, i32 0
  %145 = extractelement <4 x float> %143, i32 1
  %146 = extractelement <4 x float> %143, i32 2
  %147 = fmul float %144, 5.000000e-01
  %148 = fmul float %145, 5.000000e-01
  %149 = fmul float %146, 5.000000e-01
  %150 = fmul float %147, %22
  %151 = fadd float %150, %124
  %152 = fmul float %148, %23
  %153 = fadd float %152, %131
  %154 = fmul float %149, %24
  %155 = fadd float %154, %138
  %156 = call float @llvm.AMDIL.clamp.(float %151, float 0.000000e+00, float 1.000000e+00)
  %157 = call float @llvm.AMDIL.clamp.(float %153, float 0.000000e+00, float 1.000000e+00)
  %158 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00)
  %159 = call float @llvm.pow.f32(float %156, float 0x3FDD1745C0000000)
  %160 = call float @llvm.pow.f32(float %157, float 0x3FDD1745C0000000)
  %161 = call float @llvm.pow.f32(float %158, float 0x3FDD1745C0000000)
  %162 = fmul float %159, 0x3FCB367A00000000
  %163 = fmul float %160, 0x3FE6E2EB20000000
  %164 = fadd float %163, %162
  %165 = fmul float %161, 0x3FB27BB300000000
  %166 = fadd float %164, %165
  %167 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00)
  %168 = bitcast float %167 to i32
  %169 = bitcast float 5.000000e-01 to i32
  %170 = insertelement <2 x i32> undef, i32 %168, i32 0
  %171 = insertelement <2 x i32> %170, i32 %169, i32 1
  %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %171, <32 x i8> %67, <16 x i8> %69, i32 2)
  %173 = extractelement <4 x float> %172, i32 0
  %174 = extractelement <4 x float> %172, i32 1
  %175 = extractelement <4 x float> %172, i32 2
  %176 = extractelement <4 x float> %172, i32 3
  %177 = fsub float -0.000000e+00, %159
  %178 = fadd float %40, %177
  %179 = fsub float -0.000000e+00, %160
  %180 = fadd float %41, %179
  %181 = fsub float -0.000000e+00, %161
  %182 = fadd float %42, %181
  %183 = fsub float -0.000000e+00, %159
  %184 = fadd float %43, %183
  %185 = fsub float -0.000000e+00, %160
  %186 = fadd float %44, %185
  %187 = fsub float -0.000000e+00, %161
  %188 = fadd float %45, %187
  %189 = fmul float %178, %178
  %190 = fmul float %180, %180
  %191 = fadd float %190, %189
  %192 = fmul float %182, %182
  %193 = fadd float %191, %192
  %194 = fmul float %184, %184
  %195 = fmul float %186, %186
  %196 = fadd float %195, %194
  %197 = fmul float %188, %188
  %198 = fadd float %196, %197
  %199 = fmul float %193, %46
  %200 = fadd float %199, %48
  %201 = fmul float %198, %47
  %202 = fadd float %201, %49
  %203 = call float @llvm.AMDIL.clamp.(float %200, float 0.000000e+00, float 1.000000e+00)
  %204 = call float @llvm.AMDIL.clamp.(float %202, float 0.000000e+00, float 1.000000e+00)
  %205 = fmul float %176, %203
  %206 = fmul float %205, %204
  %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00)
  %208 = call float @llvm.AMDGPU.lrp(float %207, float %173, float %156)
  %209 = call float @llvm.AMDGPU.lrp(float %207, float %174, float %157)
  %210 = call float @llvm.AMDGPU.lrp(float %207, float %175, float %158)
  %211 = fmul float %208, 0x3FCB367A00000000
  %212 = fmul float %209, 0x3FE6E2EB20000000
  %213 = fadd float %212, %211
  %214 = fmul float %210, 0x3FB27BB300000000
  %215 = fadd float %213, %214
  %216 = call float @llvm.AMDGPU.lrp(float %27, float %208, float %215)
  %217 = call float @llvm.AMDGPU.lrp(float %27, float %209, float %215)
  %218 = call float @llvm.AMDGPU.lrp(float %27, float %210, float %215)
  %219 = fmul float %216, 2.000000e+00
  %220 = fmul float %217, 2.000000e+00
  %221 = fmul float %218, 2.000000e+00
  %222 = bitcast float %78 to i32
  %223 = bitcast float %79 to i32
  %224 = insertelement <2 x i32> undef, i32 %222, i32 0
  %225 = insertelement <2 x i32> %224, i32 %223, i32 1
  %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %63, <16 x i8> %65, i32 2)
  %227 = extractelement <4 x float> %226, i32 0
  %228 = extractelement <4 x float> %226, i32 1
  %229 = extractelement <4 x float> %226, i32 2
  %230 = bitcast float %80 to i32
  %231 = bitcast float %81 to i32
  %232 = insertelement <2 x i32> undef, i32 %230, i32 0
  %233 = insertelement <2 x i32> %232, i32 %231, i32 1
  %234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %63, <16 x i8> %65, i32 2)
  %235 = extractelement <4 x float> %234, i32 0
  %236 = extractelement <4 x float> %234, i32 1
  %237 = extractelement <4 x float> %234, i32 2
  %238 = fadd float %227, %235
  %239 = fadd float %228, %236
  %240 = fadd float %229, %237
  %241 = bitcast float %82 to i32
  %242 = bitcast float %83 to i32
  %243 = insertelement <2 x i32> undef, i32 %241, i32 0
  %244 = insertelement <2 x i32> %243, i32 %242, i32 1
  %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %244, <32 x i8> %63, <16 x i8> %65, i32 2)
  %246 = extractelement <4 x float> %245, i32 0
  %247 = extractelement <4 x float> %245, i32 1
  %248 = extractelement <4 x float> %245, i32 2
  %249 = fadd float %238, %246
  %250 = fadd float %239, %247
  %251 = fadd float %240, %248
  %252 = bitcast float %84 to i32
  %253 = bitcast float %85 to i32
  %254 = insertelement <2 x i32> undef, i32 %252, i32 0
  %255 = insertelement <2 x i32> %254, i32 %253, i32 1
  %256 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %63, <16 x i8> %65, i32 2)
  %257 = extractelement <4 x float> %256, i32 0
  %258 = extractelement <4 x float> %256, i32 1
  %259 = extractelement <4 x float> %256, i32 2
  %260 = fadd float %249, %257
  %261 = fadd float %250, %258
  %262 = fadd float %251, %259
  %263 = bitcast float %88 to i32
  %264 = bitcast float %89 to i32
  %265 = insertelement <2 x i32> undef, i32 %263, i32 0
  %266 = insertelement <2 x i32> %265, i32 %264, i32 1
  %267 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %266, <32 x i8> %63, <16 x i8> %65, i32 2)
  %268 = extractelement <4 x float> %267, i32 0
  %269 = extractelement <4 x float> %267, i32 1
  %270 = extractelement <4 x float> %267, i32 2
  %271 = fmul float %268, %38
  %272 = fmul float %269, %38
  %273 = fmul float %270, %38
  %274 = fmul float %260, %39
  %275 = fadd float %274, %271
  %276 = fmul float %261, %39
  %277 = fadd float %276, %272
  %278 = fmul float %262, %39
  %279 = fadd float %278, %273
  %280 = call float @llvm.pow.f32(float %219, float %28)
  %281 = call float @llvm.pow.f32(float %220, float %28)
  %282 = call float @llvm.pow.f32(float %221, float %28)
  %283 = fmul float %280, 5.000000e-01
  %284 = fmul float %281, 5.000000e-01
  %285 = fmul float %282, 5.000000e-01
  %286 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00)
  %287 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00)
  %288 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00)
  %289 = fmul float %29, %275
  %290 = fadd float %289, %286
  %291 = fmul float %30, %275
  %292 = fadd float %291, %287
  %293 = fmul float %31, %275
  %294 = fadd float %293, %288
  %295 = fmul float %32, %277
  %296 = fadd float %295, %290
  %297 = fmul float %33, %277
  %298 = fadd float %297, %292
  %299 = fmul float %34, %277
  %300 = fadd float %299, %294
  %301 = fmul float %35, %279
  %302 = fadd float %301, %296
  %303 = fmul float %36, %279
  %304 = fadd float %303, %298
  %305 = fmul float %37, %279
  %306 = fadd float %305, %300
  %307 = fmul float %302, %90
  %308 = fmul float %304, %91
  %309 = fmul float %306, %92
  %310 = fmul float %86, %86
  %311 = fmul float %87, %87
  %312 = fadd float %310, %311
  %313 = fmul float %312, %25
  %314 = fadd float %313, %26
  %315 = call float @llvm.AMDIL.clamp.(float %314, float 0.000000e+00, float 1.000000e+00)
  %316 = fmul float %307, %315
  %317 = fmul float %308, %315
  %318 = fmul float %309, %315
  %319 = fmul float %316, %315
  %320 = fmul float %317, %315
  %321 = fmul float %318, %315
  %322 = call i32 @llvm.SI.packf16(float %319, float %320)
  %323 = bitcast i32 %322 to float
  %324 = call i32 @llvm.SI.packf16(float %321, float %101)
  %325 = bitcast i32 %324 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %323, float %325, float %323, float %325)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c81c0100
c81d0101
c8180000
c8190001
c0860300
c0c80500
bf8c007f
f0800f00
00640206
c0840100
bf8c0070
c2000942
bf8c007f
d2080009
020000f2
10100709
c2008941
bf8c007f
06160e01
c2008940
bf8c007f
06140c01
f0800700
00640c0a
bf8c0770
d2820006
04221a00
c2008943
bf8c007f
100c0c01
100c0cff
3e800000
7e0e02f0
c0860310
c0c80520
bf8c007f
f0800100
00640806
c82c0300
c82d0301
c8280200
c8290201
c08c0304
c0ce0508
bf8c0070
f0800700
00c7100a
bf8c0770
101422f0
c2038915
bf8c007f
d2820008
04200f0a
d2060808
02010108
7e144f08
0e1414ff
3ee8ba2e
7e164b0a
c2038935
bf8c007f
08261607
10140509
d282000a
042a1800
10141401
100c14ff
3e800000
f0800100
00640a06
101e20f0
c2038914
bf8c0070
d282000a
04280f0f
d206080a
0201010a
7e1e4f0a
0e1e1eff
3ee8ba2e
7e1e4b0f
c2038934
bf8c007f
08281e07
10282914
d2820013
04522713
10120909
d2820009
04261c00
10121201
100c12ff
3e800000
f0800100
00640906
101824f0
c2000916
bf8c0070
d2820009
0424010c
d2060809
02010109
7e184f09
0e1818ff
3ee8ba2e
7e1c4b0c
c2000936
bf8c007f
08181c00
d282000c
044e190c
c200093c
c200893e
bf8c007f
7e1a0201
d282000c
0434010c
d2060814
0201010c
10181eff
3e59b3d0
7e1a02ff
3f371759
d2820010
04321b0b
7e1802ff
3d93dd98
d2820010
0442190e
d2060806
02010110
c086030c
c0c80518
bf8c007f
f0800f00
00641006
bf8c0770
100c2913
c2000939
bf8c007f
080e1600
c2000938
bf8c007f
08161e00
1016170b
d2820007
042e0f07
c200093a
bf8c007f
08161c00
d2820007
041e170b
c200093d
c200893f
bf8c007f
7e160201
d2820007
042c0107
d2060807
02010107
100c0f06
d2060806
02010106
080e0cf2
10101107
d282000b
04222306
10101507
d2820008
04222106
101410ff
3e59b3d0
d282000a
042a1b0b
100e1307
d2820006
041e2506
d2820007
042a1906
c200091a
bf8c007f
d2080009
020000f2
100e0f09
d2820009
041e1600
06121309
7e124f09
c200891b
bf8c007f
0e121201
7e124b09
101212f0
d2060809
02010109
c82c0700
c82d0701
c8280600
c8290601
c0860308
c0c80510
bf8c007f
f0800700
00640a0a
c8380500
c8390501
c8340400
c8350401
f0800700
00640e0d
bf8c0770
061a150e
c8480900
c8490901
c8440800
c8450801
f0800700
00641111
bf8c0770
061a230d
c8540b00
c8550b01
c8500a00
c8510a01
f0800700
00641414
bf8c0770
061a290d
c8601100
c8611101
c85c1000
c85d1001
f0800700
00641717
c2010930
bf8c0070
10342e02
c2018931
bf8c007f
d282000d
0468070d
c2020921
bf8c007f
d282001a
04261a04
0612170f
06122509
06122b09
10363002
d2820009
046c0709
c2020925
bf8c007f
d282001a
046a1204
06141910
0614270a
06142d0a
10163202
d282000a
042c070a
c2010929
bf8c007f
d282000b
046a1402
c8301500
c8311501
1018190b
c82c0c00
c82d0c01
c8380d00
c8390d01
101c1d0e
d282000b
043a170b
c2010918
c2018919
bf8c007f
7e1c0203
d282000b
0438050b
d206080b
0201010b
1018170c
1018170c
d2820008
041e1000
06101108
7e104f08
0e101001
7e104b08
101010f0
d2060808
02010108
c2010920
bf8c007f
d2820008
04221a02
c2010924
bf8c007f
d2820008
04221202
c2010928
bf8c007f
d2820008
04221402
c8381400
c8391401
10101d08
10101708
10101708
5e101908
d2820006
041e0c00
060c0d06
7e0c4f06
0e0c0c01
7e0c4b06
100c0cf0
d2060806
02010106
c2000922
bf8c007f
d2820006
041a1a00
c2000926
bf8c007f
d2820006
041a1200
c200092a
bf8c007f
d2820006
041a1400
c81c1600
c81d1601
10000f06
10001700
10001700
5e000b00
f8001c0f
00080008
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..14]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.5000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: MAD TEMP[2].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww
  4: MOV TEMP[1].zw, TEMP[2].yyxy
  5: ADD TEMP[3].xy, TEMP[1].xyyy, CONST[2].xyyy
  6: ADD TEMP[4].xy, TEMP[1].xyyy, -CONST[2].xyyy
  7: MOV TEMP[3].zw, TEMP[4].yyxy
  8: ADD TEMP[4].xy, TEMP[1].xyyy, CONST[2].zwww
  9: ADD TEMP[5].xy, TEMP[1].xyyy, -CONST[2].zwww
 10: MOV TEMP[4].zw, TEMP[5].yyxy
 11: MAD TEMP[5].xy, TEMP[1].xyyy, CONST[3].xyyy, CONST[3].zwww
 12: ADD TEMP[2].xy, TEMP[2].xyyy, IMM[0].yzzz
 13: MOV TEMP[5].zw, TEMP[2].yyxy
 14: LRP TEMP[2], TEMP[1].yyyy, CONST[4], CONST[5]
 15: LRP TEMP[2].xyz, TEMP[2].wwww, TEMP[2].xyzz, CONST[14].xyzz
 16: MOV TEMP[2].xyz, TEMP[2].xyzx
 17: MOV OUT[1], TEMP[1]
 18: MOV OUT[2], TEMP[3]
 19: MOV OUT[3], TEMP[4]
 20: MOV OUT[4], TEMP[5]
 21: MOV OUT[5], TEMP[1].xyxy
 22: MOV OUT[6], TEMP[2]
 23: MOV OUT[0], TEMP[0]
 24: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %38 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0
  %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %5)
  %41 = extractelement <4 x float> %40, i32 0
  %42 = extractelement <4 x float> %40, i32 1
  %43 = extractelement <4 x float> %40, i32 2
  %44 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0
  %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %5)
  %47 = extractelement <4 x float> %46, i32 0
  %48 = extractelement <4 x float> %46, i32 1
  %49 = fmul float %47, %11
  %50 = fadd float %49, %13
  %51 = fmul float %48, %12
  %52 = fadd float %51, %14
  %53 = fmul float %47, %15
  %54 = fadd float %53, %17
  %55 = fmul float %48, %16
  %56 = fadd float %55, %18
  %57 = fadd float %50, %19
  %58 = fadd float %52, %20
  %59 = fsub float -0.000000e+00, %19
  %60 = fadd float %50, %59
  %61 = fsub float -0.000000e+00, %20
  %62 = fadd float %52, %61
  %63 = fadd float %50, %21
  %64 = fadd float %52, %22
  %65 = fsub float -0.000000e+00, %21
  %66 = fadd float %50, %65
  %67 = fsub float -0.000000e+00, %22
  %68 = fadd float %52, %67
  %69 = fmul float %50, %23
  %70 = fadd float %69, %25
  %71 = fmul float %52, %24
  %72 = fadd float %71, %26
  %73 = fadd float %54, 5.000000e-01
  %74 = fadd float %56, 0.000000e+00
  %75 = call float @llvm.AMDGPU.lrp(float %52, float %27, float %31)
  %76 = call float @llvm.AMDGPU.lrp(float %52, float %28, float %32)
  %77 = call float @llvm.AMDGPU.lrp(float %52, float %29, float %33)
  %78 = call float @llvm.AMDGPU.lrp(float %52, float %30, float %34)
  %79 = call float @llvm.AMDGPU.lrp(float %78, float %75, float %35)
  %80 = call float @llvm.AMDGPU.lrp(float %78, float %76, float %36)
  %81 = call float @llvm.AMDGPU.lrp(float %78, float %77, float %37)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %50, float %52, float %54, float %56)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %57, float %58, float %60, float %62)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %63, float %64, float %66, float %68)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %70, float %72, float %73, float %74)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %50, float %52, float %50, float %52)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %79, float %80, float %81, float %78)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020400
c0800100
bf8c0070
c2020105
c2028107
bf8c007f
7e020205
d2820002
04040905
c2020104
c2028106
bf8c007f
7e020205
d2820003
04040904
c2020101
c2028103
bf8c007f
7e020205
d2820001
04040905
c2020100
c2028102
bf8c007f
7e100205
d2820004
04200904
f800020f
02030104
c2020109
bf8c000f
0a0a0204
c2028108
bf8c007f
0a0c0805
060e0204
06100805
f800021f
05060708
c202010b
bf8c000f
0a0a0204
c202810a
bf8c007f
0a0c0805
060e0204
06100805
f800022f
05060708
c202010d
c202810f
bf8c000f
7e0a0205
d2820005
04140901
c202010c
c202810e
bf8c007f
7e0c0205
d2820006
04180904
06040480
060606f0
f800023f
02030506
f800024f
01040104
bf8c070f
080402f2
c2020116
bf8c007f
10060404
c2020112
bf8c007f
d2820005
040c0901
c2020117
bf8c007f
10060404
c2020113
bf8c007f
d2820003
040c0901
080806f2
c202013a
bf8c007f
100c0804
d2820005
041a0b03
c2020115
bf8c007f
100c0404
c2020111
bf8c007f
d2820006
04180901
c2020139
bf8c007f
100e0804
d2820006
041e0d03
c2020114
bf8c007f
10040404
c2020110
bf8c007f
d2820001
04080901
c2000138
bf8c007f
10040800
d2820001
040a0303
f800025f
03050601
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %21 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %22 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %23 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8080300
c8090301
c80c0200
c80d0201
5e040503
c80c0100
c80d0101
c8100000
c8110001
5e000704
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..4]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV OUT[1], CONST[4]
  5: MOV OUT[0], TEMP[0]
  6: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %5)
  %34 = extractelement <4 x float> %33, i32 0
  %35 = extractelement <4 x float> %33, i32 1
  %36 = extractelement <4 x float> %33, i32 2
  %37 = fmul float %11, %34
  %38 = fmul float %12, %34
  %39 = fmul float %13, %34
  %40 = fmul float %14, %34
  %41 = fmul float %15, %35
  %42 = fadd float %41, %37
  %43 = fmul float %16, %35
  %44 = fadd float %43, %38
  %45 = fmul float %17, %35
  %46 = fadd float %45, %39
  %47 = fmul float %18, %35
  %48 = fadd float %47, %40
  %49 = fmul float %19, %36
  %50 = fadd float %49, %42
  %51 = fmul float %20, %36
  %52 = fadd float %51, %44
  %53 = fmul float %21, %36
  %54 = fadd float %53, %46
  %55 = fmul float %22, %36
  %56 = fadd float %55, %48
  %57 = fadd float %50, %23
  %58 = fadd float %52, %24
  %59 = fadd float %54, %25
  %60 = fadd float %56, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %57, float %58, float %59, float %60)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
Installing breakpad exception handler for appid(gameoverlayui)/version(20130719102159_client)
Installing breakpad exception handler for appid(gameoverlayui)/version(1.0_client)
Installing breakpad exception handler for appid(gameoverlayui)/version(1.0_client)
Installing breakpad exception handler for appid(gameoverlayui)/version(1.0_client)
Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element
Fontconfig error: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 70: non-double matrix element
Fontconfig warning: "/etc/fonts/conf.d/10-scale-bitmap-fonts.conf", line 78: saw unknown, expected number

(gameoverlayui:4977): Gtk-WARNING **: Загружаемый модуль тем не найден в module_path: «qtcurve»,
/home/behem0th/.gtkrc-2.0:12: error: scanner: unterminated string constant
[0720/121913:WARNING:proxy_service.cc(958)] PAC support disabled because there is no system implementation
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..4]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: MOV TEMP[0].w, IN[0].wwww
  2: TXP TEMP[0], TEMP[0], SAMP[0], 2D
  3: MUL TEMP[0], TEMP[0], CONST[4]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %26 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0
  %28 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0
  %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = fdiv float %30, %32
  %34 = fdiv float %31, %32
  %35 = bitcast float %33 to i32
  %36 = bitcast float %34 to i32
  %37 = insertelement <2 x i32> undef, i32 %35, i32 0
  %38 = insertelement <2 x i32> %37, i32 %36, i32 1
  %39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %27, <16 x i8> %29, i32 2)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = extractelement <4 x float> %39, i32 3
  %44 = fmul float %40, %22
  %45 = fmul float %41, %23
  %46 = fmul float %42, %24
  %47 = fmul float %43, %25
  %48 = call i32 @llvm.SI.packf16(float %44, float %45)
  %49 = bitcast i32 %48 to float
  %50 = call i32 @llvm.SI.packf16(float %46, float %47)
  %51 = bitcast i32 %50 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080100
c8090101
c80c0300
c80d0301
7e085503
10060902
c8140000
c8150001
10040905
c0840300
c0c60500
bf8c007f
f0800f00
00430002
c0800100
bf8c0070
c2020113
bf8c007f
10080604
c2020112
bf8c007f
100a0404
5e080905
c2020111
bf8c007f
100a0204
c2000110
bf8c007f
10000000
5e000b00
f8001c0f
04000400
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..7]
DCL TEMP[0]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: MUL TEMP[0], IN[1].xxxx, CONST[4]
  5: MAD TEMP[0], IN[1].yyyy, CONST[5], TEMP[0]
  6: MAD TEMP[0], IN[1].zzzz, CONST[6], TEMP[0]
  7: MAD OUT[1], IN[1].wwww, CONST[7], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = extractelement <4 x float> %45, i32 3
  %50 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %5)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = fmul float %46, %11
  %58 = fmul float %46, %12
  %59 = fmul float %46, %13
  %60 = fmul float %46, %14
  %61 = fmul float %47, %15
  %62 = fadd float %61, %57
  %63 = fmul float %47, %16
  %64 = fadd float %63, %58
  %65 = fmul float %47, %17
  %66 = fadd float %65, %59
  %67 = fmul float %47, %18
  %68 = fadd float %67, %60
  %69 = fmul float %48, %19
  %70 = fadd float %69, %62
  %71 = fmul float %48, %20
  %72 = fadd float %71, %64
  %73 = fmul float %48, %21
  %74 = fadd float %73, %66
  %75 = fmul float %48, %22
  %76 = fadd float %75, %68
  %77 = fmul float %49, %23
  %78 = fadd float %77, %70
  %79 = fmul float %49, %24
  %80 = fadd float %79, %72
  %81 = fmul float %49, %25
  %82 = fadd float %81, %74
  %83 = fmul float %49, %26
  %84 = fadd float %83, %76
  %85 = fmul float %53, %27
  %86 = fmul float %53, %28
  %87 = fmul float %53, %29
  %88 = fmul float %53, %30
  %89 = fmul float %54, %31
  %90 = fadd float %89, %85
  %91 = fmul float %54, %32
  %92 = fadd float %91, %86
  %93 = fmul float %54, %33
  %94 = fadd float %93, %87
  %95 = fmul float %54, %34
  %96 = fadd float %95, %88
  %97 = fmul float %55, %35
  %98 = fadd float %97, %90
  %99 = fmul float %55, %36
  %100 = fadd float %99, %92
  %101 = fmul float %55, %37
  %102 = fadd float %101, %94
  %103 = fmul float %55, %38
  %104 = fadd float %103, %96
  %105 = fmul float %56, %39
  %106 = fadd float %105, %98
  %107 = fmul float %56, %40
  %108 = fadd float %107, %100
  %109 = fmul float %56, %41
  %110 = fadd float %109, %102
  %111 = fmul float %56, %42
  %112 = fadd float %111, %104
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %106, float %108, float %110, float %112)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020113
bf8c007f
100a0204
c2020117
bf8c007f
d2820005
04140902
c202011b
bf8c007f
d2820005
04140903
c202011f
bf8c007f
d2820005
04140904
c2020112
bf8c007f
100c0204
c2020116
bf8c007f
d2820006
04180902
c202011a
bf8c007f
d2820006
04180903
c202011e
bf8c007f
d2820006
04180904
c2020111
bf8c007f
100e0204
c2020115
bf8c007f
d2820007
041c0902
c2020119
bf8c007f
d2820007
041c0903
c202011d
bf8c007f
d2820007
041c0904
c2020110
bf8c007f
10100204
c2020114
bf8c007f
d2820008
04200902
c2020118
bf8c007f
d2820008
04200903
c202011c
bf8c007f
d2820001
04200904
f800020f
05060701
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04100901
c202010b
bf8c007f
d2820004
04100902
c202010f
bf8c007f
d2820004
04100903
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04140901
c202010a
bf8c007f
d2820005
04140902
c202010e
bf8c007f
d2820005
04140903
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
04180901
c2020109
bf8c007f
d2820006
04180902
c202010d
bf8c007f
d2820006
04180903
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041c0901
c2020108
bf8c007f
d2820007
041c0902
c200010c
bf8c007f
d2820000
041c0103
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL IN[1], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: MOV TEMP[0].w, IN[1].wwww
  2: TXP TEMP[0], TEMP[0], SAMP[0], 2D
  3: MUL TEMP[0], TEMP[0], IN[0]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %31 = fdiv float %28, %30
  %32 = fdiv float %29, %30
  %33 = bitcast float %31 to i32
  %34 = bitcast float %32 to i32
  %35 = insertelement <2 x i32> undef, i32 %33, i32 0
  %36 = insertelement <2 x i32> %35, i32 %34, i32 1
  %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %21, <16 x i8> %23, i32 2)
  %38 = extractelement <4 x float> %37, i32 0
  %39 = extractelement <4 x float> %37, i32 1
  %40 = extractelement <4 x float> %37, i32 2
  %41 = extractelement <4 x float> %37, i32 3
  %42 = fmul float %38, %24
  %43 = fmul float %39, %25
  %44 = fmul float %40, %26
  %45 = fmul float %41, %27
  %46 = call i32 @llvm.SI.packf16(float %42, float %43)
  %47 = bitcast i32 %46 to float
  %48 = call i32 @llvm.SI.packf16(float %44, float %45)
  %49 = bitcast i32 %48 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %47, float %49, float %47, float %49)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080500
c8090501
c80c0700
c80d0701
7e085503
10060902
c8140400
c8150401
10040905
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0d05
c81c0200
c81d0201
100e0f04
5e0c0d07
c81c0100
c81d0101
100e0f03
c8200000
c8210001
10001102
5e000f00
f8001c0f
06000600
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL OUT[2], GENERIC[0]
DCL CONST[0..7]
DCL TEMP[0]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: MOV_SAT OUT[1], IN[1]
  5: MUL TEMP[0], IN[2].xxxx, CONST[4]
  6: MAD TEMP[0], IN[2].yyyy, CONST[5], TEMP[0]
  7: MAD TEMP[0], IN[2].zzzz, CONST[6], TEMP[0]
  8: MAD OUT[2], IN[2].wwww, CONST[7], TEMP[0]
  9: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = extractelement <4 x float> %45, i32 3
  %50 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %5)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0
  %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %5)
  %60 = extractelement <4 x float> %59, i32 0
  %61 = extractelement <4 x float> %59, i32 1
  %62 = extractelement <4 x float> %59, i32 2
  %63 = extractelement <4 x float> %59, i32 3
  %64 = fmul float %46, %11
  %65 = fmul float %46, %12
  %66 = fmul float %46, %13
  %67 = fmul float %46, %14
  %68 = fmul float %47, %15
  %69 = fadd float %68, %64
  %70 = fmul float %47, %16
  %71 = fadd float %70, %65
  %72 = fmul float %47, %17
  %73 = fadd float %72, %66
  %74 = fmul float %47, %18
  %75 = fadd float %74, %67
  %76 = fmul float %48, %19
  %77 = fadd float %76, %69
  %78 = fmul float %48, %20
  %79 = fadd float %78, %71
  %80 = fmul float %48, %21
  %81 = fadd float %80, %73
  %82 = fmul float %48, %22
  %83 = fadd float %82, %75
  %84 = fmul float %49, %23
  %85 = fadd float %84, %77
  %86 = fmul float %49, %24
  %87 = fadd float %86, %79
  %88 = fmul float %49, %25
  %89 = fadd float %88, %81
  %90 = fmul float %49, %26
  %91 = fadd float %90, %83
  %92 = call float @llvm.AMDIL.clamp.(float %53, float 0.000000e+00, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float %54, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float %55, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %56, float 0.000000e+00, float 1.000000e+00)
  %96 = fmul float %60, %27
  %97 = fmul float %60, %28
  %98 = fmul float %60, %29
  %99 = fmul float %60, %30
  %100 = fmul float %61, %31
  %101 = fadd float %100, %96
  %102 = fmul float %61, %32
  %103 = fadd float %102, %97
  %104 = fmul float %61, %33
  %105 = fadd float %104, %98
  %106 = fmul float %61, %34
  %107 = fadd float %106, %99
  %108 = fmul float %62, %35
  %109 = fadd float %108, %101
  %110 = fmul float %62, %36
  %111 = fadd float %110, %103
  %112 = fmul float %62, %37
  %113 = fadd float %112, %105
  %114 = fmul float %62, %38
  %115 = fadd float %114, %107
  %116 = fmul float %63, %39
  %117 = fadd float %116, %109
  %118 = fmul float %63, %40
  %119 = fadd float %118, %111
  %120 = fmul float %63, %41
  %121 = fadd float %120, %113
  %122 = fmul float %63, %42
  %123 = fadd float %122, %115
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %92, float %93, float %94, float %95)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %117, float %119, float %121, float %123)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %87, float %89, float %91)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
bf8c0770
d2060805
02010104
d2060806
02010103
d2060807
02010102
d2060801
02010101
f800020f
05060701
c0840708
bf8c000f
e00c2000
80020100
c0800100
bf8c0070
c2020113
bf8c007f
100a0204
c2020117
bf8c007f
d2820005
04140902
c202011b
bf8c007f
d2820005
04140903
c202011f
bf8c007f
d2820005
04140904
c2020112
bf8c007f
100c0204
c2020116
bf8c007f
d2820006
04180902
c202011a
bf8c007f
d2820006
04180903
c202011e
bf8c007f
d2820006
04180904
c2020111
bf8c007f
100e0204
c2020115
bf8c007f
d2820007
041c0902
c2020119
bf8c007f
d2820007
041c0903
c202011d
bf8c007f
d2820007
041c0904
c2020110
bf8c007f
10100204
c2020114
bf8c007f
d2820008
04200902
c2020118
bf8c007f
d2820008
04200903
c202011c
bf8c007f
d2820001
04200904
f800021f
05060701
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04100901
c202010b
bf8c007f
d2820004
04100902
c202010f
bf8c007f
d2820004
04100903
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04140901
c202010a
bf8c007f
d2820005
04140902
c202010e
bf8c007f
d2820005
04140903
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
04180901
c2020109
bf8c007f
d2820006
04180902
c202010d
bf8c007f
d2820006
04180903
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041c0901
c2020108
bf8c007f
d2820007
041c0902
c200010c
bf8c007f
d2820000
041c0103
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    1.1643,    -0.0625,    -0.5000,     1.5958}
IMM[1] FLT32 {    0.3917,     0.8129,     2.0170,     1.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D
  2: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
  3: MUL TEMP[0].x, IMM[0].xxxx, TEMP[0].xxxx
  4: MOV TEMP[1].xy, IN[0].xyyy
  5: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
  6: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz
  7: MOV TEMP[2].xy, IN[0].xyyy
  8: TEX TEMP[2].x, TEMP[2], SAMP[2], 2D
  9: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
 10: MAD TEMP[3].x, TEMP[2].xxxx, IMM[0].wwww, TEMP[0].xxxx
 11: MUL TEMP[4].x, TEMP[1].xxxx, IMM[1].xxxx
 12: ADD TEMP[4].x, TEMP[0].xxxx, -TEMP[4].xxxx
 13: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy
 14: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx
 15: MOV TEMP[3].y, TEMP[2].xxxx
 16: MAD TEMP[0].x, TEMP[1].xxxx, IMM[1].zzzz, TEMP[0].xxxx
 17: MOV TEMP[3].z, TEMP[0].xxxx
 18: MOV TEMP[0].w, IMM[1].wwww
 19: MOV_SAT TEMP[0].xyz, TEMP[3].xyzz
 20: MOV OUT[0], TEMP[0]
 21: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !0
  %26 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0
  %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0
  %30 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %34 = bitcast float %32 to i32
  %35 = bitcast float %33 to i32
  %36 = insertelement <2 x i32> undef, i32 %34, i32 0
  %37 = insertelement <2 x i32> %36, i32 %35, i32 1
  %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %21, <16 x i8> %23, i32 2)
  %39 = extractelement <4 x float> %38, i32 0
  %40 = fadd float %39, -6.250000e-02
  %41 = fmul float 0x3FF2A0F900000000, %40
  %42 = bitcast float %32 to i32
  %43 = bitcast float %33 to i32
  %44 = insertelement <2 x i32> undef, i32 %42, i32 0
  %45 = insertelement <2 x i32> %44, i32 %43, i32 1
  %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %25, <16 x i8> %27, i32 2)
  %47 = extractelement <4 x float> %46, i32 0
  %48 = fadd float %47, -5.000000e-01
  %49 = bitcast float %32 to i32
  %50 = bitcast float %33 to i32
  %51 = insertelement <2 x i32> undef, i32 %49, i32 0
  %52 = insertelement <2 x i32> %51, i32 %50, i32 1
  %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %29, <16 x i8> %31, i32 2)
  %54 = extractelement <4 x float> %53, i32 0
  %55 = fadd float %54, -5.000000e-01
  %56 = fmul float %55, 0x3FF98865A0000000
  %57 = fadd float %56, %41
  %58 = fmul float %48, 0x3FD9121AC0000000
  %59 = fsub float -0.000000e+00, %58
  %60 = fadd float %41, %59
  %61 = fmul float %55, 0x3FEA0346E0000000
  %62 = fsub float -0.000000e+00, %61
  %63 = fadd float %60, %62
  %64 = fmul float %48, 0x400022D0E0000000
  %65 = fadd float %64, %41
  %66 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00)
  %67 = call float @llvm.AMDIL.clamp.(float %63, float 0.000000e+00, float 1.000000e+00)
  %68 = call float @llvm.AMDIL.clamp.(float %65, float 0.000000e+00, float 1.000000e+00)
  %69 = call i32 @llvm.SI.packf16(float %66, float %67)
  %70 = bitcast i32 %69 to float
  %71 = call i32 @llvm.SI.packf16(float %68, float 1.000000e+00)
  %72 = bitcast i32 %71 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %70, float %72, float %70, float %72)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0840308
c0c60510
bf8c007f
f0800100
00430002
bf8c0770
060000f1
c0840300
c0c60500
bf8c007f
f0800100
00430102
bf8c0770
060802ff
bd800000
100208ff
3f9507c8
7e0a02ff
3fcc432d
d2820005
04060b00
d2060805
02010105
c0800304
c0c20508
bf8c007f
f0800100
00010202
bf8c0770
060404f1
100604ff
bec890d6
7e0c02ff
3f9507c8
d2820003
040e0d04
7e0802ff
bf501a37
d2820000
040e0900
d2060800
02010100
5e000105
7e0602ff
40011687
d2820001
04060702
d2060801
02010101
d25e0001
0201e501
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].z, IN[0].xyzx
  2: MOV TEMP[0].zw, TEMP[0].wwzw
  3: MUL TEMP[0].xy, IN[0].xyyy, CONST[1].xyyy
  4: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  5: MOV OUT[1], TEMP[1]
  6: MOV OUT[0], TEMP[0]
  7: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %5)
  %20 = extractelement <4 x float> %19, i32 0
  %21 = extractelement <4 x float> %19, i32 1
  %22 = extractelement <4 x float> %19, i32 2
  %23 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0
  %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %5)
  %26 = extractelement <4 x float> %25, i32 0
  %27 = extractelement <4 x float> %25, i32 1
  %28 = fmul float %20, %15
  %29 = fmul float %21, %16
  %30 = fmul float %26, %11
  %31 = fadd float %30, %13
  %32 = fmul float %27, %12
  %33 = fadd float %32, %14
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %31, float %33, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %28, float %29, float %22, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020101
c2028103
bf8c007f
7e0a0205
d2820005
04140902
c2020100
c2028102
bf8c007f
7e0c0205
d2820001
04180901
7e040280
f800020f
02020501
c0820700
bf8c000f
e00c2000
80010000
c2020105
bf8c0070
10080204
c2000104
bf8c007f
100a0000
7e0c02f2
f80008cf
06020405
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
  %21 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %3)
  %22 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %3)
  %23 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %3)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8020302
c8060202
5e000101
c8060102
c80a0002
5e020302
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  %15 = extractelement <4 x float> %11, i32 3
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = extractelement <4 x float> %18, i32 3
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %19, float %20, float %21, float %22)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float %15)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800704
bf8c007f
e00c2000
80000100
bf8c0770
f800020f
04030201
c0800700
bf8c000f
e00c2000
80000000
bf8c0770
f80008cf
03020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV OUT[0], TEMP[0]
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 0
  %32 = extractelement <4 x float> %30, i32 1
  %33 = extractelement <4 x float> %30, i32 2
  %34 = extractelement <4 x float> %30, i32 3
  %35 = call i32 @llvm.SI.packf16(float %31, float %32)
  %36 = bitcast i32 %35 to float
  %37 = call i32 @llvm.SI.packf16(float %33, float %34)
  %38 = bitcast i32 %37 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %36, float %38, float %36, float %38)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800f00
00010002
bf8c0770
5e080702
5e000300
f8001c0f
04000400
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..14]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: MAD TEMP[2].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww
  4: MOV TEMP[1].zw, TEMP[2].yyxy
  5: MOV OUT[1], TEMP[1]
  6: MOV OUT[0], TEMP[0]
  7: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0
  %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %5)
  %22 = extractelement <4 x float> %21, i32 0
  %23 = extractelement <4 x float> %21, i32 1
  %24 = extractelement <4 x float> %21, i32 2
  %25 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %5)
  %28 = extractelement <4 x float> %27, i32 0
  %29 = extractelement <4 x float> %27, i32 1
  %30 = fmul float %28, %11
  %31 = fadd float %30, %13
  %32 = fmul float %29, %12
  %33 = fadd float %32, %14
  %34 = fmul float %28, %15
  %35 = fadd float %34, %17
  %36 = fmul float %29, %16
  %37 = fadd float %36, %18
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %31, float %33, float %35, float %37)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %22, float %23, float %24, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020105
c2028107
bf8c007f
7e0a0205
d2820005
04140902
c2020104
c2028106
bf8c007f
7e0c0205
d2820006
04180901
c2020101
c2028103
bf8c007f
7e0e0205
d2820007
041c0902
c2020100
c2000102
bf8c007f
7e100200
d2820001
04200901
f800020f
05060701
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], IN[0], TEMP[0]
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %24, %35
  %40 = fmul float %25, %36
  %41 = fmul float %26, %37
  %42 = fmul float %27, %38
  %43 = call i32 @llvm.SI.packf16(float %39, float %40)
  %44 = bitcast i32 %43 to float
  %45 = call i32 @llvm.SI.packf16(float %41, float %42)
  %46 = bitcast i32 %45 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0b06
c81c0200
c81d0201
100e0907
5e0c0d07
c81c0100
c81d0101
100e0707
c8200000
c8210001
10000508
5e000f00
f8001c0f
06000600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %21 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %22 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %23 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8080300
c8090301
c80c0200
c80d0201
5e040503
c80c0100
c80d0101
c8100000
c8110001
5e000704
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..69]
DCL TEMP[0..2], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    2.0000,     1.0000,     0.0000,     4.0000}
  0: MAD TEMP[0].x, IMM[0].xxxx, IN[0].zzzz, IMM[0].yyyy
  1: F2I TEMP[0].x, TEMP[0].xxxx
  2: UARL ADDR[0].x, TEMP[0].xxxx
  3: MOV TEMP[0].xyz, CONST[ADDR[0].x+6]
  4: MUL TEMP[1].x, IMM[0].xxxx, IN[0].zzzz
  5: F2I TEMP[1].x, TEMP[1].xxxx
  6: UARL ADDR[0].x, TEMP[1].xxxx
  7: UARL ADDR[0].x, TEMP[1].xxxx
  8: MAD TEMP[1].xyz, TEMP[0].xyzz, IN[0].xxxx, CONST[ADDR[0].x+6].xyzz
  9: MUL TEMP[2].xyz, TEMP[0].zxyy, IMM[0].yzzz
 10: MAD TEMP[0].xyz, TEMP[0].yzxx, IMM[0].zzyy, -TEMP[2].xyzz
 11: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz
 12: RSQ TEMP[2].x, TEMP[2].xxxx
 13: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx
 14: MUL TEMP[2].x, IMM[0].wwww, IN[0].yyyy
 15: MUL TEMP[2].x, CONST[0].xxxx, TEMP[2].xxxx
 16: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx, TEMP[1].xyzz
 17: MUL TEMP[1], CONST[2], TEMP[0].xxxx
 18: MAD TEMP[1], CONST[3], TEMP[0].yyyy, TEMP[1]
 19: MAD TEMP[0], CONST[4], TEMP[0].zzzz, TEMP[1]
 20: ADD TEMP[0], TEMP[0], CONST[5]
 21: MOV OUT[1], CONST[1]
 22: MOV OUT[0], TEMP[0]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %32 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0
  %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %5)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = fmul float 2.000000e+00, %37
  %39 = fadd float %38, 1.000000e+00
  %40 = fptosi float %39 to i32
  %41 = bitcast i32 %40 to float
  %42 = bitcast float %41 to i32
  %43 = shl i32 %42, 4
  %44 = add i32 %43, 96
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %44)
  %46 = shl i32 %42, 4
  %47 = add i32 %46, 100
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %47)
  %49 = shl i32 %42, 4
  %50 = add i32 %49, 104
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %50)
  %52 = fmul float 2.000000e+00, %37
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = shl i32 %55, 4
  %57 = add i32 %56, 96
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %57)
  %59 = fmul float %45, %35
  %60 = fadd float %59, %58
  %61 = shl i32 %55, 4
  %62 = add i32 %61, 100
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %62)
  %64 = fmul float %48, %35
  %65 = fadd float %64, %63
  %66 = shl i32 %55, 4
  %67 = add i32 %66, 104
  %68 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %67)
  %69 = fmul float %51, %35
  %70 = fadd float %69, %68
  %71 = fmul float %51, 1.000000e+00
  %72 = fmul float %45, 0.000000e+00
  %73 = fmul float %48, 0.000000e+00
  %74 = fsub float -0.000000e+00, %71
  %75 = fmul float %48, 0.000000e+00
  %76 = fadd float %75, %74
  %77 = fsub float -0.000000e+00, %72
  %78 = fmul float %51, 0.000000e+00
  %79 = fadd float %78, %77
  %80 = fsub float -0.000000e+00, %73
  %81 = fmul float %45, 1.000000e+00
  %82 = fadd float %81, %80
  %83 = fmul float %76, %76
  %84 = fmul float %79, %79
  %85 = fadd float %84, %83
  %86 = fmul float %82, %82
  %87 = fadd float %85, %86
  %88 = call float @llvm.AMDGPU.rsq(float %87)
  %89 = fmul float %76, %88
  %90 = fmul float %79, %88
  %91 = fmul float %82, %88
  %92 = fmul float 4.000000e+00, %36
  %93 = fmul float %11, %92
  %94 = fmul float %89, %93
  %95 = fadd float %94, %60
  %96 = fmul float %90, %93
  %97 = fadd float %96, %65
  %98 = fmul float %91, %93
  %99 = fadd float %98, %70
  %100 = fmul float %16, %95
  %101 = fmul float %17, %95
  %102 = fmul float %18, %95
  %103 = fmul float %19, %95
  %104 = fmul float %20, %97
  %105 = fadd float %104, %100
  %106 = fmul float %21, %97
  %107 = fadd float %106, %101
  %108 = fmul float %22, %97
  %109 = fadd float %108, %102
  %110 = fmul float %23, %97
  %111 = fadd float %110, %103
  %112 = fmul float %24, %99
  %113 = fadd float %112, %105
  %114 = fmul float %25, %99
  %115 = fadd float %114, %107
  %116 = fmul float %26, %99
  %117 = fadd float %116, %109
  %118 = fmul float %27, %99
  %119 = fadd float %118, %111
  %120 = fadd float %113, %28
  %121 = fadd float %115, %29
  %122 = fadd float %117, %30
  %123 = fadd float %119, %31
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %12, float %13, float %14, float %15)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %120, float %121, float %122, float %123)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020107
c2028106
c2040105
c2048104
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010000
bf8c0770
060c0502
7e081106
34080884
4a0a08ff
00000064
e0301000
80000505
060c0cf2
7e0c1106
340e0c84
4a0c0eff
00000064
e0301000
80000806
bf8c0770
d2820005
04160108
4a0c0eff
00000060
e0301000
80000606
bf8c0770
10120c80
4a0e0eff
00000068
e0301000
80000707
bf8c0770
10140e80
0816130a
10101080
08140f08
1012150a
d2820009
0426170b
08101106
d2820009
04261108
7e125b09
1018130b
101602f6
c2020100
bf8c007f
10161604
d2820005
0416170c
4a1808ff
00000060
e0301000
80000c0c
bf8c0770
d2820006
04320106
1014130a
d2820006
041a170a
c202010b
bf8c007f
10140c04
c202010f
bf8c007f
d282000a
042a0a04
4a0808ff
00000068
e0301000
80000404
bf8c0770
d2820000
04120107
10021308
d2820000
04021701
c2020113
bf8c007f
d2820001
042a0004
c2020117
bf8c007f
06020204
c202010a
bf8c007f
10040c04
c202010e
bf8c007f
d2820002
040a0a04
c2020112
bf8c007f
d2820002
040a0004
c2020116
bf8c007f
06040404
c2020109
bf8c007f
10060c04
c202010d
bf8c007f
d2820003
040e0a04
c2020111
bf8c007f
d2820003
040e0004
c2020115
bf8c007f
06060604
c2020108
bf8c007f
10080c04
c202010c
bf8c007f
d2820004
04120a04
c2020110
bf8c007f
d2820000
04120004
c2000114
bf8c007f
06000000
f80008cf
01020300
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2..14]
DCL TEMP[0]
DCL TEMP[1..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[4].xxxx
  2: MOV TEMP[1].xy, IN[3].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP4 TEMP[3].x, IMM[0].yyyy, CONST[12]
  9: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[11].xxxx
 10: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 11: RSQ TEMP[4].x, TEMP[4].xxxx
 12: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 14: DP4 TEMP[2].x, IMM[0].yyyy, CONST[13]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[11].yyyy
 16: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].yyyy
 17: MOV TEMP[4].w, TEMP[2]
 18: DP3 TEMP[5].x, TEMP[1].xyzz, CONST[3].xyzz
 19: ADD TEMP[5].x, TEMP[5].xxxx, CONST[7].wwww
 20: MOV_SAT TEMP[6].x, TEMP[5].xxxx
 21: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[5].xyzz, CONST[6].xyzz
 22: MOV_SAT TEMP[7].x, -TEMP[5].xxxx
 23: LRP TEMP[7].xyz, TEMP[7].xxxx, CONST[7].xyzz, CONST[6].xyzz
 24: SLT TEMP[8].x, TEMP[5].xxxx, IMM[0].zzzz
 25: F2I TEMP[8].x, -TEMP[8]
 26: UIF TEMP[8].xxxx :1
 27:   MOV TEMP[7].xyz, TEMP[7].xyzx
 28: ELSE :1
 29:   MOV TEMP[7].xyz, TEMP[6].xyzx
 30: ENDIF
 31: DP3 TEMP[6].x, IN[2].xyzz, IN[2].xyzz
 32: RSQ TEMP[6].x, TEMP[6].xxxx
 33: MUL TEMP[6].xyz, IN[2].xyzz, TEMP[6].xxxx
 34: ADD TEMP[6].xyz, CONST[4].xyzz, -TEMP[6].xyzz
 35: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz
 36: RSQ TEMP[8].x, TEMP[8].xxxx
 37: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xxxx
 38: DP3_SAT TEMP[6].x, TEMP[6].xyzz, TEMP[1].xyzz
 39: POW TEMP[6].x, TEMP[6].xxxx, CONST[10].xxxx
 40: SGE TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 41: F2I TEMP[5].x, -TEMP[5]
 42: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
 43: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx
 44: MUL TEMP[5].xyz, CONST[8].xyzz, TEMP[5].xxxx
 45: MOV TEMP[1].xyz, TEMP[1].xyzz
 46: TEX TEMP[1].xyz, TEMP[1], SAMP[1], CUBE
 47: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[7].xyzz
 48: MOV TEMP[6].xy, IN[3].xyyy
 49: TEX TEMP[6], TEMP[6], SAMP[0], 2D
 50: MUL TEMP[6], TEMP[6], CONST[9]
 51: LRP TEMP[1].xyz, TEMP[6].wwww, TEMP[6].xyzz, TEMP[1].xyzz
 52: LRP TEMP[5].xyz, TEMP[6].wwww, IMM[0].zzzz, TEMP[5].xyzz
 53: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz
 54: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[1].xyzz
 55: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 56: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 57: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[4].xyzz, CONST[2].xyzz
 58: MOV OUT[0], TEMP[4]
 59: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = bitcast float %83 to i32
  %88 = icmp ne i32 %87, 0
  %. = select i1 %88, float -1.000000e+00, float 1.000000e+00
  %89 = fmul float 1.000000e+00, %52
  %90 = fmul float 1.000000e+00, %53
  %91 = fadd float %89, %90
  %92 = fmul float 1.000000e+00, %54
  %93 = fadd float %91, %92
  %94 = fmul float 1.000000e+00, %55
  %95 = fadd float %93, %94
  %96 = fadd float %95, %50
  %97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00)
  %98 = fmul float %80, %80
  %99 = fmul float %81, %81
  %100 = fadd float %99, %98
  %101 = fmul float %82, %82
  %102 = fadd float %100, %101
  %103 = call float @llvm.AMDGPU.rsq(float %102)
  %104 = fmul float %80, %103
  %105 = fmul float %81, %103
  %106 = fmul float %82, %103
  %107 = fmul float %104, %.
  %108 = fmul float %105, %.
  %109 = fmul float %106, %.
  %110 = fmul float 1.000000e+00, %56
  %111 = fmul float 1.000000e+00, %57
  %112 = fadd float %110, %111
  %113 = fmul float 1.000000e+00, %58
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %59
  %116 = fadd float %114, %115
  %117 = fadd float %116, %51
  %118 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00)
  %119 = call float @llvm.AMDGPU.lrp(float %118, float %70, float 1.000000e+00)
  %120 = call float @llvm.AMDGPU.lrp(float %118, float %71, float 1.000000e+00)
  %121 = call float @llvm.AMDGPU.lrp(float %118, float %72, float 1.000000e+00)
  %122 = call float @llvm.AMDGPU.lrp(float %118, float %73, float 1.000000e+00)
  %123 = fmul float %107, %26
  %124 = fmul float %108, %27
  %125 = fadd float %124, %123
  %126 = fmul float %109, %28
  %127 = fadd float %125, %126
  %128 = fadd float %127, %41
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %32, float %35)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %33, float %36)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %34, float %37)
  %133 = fsub float -0.000000e+00, %128
  %134 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00)
  %135 = call float @llvm.AMDGPU.lrp(float %134, float %38, float %35)
  %136 = call float @llvm.AMDGPU.lrp(float %134, float %39, float %36)
  %137 = call float @llvm.AMDGPU.lrp(float %134, float %40, float %37)
  %138 = fcmp ult float %128, 0.000000e+00
  %139 = select i1 %138, float 1.000000e+00, float 0.000000e+00
  %140 = fsub float -0.000000e+00, %139
  %141 = fptosi float %140 to i32
  %142 = bitcast i32 %141 to float
  %143 = bitcast float %142 to i32
  %144 = icmp ne i32 %143, 0
  %temp28.0 = select i1 %144, float %135, float %130
  %temp29.0 = select i1 %144, float %136, float %131
  %temp30.0 = select i1 %144, float %137, float %132
  %145 = fmul float %74, %74
  %146 = fmul float %75, %75
  %147 = fadd float %146, %145
  %148 = fmul float %76, %76
  %149 = fadd float %147, %148
  %150 = call float @llvm.AMDGPU.rsq(float %149)
  %151 = fmul float %74, %150
  %152 = fmul float %75, %150
  %153 = fmul float %76, %150
  %154 = fsub float -0.000000e+00, %151
  %155 = fadd float %29, %154
  %156 = fsub float -0.000000e+00, %152
  %157 = fadd float %30, %156
  %158 = fsub float -0.000000e+00, %153
  %159 = fadd float %31, %158
  %160 = fmul float %155, %155
  %161 = fmul float %157, %157
  %162 = fadd float %161, %160
  %163 = fmul float %159, %159
  %164 = fadd float %162, %163
  %165 = call float @llvm.AMDGPU.rsq(float %164)
  %166 = fmul float %155, %165
  %167 = fmul float %157, %165
  %168 = fmul float %159, %165
  %169 = fmul float %166, %107
  %170 = fmul float %167, %108
  %171 = fadd float %170, %169
  %172 = fmul float %168, %109
  %173 = fadd float %171, %172
  %174 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00)
  %175 = call float @llvm.pow.f32(float %174, float %49)
  %176 = fcmp uge float %128, 0x3F847AE140000000
  %177 = select i1 %176, float 1.000000e+00, float 0.000000e+00
  %178 = fsub float -0.000000e+00, %177
  %179 = fptosi float %178 to i32
  %180 = bitcast i32 %179 to float
  %181 = bitcast float %180 to i32
  %182 = and i32 %181, 1065353216
  %183 = bitcast i32 %182 to float
  %184 = fmul float %175, %183
  %185 = fmul float %42, %184
  %186 = fmul float %43, %184
  %187 = fmul float %44, %184
  %188 = insertelement <4 x float> undef, float %107, i32 0
  %189 = insertelement <4 x float> %188, float %108, i32 1
  %190 = insertelement <4 x float> %189, float %109, i32 2
  %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3
  %192 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %191)
  %193 = extractelement <4 x float> %192, i32 0
  %194 = extractelement <4 x float> %192, i32 1
  %195 = extractelement <4 x float> %192, i32 2
  %196 = extractelement <4 x float> %192, i32 3
  %197 = call float @fabs(float %195)
  %198 = fdiv float 1.000000e+00, %197
  %199 = fmul float %193, %198
  %200 = fadd float %199, 1.500000e+00
  %201 = fmul float %194, %198
  %202 = fadd float %201, 1.500000e+00
  %203 = bitcast float %202 to i32
  %204 = bitcast float %200 to i32
  %205 = bitcast float %196 to i32
  %206 = insertelement <4 x i32> undef, i32 %203, i32 0
  %207 = insertelement <4 x i32> %206, i32 %204, i32 1
  %208 = insertelement <4 x i32> %207, i32 %205, i32 2
  %209 = insertelement <4 x i32> %208, i32 undef, i32 3
  %210 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %209, <32 x i8> %65, <16 x i8> %67, i32 4)
  %211 = extractelement <4 x float> %210, i32 0
  %212 = extractelement <4 x float> %210, i32 1
  %213 = extractelement <4 x float> %210, i32 2
  %214 = fmul float %211, 4.000000e+00
  %215 = fadd float %214, %temp28.0
  %216 = fmul float %212, 4.000000e+00
  %217 = fadd float %216, %temp29.0
  %218 = fmul float %213, 4.000000e+00
  %219 = fadd float %218, %temp30.0
  %220 = bitcast float %78 to i32
  %221 = bitcast float %79 to i32
  %222 = insertelement <2 x i32> undef, i32 %220, i32 0
  %223 = insertelement <2 x i32> %222, i32 %221, i32 1
  %224 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %61, <16 x i8> %63, i32 2)
  %225 = extractelement <4 x float> %224, i32 0
  %226 = extractelement <4 x float> %224, i32 1
  %227 = extractelement <4 x float> %224, i32 2
  %228 = extractelement <4 x float> %224, i32 3
  %229 = fmul float %225, %45
  %230 = fmul float %226, %46
  %231 = fmul float %227, %47
  %232 = fmul float %228, %48
  %233 = call float @llvm.AMDGPU.lrp(float %232, float %229, float %215)
  %234 = call float @llvm.AMDGPU.lrp(float %232, float %230, float %217)
  %235 = call float @llvm.AMDGPU.lrp(float %232, float %231, float %219)
  %236 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %185)
  %237 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %186)
  %238 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %187)
  %239 = fmul float %119, %233
  %240 = fmul float %120, %234
  %241 = fmul float %121, %235
  %242 = fmul float %236, %97
  %243 = fadd float %242, %239
  %244 = fmul float %237, %97
  %245 = fadd float %244, %240
  %246 = fmul float %238, %97
  %247 = fadd float %246, %241
  %248 = fcmp uge float %77, %25
  %249 = select i1 %248, float %77, float %25
  %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00)
  %251 = call float @llvm.AMDGPU.lrp(float %250, float %243, float %22)
  %252 = call float @llvm.AMDGPU.lrp(float %250, float %245, float %23)
  %253 = call float @llvm.AMDGPU.lrp(float %250, float %247, float %24)
  %254 = call i32 @llvm.SI.packf16(float %251, float %252)
  %255 = bitcast i32 %254 to float
  %256 = call i32 @llvm.SI.packf16(float %253, float %122)
  %257 = bitcast i32 %256 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %255, float %257, float %255, float %257)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100b00
c8110b01
c80c0a00
c80d0a01
100a0703
d2820006
04160904
c8140c00
c8150c01
d2820006
041a0b05
7e0c5b06
10080d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000002
0021e6f2
10180504
10060d03
10160503
10060d05
101a0503
7e1c0280
d28a0003
0436190b
d28c0002
0436190b
d28e0004
0436190b
d2880005
0436190b
d206010a
02010104
7e14550a
7e1e02ff
3fc00000
d2820004
043e1502
d2820003
043e1503
c0840304
c0c60508
bf8c007f
f0800700
00430203
c0840100
bf8c0070
c200090c
bf8c007f
100a1600
c200090d
bf8c007f
d2820005
0414010c
c200090e
bf8c007f
d2820005
0414010d
c200091f
bf8c007f
062a0a00
d2060805
02010115
080c0af2
c2000919
bf8c007f
100e0c00
c2008915
bf8c007f
d2820009
041c0305
d2060007
22010115
d2060807
02010107
08100ef2
10141000
c200091d
bf8c007f
d282000a
04280107
d0020000
02010115
d200000f
0001e480
d206000f
2201010f
7e1e110f
d10a0000
0201010f
d2000009
00021509
d2820013
0425ed03
c8280900
c8290901
c8240800
c8250801
c0860300
c0c80500
bf8c007f
f0800f00
00640f09
c2010927
bf8c0070
10122402
081412f2
1026270a
c2010925
bf8c007f
10282002
d2820016
044e2909
c85c0100
c85d0101
c2010934
c2018935
bf8c007f
7e260203
d2060013
02022602
c2010936
bf8c007f
06262602
c2010937
bf8c007f
06262602
c201092d
bf8c007f
06262602
d2060813
02010113
082826f2
d2820017
04522f13
102c2d17
c85c0500
c85d0501
c8600400
c8610401
10323118
d282001a
04662f17
c8640600
c8650601
d282001a
046a3319
7e345b1a
102e3517
c2010911
bf8c007f
082e2e02
10303518
c2010910
bf8c007f
08303002
10363118
d282001b
046e2f17
10323519
c2010912
bf8c007f
08323202
d282001a
046e3319
7e345b1a
102e3517
10303518
10301718
d2820017
04621917
10303519
d282000b
045e1b18
d206080b
0201010b
7e164f0b
c2010928
bf8c007f
0e161602
7e164b0b
7e1802ff
3c23d70a
d00c0002
02021915
d200000c
0009e480
d206000c
2201010c
7e18110c
361818f2
1016190b
c2010921
bf8c007f
10181602
1018190a
d282000d
04310109
c2010930
c2018931
bf8c007f
7e180203
d206000c
02021802
c2010932
bf8c007f
06181802
c2010933
bf8c007f
06181802
c201092c
bf8c007f
06181802
d206080c
0201010c
d2820015
045a190d
c8340700
c8350701
c201090b
bf8c007f
d00c0004
0200050d
7e1c0202
d200000d
00121b0e
d206080d
0201010d
081c1af2
c2010909
bf8c007f
102c1c02
d2820015
045a2b0d
c2010918
bf8c007f
102c0c02
c2018914
bf8c007f
d2820016
04580705
102e1002
c201091c
bf8c007f
d2820017
045c0507
d2000016
00022f16
d2820016
0459ed02
102c2d0a
c2010924
bf8c007f
102e1e02
d2820016
045a2f09
c85c0000
c85d0001
d2820017
04522f13
102c2d17
c2010920
bf8c007f
102e1602
102e2f0a
d2820017
045d0109
d2820016
045a1917
c2010908
bf8c007f
102e1c02
d2820016
045e2d0d
5e2a2b16
c201091a
bf8c007f
100c0c02
c2018916
bf8c007f
d2820005
04180705
100c1002
c201091e
bf8c007f
d2820006
04180507
d2000005
00020d05
d2820002
0415ed04
1004050a
c2000926
bf8c007f
10062200
d2820002
040a0709
c80c0200
c80d0201
d2820003
04520713
10040503
c2000922
bf8c007f
10061600
1006070a
d2820003
040d0109
d2820002
040a1903
c200090a
bf8c007f
10061c00
d2820002
040e050d
c80c0300
c80d0301
d2820000
04520713
5e000102
f8001c0f
00150015
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..15]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[13].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 10: RSQ TEMP[3].x, TEMP[3].xxxx
 11: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 12: MUL TEMP[3], CONST[0], TEMP[0].xxxx
 13: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
 14: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 15: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 16: MOV TEMP[3].w, IMM[0].yyyy
 17: MOV TEMP[3].xyz, TEMP[0].xyzx
 18: MOV TEMP[4].w, IMM[0].yyyy
 19: MOV TEMP[4].xyz, TEMP[0].xyzx
 20: DP4 TEMP[3].x, CONST[14], TEMP[3]
 21: DP4 TEMP[4].x, CONST[15], TEMP[4]
 22: MOV TEMP[3].y, TEMP[4].xxxx
 23: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[11].xyzz
 24: MAD TEMP[4].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 25: MOV TEMP[0].w, TEMP[4].xxxx
 26: MOV TEMP[4].zw, TEMP[2].yyxy
 27: MOV TEMP[2].x, TEMP[2].zzzz
 28: MOV TEMP[4].xy, TEMP[3].xyxx
 29: MOV OUT[1], CONST[8]
 30: MOV OUT[4], TEMP[2]
 31: MOV OUT[2], TEMP[0]
 32: MOV OUT[0], TEMP[1]
 33: MOV OUT[3], TEMP[4]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = fmul float %66, %46
  %76 = fadd float %75, %43
  %77 = fmul float %67, %47
  %78 = fadd float %77, %44
  %79 = fmul float %68, %48
  %80 = fadd float %79, %45
  %81 = fmul float %23, %76
  %82 = fmul float %24, %76
  %83 = fmul float %25, %76
  %84 = fmul float %26, %76
  %85 = fmul float %27, %78
  %86 = fadd float %85, %81
  %87 = fmul float %28, %78
  %88 = fadd float %87, %82
  %89 = fmul float %29, %78
  %90 = fadd float %89, %83
  %91 = fmul float %30, %78
  %92 = fadd float %91, %84
  %93 = fmul float %31, %80
  %94 = fadd float %93, %86
  %95 = fmul float %32, %80
  %96 = fadd float %95, %88
  %97 = fmul float %33, %80
  %98 = fadd float %97, %90
  %99 = fmul float %34, %80
  %100 = fadd float %99, %92
  %101 = fadd float %94, %35
  %102 = fadd float %96, %36
  %103 = fadd float %98, %37
  %104 = fadd float %100, %38
  %105 = fmul float %72, %54
  %106 = fmul float %73, %54
  %107 = fmul float %74, %54
  %108 = fmul float %11, %105
  %109 = fmul float %12, %105
  %110 = fmul float %13, %105
  %111 = fmul float %14, %106
  %112 = fadd float %111, %108
  %113 = fmul float %15, %106
  %114 = fadd float %113, %109
  %115 = fmul float %16, %106
  %116 = fadd float %115, %110
  %117 = fmul float %17, %107
  %118 = fadd float %117, %112
  %119 = fmul float %18, %107
  %120 = fadd float %119, %114
  %121 = fmul float %19, %107
  %122 = fadd float %121, %116
  %123 = fmul float %118, %118
  %124 = fmul float %120, %120
  %125 = fadd float %124, %123
  %126 = fmul float %122, %122
  %127 = fadd float %125, %126
  %128 = call float @llvm.AMDGPU.rsq(float %127)
  %129 = fmul float %118, %128
  %130 = fmul float %120, %128
  %131 = fmul float %122, %128
  %132 = fmul float %11, %76
  %133 = fmul float %12, %76
  %134 = fmul float %13, %76
  %135 = fmul float %14, %78
  %136 = fadd float %135, %132
  %137 = fmul float %15, %78
  %138 = fadd float %137, %133
  %139 = fmul float %16, %78
  %140 = fadd float %139, %134
  %141 = fmul float %17, %80
  %142 = fadd float %141, %136
  %143 = fmul float %18, %80
  %144 = fadd float %143, %138
  %145 = fmul float %19, %80
  %146 = fadd float %145, %140
  %147 = fadd float %142, %20
  %148 = fadd float %144, %21
  %149 = fadd float %146, %22
  %150 = fmul float %55, %147
  %151 = fmul float %56, %148
  %152 = fadd float %150, %151
  %153 = fmul float %57, %149
  %154 = fadd float %152, %153
  %155 = fmul float %58, 1.000000e+00
  %156 = fadd float %154, %155
  %157 = fmul float %59, %147
  %158 = fmul float %60, %148
  %159 = fadd float %157, %158
  %160 = fmul float %61, %149
  %161 = fadd float %159, %160
  %162 = fmul float %62, 1.000000e+00
  %163 = fadd float %161, %162
  %164 = fsub float -0.000000e+00, %49
  %165 = fadd float %147, %164
  %166 = fsub float -0.000000e+00, %50
  %167 = fadd float %148, %166
  %168 = fsub float -0.000000e+00, %51
  %169 = fadd float %149, %168
  %170 = fmul float %103, %52
  %171 = fadd float %170, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %165, float %167, float %169, float %171)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %156, float %163, float %129, float %130)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %131, float %130, float %131, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %101, float %102, float %103, float %104)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c2020129
c2028125
bf8c0070
7e020205
d2820001
04040904
c2020128
c2028124
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012a
c2028126
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820008
04140904
c2020102
bf8c007f
100a0404
c2028106
bf8c007f
d2820005
04160205
c204010a
bf8c007f
d2820005
04160608
c204810e
bf8c007f
060a0a09
c204812e
bf8c007f
0a120a09
c2048101
bf8c007f
100c0409
c2058105
bf8c007f
d2820006
041a020b
c2050109
bf8c007f
d2820006
041a060a
c206010d
bf8c007f
060c0c0c
c206012d
bf8c007f
0a140c0c
c2060100
bf8c007f
100e040c
c2068104
bf8c007f
d2820007
041e020d
c2070108
bf8c007f
d2820007
041e060e
c207810c
bf8c007f
060e0e0f
c207812c
bf8c007f
0a160e0f
f800021f
08090a0b
c0880704
bf8c000f
e00c2000
80040900
c2030137
bf8c0070
10001406
10101206
101a1009
d282000d
0436000b
10121606
d282000a
0436120a
1016100c
d282000b
042e000d
d282000b
042e120e
1018170b
d282000c
0432150a
10101004
d2820000
04220005
d2820000
04021208
d2820008
04320100
7e125b08
1010130a
1014130b
c202013d
bf8c007f
10160c04
c202013c
bf8c007f
d282000b
042e0e04
c202013e
bf8c007f
d282000b
042e0a04
c202013f
bf8c007f
06161604
c2020139
bf8c007f
100c0c04
c2020138
bf8c007f
d2820006
041a0e04
c202013a
bf8c007f
d2820005
041a0a04
c202013b
bf8c007f
060a0a04
f800022f
080a0b05
10001300
bf8c070f
7e0a0280
f800023f
05000800
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MOV OUT[0], TEMP[1]
 28: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %42 = load <32 x i8> addrspace(2)* %41, !tbaa !0
  %43 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %56 = fmul float %13, %39
  %57 = fadd float %56, %40
  %58 = bitcast float %54 to i32
  %59 = bitcast float %55 to i32
  %60 = insertelement <2 x i32> undef, i32 %58, i32 0
  %61 = insertelement <2 x i32> %60, i32 %59, i32 1
  %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %42, <16 x i8> %44, i32 2)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = fmul float %63, 0x3FCB367A00000000
  %68 = fmul float %64, 0x3FE6E2EB20000000
  %69 = fadd float %68, %67
  %70 = fmul float %65, 0x3FB27BB300000000
  %71 = fadd float %69, %70
  %72 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %63)
  %73 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %64)
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %65)
  %75 = fmul float %72, %35
  %76 = fmul float %73, %36
  %77 = fadd float %75, %76
  %78 = fmul float %74, %37
  %79 = fadd float %77, %78
  %80 = fmul float %66, %38
  %81 = fadd float %79, %80
  %82 = fadd float %81, %30
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDGPU.lrp(float %83, float %49, float 1.000000e+00)
  %85 = call float @llvm.AMDGPU.lrp(float %83, float %50, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %83, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %83, float %52, float 1.000000e+00)
  %88 = fmul float %72, %84
  %89 = fmul float %73, %85
  %90 = fmul float %74, %86
  %91 = fmul float %66, %87
  %92 = fmul float %12, %22
  %93 = fmul float %57, %23
  %94 = bitcast float %92 to i32
  %95 = bitcast float %93 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %46, <16 x i8> %48, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %72, %31
  %104 = fmul float %73, %32
  %105 = fadd float %103, %104
  %106 = fmul float %74, %33
  %107 = fadd float %105, %106
  %108 = fmul float %66, %34
  %109 = fadd float %107, %108
  %110 = fadd float %109, %29
  %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00)
  %112 = fmul float %111, %102
  %113 = fmul float %99, 0x3FCB367A00000000
  %114 = fmul float %100, 0x3FE6E2EB20000000
  %115 = fadd float %114, %113
  %116 = fmul float %101, 0x3FB27BB300000000
  %117 = fadd float %115, %116
  %118 = fcmp uge float %117, 0x3F50624DE0000000
  %119 = select i1 %118, float %117, float 0x3F50624DE0000000
  %120 = fdiv float 1.000000e+00, %119
  %121 = fmul float %99, %120
  %122 = fmul float %100, %120
  %123 = fmul float %101, %120
  %124 = fmul float %88, %99
  %125 = fmul float %89, %100
  %126 = fmul float %90, %101
  %127 = fmul float %112, %121
  %128 = fadd float %127, %124
  %129 = fmul float %112, %122
  %130 = fadd float %129, %125
  %131 = fmul float %112, %123
  %132 = fadd float %131, %126
  %133 = fmul float %128, 4.000000e+00
  %134 = fmul float %130, 4.000000e+00
  %135 = fmul float %132, 4.000000e+00
  %136 = fcmp uge float %53, %27
  %137 = select i1 %136, float %53, float %27
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %138, float %133, float %24)
  %140 = call float @llvm.AMDGPU.lrp(float %138, float %134, float %25)
  %141 = call float @llvm.AMDGPU.lrp(float %138, float %135, float %26)
  %142 = call i32 @llvm.SI.packf16(float %139, float %140)
  %143 = bitcast i32 %142 to float
  %144 = call i32 @llvm.SI.packf16(float %141, float %91)
  %145 = bitcast i32 %144 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %143, float %145, float %143, float %145)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430504
bf8c0770
10080aff
3e59b3d0
7e2002ff
3f371759
d2820004
04122106
7e2202ff
3d93dd98
d282000a
04122307
c0840100
bf8c007f
c2000910
bf8c007f
d208000b
020000f2
10080b0b
d2820004
04121400
10120d0b
d2820009
04261400
c2008921
bf8c007f
10181201
c2008920
bf8c007f
d282000c
04300304
10160f0b
d2820012
042e1400
c2000922
bf8c007f
d282000a
04300112
c2000923
bf8c007f
d282000a
04280108
c2000919
bf8c007f
06141400
d206080a
0201010a
081614f2
c8300200
c8310201
d282000c
042e190a
10261912
c200092c
c200892d
bf8c007f
7e180201
d2820003
04300103
c2000909
bf8c007f
101a0600
c2000908
bf8c007f
10180400
c0800304
c0c60508
bf8c007f
f0800f00
00030c0c
bf8c0770
10261d13
c200091d
bf8c007f
10041200
c200091c
bf8c007f
d2820002
04080104
c200091e
bf8c007f
d2820002
04080112
c200091f
bf8c007f
d2820002
04080108
c2000918
bf8c007f
06040400
d2060802
02010102
10041f02
100618ff
3e59b3d0
d2820003
040e210d
d2820003
040e230e
7e2002ff
3a83126f
d00c0000
02022103
d2000003
00020710
7e065503
1020070e
d2820010
044e2102
102420f6
c8400700
c8410701
c200090f
bf8c007f
d00c0002
02000110
7e220200
d2000010
000a2111
d2060810
02010110
082220f2
c200090e
bf8c007f
10262200
d2820012
044e2510
c84c0300
c84d0301
d2820013
042e270a
100a2708
5e0a0b12
c8180100
c8190101
d2820006
042e0d0a
100c0d09
100c1b06
100e070d
d2820006
041a0f02
100c0cf6
c200090d
bf8c007f
100e2200
d2820006
041e0d10
c81c0000
c81d0001
d2820000
042e0f0a
10000104
10001900
1002070c
d2820000
04020302
100000f6
c200090c
bf8c007f
10022200
d2820000
04060110
5e000d00
f8001c0f
05000500
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[7].xyzz, CONST[6].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MUL TEMP[3], CONST[0], TEMP[1].xxxx
  8: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 10: ADD TEMP[0], TEMP[0], CONST[3]
 11: MOV TEMP[2].w, IMM[0].xxxx
 12: MOV TEMP[2].xyz, CONST[4].xyzx
 13: MUL TEMP[2], TEMP[2], IN[3]
 14: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[9].xyzz
 15: MAD TEMP[3].x, TEMP[0].zzzz, CONST[8].xxxx, CONST[8].yyyy
 16: MOV TEMP[1].w, TEMP[3].xxxx
 17: MAD TEMP[3].xy, IN[5].xyyy, CONST[5].xyyy, CONST[5].zwww
 18: MOV OUT[3], TEMP[3]
 19: MOV OUT[1], TEMP[2]
 20: MOV OUT[2], TEMP[1]
 21: MOV OUT[0], TEMP[0]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = extractelement <4 x float> %47, i32 3
  %52 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %5)
  %55 = extractelement <4 x float> %54, i32 0
  %56 = extractelement <4 x float> %54, i32 1
  %57 = extractelement <4 x float> %54, i32 2
  %58 = extractelement <4 x float> %54, i32 3
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = extractelement <4 x float> %68, i32 3
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = extractelement <4 x float> %75, i32 2
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fmul float %76, %37
  %85 = fadd float %84, %34
  %86 = fmul float %77, %38
  %87 = fadd float %86, %35
  %88 = fmul float %78, %39
  %89 = fadd float %88, %36
  %90 = fmul float %85, %48
  %91 = fmul float %87, %49
  %92 = fadd float %90, %91
  %93 = fmul float %89, %50
  %94 = fadd float %92, %93
  %95 = fmul float 1.000000e+00, %51
  %96 = fadd float %94, %95
  %97 = fmul float %85, %55
  %98 = fmul float %87, %56
  %99 = fadd float %97, %98
  %100 = fmul float %89, %57
  %101 = fadd float %99, %100
  %102 = fmul float 1.000000e+00, %58
  %103 = fadd float %101, %102
  %104 = fmul float %85, %62
  %105 = fmul float %87, %63
  %106 = fadd float %104, %105
  %107 = fmul float %89, %64
  %108 = fadd float %106, %107
  %109 = fmul float 1.000000e+00, %65
  %110 = fadd float %108, %109
  %111 = fmul float %11, %96
  %112 = fmul float %12, %96
  %113 = fmul float %13, %96
  %114 = fmul float %14, %96
  %115 = fmul float %15, %103
  %116 = fadd float %115, %111
  %117 = fmul float %16, %103
  %118 = fadd float %117, %112
  %119 = fmul float %17, %103
  %120 = fadd float %119, %113
  %121 = fmul float %18, %103
  %122 = fadd float %121, %114
  %123 = fmul float %19, %110
  %124 = fadd float %123, %116
  %125 = fmul float %20, %110
  %126 = fadd float %125, %118
  %127 = fmul float %21, %110
  %128 = fadd float %127, %120
  %129 = fmul float %22, %110
  %130 = fadd float %129, %122
  %131 = fadd float %124, %23
  %132 = fadd float %126, %24
  %133 = fadd float %128, %25
  %134 = fadd float %130, %26
  %135 = fmul float %27, %69
  %136 = fmul float %28, %70
  %137 = fmul float %29, %71
  %138 = fmul float 1.000000e+00, %72
  %139 = fsub float -0.000000e+00, %42
  %140 = fadd float %96, %139
  %141 = fsub float -0.000000e+00, %43
  %142 = fadd float %103, %141
  %143 = fsub float -0.000000e+00, %44
  %144 = fadd float %110, %143
  %145 = fmul float %133, %40
  %146 = fadd float %145, %41
  %147 = fmul float %82, %30
  %148 = fadd float %147, %32
  %149 = fmul float %83, %31
  %150 = fadd float %149, %33
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %135, float %136, float %137, float %138)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %140, float %142, float %144, float %146)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %148, float %150, float %113, float %114)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %131, float %132, float %133, float %134)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020500
c202011d
c2028119
bf8c0070
7e020205
d2820003
04040906
c0840704
bf8c007f
e00c2000
80020900
bf8c0770
10021503
c202011c
c2028118
bf8c007f
7e040205
d2820004
04080905
d2820001
04061304
c202011e
c202811a
bf8c007f
7e040205
d2820006
04080907
d2820001
04061706
06021901
c0840700
bf8c007f
e00c2000
80020700
bf8c0770
10041103
d2820002
040a0f04
d2820002
040a1306
06041502
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820007
04160204
c0840708
bf8c007f
e00c2000
80020800
bf8c0770
10061303
d2820003
040e1104
d2820003
040e1506
06061703
c202010a
bf8c007f
d2820004
041e0604
c202010e
bf8c007f
06080804
c2020120
c2028121
bf8c007f
7e0c0205
d2820006
04180904
c2020126
bf8c007f
0a0e0604
c2020125
bf8c007f
0a100204
c2020124
bf8c007f
0a120404
f800021f
06070809
c2020103
bf8c000f
100c0404
c0820714
bf8c007f
e00c2000
80010700
c2020115
c2028117
bf8c0070
7e000205
d2820000
04000908
c2020114
c2028116
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020107
bf8c000f
d2820000
041a0204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: DP4 TEMP[2].x, TEMP[1], CONST[7]
  5: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy
  6: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].xxxx
  7: MUL TEMP[2], TEMP[1], TEMP[2]
  8: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[3].xy, TEMP[3].xyyy
 10: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 11: DP4 TEMP[1].x, TEMP[1], CONST[6]
 12: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[5].xxxx
 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].wwww
 14: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].yzww
 15: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 16: RCP TEMP[4].x, TEMP[4].xxxx
 17: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 18: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 19: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 21: MAX TEMP[1].x, IN[2].wwww, CONST[3].wwww
 22: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 23: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 24: MOV OUT[0], TEMP[2]
 25: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %45 = load <32 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %55 = fmul float %13, %38
  %56 = fadd float %55, %39
  %57 = bitcast float %53 to i32
  %58 = bitcast float %54 to i32
  %59 = insertelement <2 x i32> undef, i32 %57, i32 0
  %60 = insertelement <2 x i32> %59, i32 %58, i32 1
  %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %41, <16 x i8> %43, i32 2)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = fmul float %62, %34
  %67 = fmul float %63, %35
  %68 = fadd float %66, %67
  %69 = fmul float %64, %36
  %70 = fadd float %68, %69
  %71 = fmul float %65, %37
  %72 = fadd float %70, %71
  %73 = fadd float %72, %29
  %74 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00)
  %75 = call float @llvm.AMDGPU.lrp(float %74, float %48, float 1.000000e+00)
  %76 = call float @llvm.AMDGPU.lrp(float %74, float %49, float 1.000000e+00)
  %77 = call float @llvm.AMDGPU.lrp(float %74, float %50, float 1.000000e+00)
  %78 = call float @llvm.AMDGPU.lrp(float %74, float %51, float 1.000000e+00)
  %79 = fmul float %62, %75
  %80 = fmul float %63, %76
  %81 = fmul float %64, %77
  %82 = fmul float %65, %78
  %83 = fmul float %12, %22
  %84 = fmul float %56, %23
  %85 = bitcast float %83 to i32
  %86 = bitcast float %84 to i32
  %87 = insertelement <2 x i32> undef, i32 %85, i32 0
  %88 = insertelement <2 x i32> %87, i32 %86, i32 1
  %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %45, <16 x i8> %47, i32 2)
  %90 = extractelement <4 x float> %89, i32 0
  %91 = extractelement <4 x float> %89, i32 1
  %92 = extractelement <4 x float> %89, i32 2
  %93 = extractelement <4 x float> %89, i32 3
  %94 = fmul float %62, %30
  %95 = fmul float %63, %31
  %96 = fadd float %94, %95
  %97 = fmul float %64, %32
  %98 = fadd float %96, %97
  %99 = fmul float %65, %33
  %100 = fadd float %98, %99
  %101 = fadd float %100, %28
  %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00)
  %103 = fmul float %102, %93
  %104 = fmul float %90, 0x3FCB367A00000000
  %105 = fmul float %91, 0x3FE6E2EB20000000
  %106 = fadd float %105, %104
  %107 = fmul float %92, 0x3FB27BB300000000
  %108 = fadd float %106, %107
  %109 = fcmp uge float %108, 0x3F50624DE0000000
  %110 = select i1 %109, float %108, float 0x3F50624DE0000000
  %111 = fdiv float 1.000000e+00, %110
  %112 = fmul float %90, %111
  %113 = fmul float %91, %111
  %114 = fmul float %92, %111
  %115 = fmul float %79, %90
  %116 = fmul float %80, %91
  %117 = fmul float %81, %92
  %118 = fmul float %103, %112
  %119 = fadd float %118, %115
  %120 = fmul float %103, %113
  %121 = fadd float %120, %116
  %122 = fmul float %103, %114
  %123 = fadd float %122, %117
  %124 = fmul float %119, 4.000000e+00
  %125 = fmul float %121, 4.000000e+00
  %126 = fmul float %123, 4.000000e+00
  %127 = fcmp uge float %52, %27
  %128 = select i1 %127, float %52, float %27
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %124, float %24)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %125, float %25)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %126, float %26)
  %133 = call i32 @llvm.SI.packf16(float %130, float %131)
  %134 = bitcast i32 %133 to float
  %135 = call i32 @llvm.SI.packf16(float %132, float %82)
  %136 = bitcast i32 %135 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %134, float %136, float %134, float %136)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c0840100
bf8c0070
c200091d
bf8c007f
10100a00
c200091c
bf8c007f
d2820008
04200104
c200091e
bf8c007f
d2820008
04200106
c200091f
bf8c007f
d2820008
04200107
c2000915
bf8c007f
06101000
d2060808
02010108
081210f2
c8280200
c8290201
d282000a
04261508
101c1506
c2000928
c2008929
bf8c007f
7e140201
d2820003
04280103
c2000909
bf8c007f
10160600
c2000908
bf8c007f
10140400
c0800304
c0c60508
bf8c007f
f0800f00
00030a0a
bf8c0770
101c190e
100414ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070b
7e0602ff
3d93dd98
d2820002
040a070c
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
101e050c
c2000919
bf8c007f
10060a00
c2000918
bf8c007f
d2820003
040c0104
c200091a
bf8c007f
d2820003
040c0106
c200091b
bf8c007f
d2820003
040c0107
c2000914
bf8c007f
06060600
d2060803
02010103
10061b03
d282000e
043a1f03
10201cf6
c8380700
c8390701
c200090f
bf8c007f
d00c0002
0200010e
7e1e0200
d200000e
000a1d0f
d206080e
0201010e
081e1cf2
c200090e
bf8c007f
10221e00
d2820010
0446210e
c8440300
c8450301
d2820011
04262308
10222307
5e202310
c8440100
c8450101
d2820011
04262308
10222305
10221711
1024050b
d2820011
04462503
102222f6
c200090d
bf8c007f
10241e00
d2820011
044a230e
c8480000
c8490001
d2820000
04262508
10000104
10001500
1002050a
d2820000
04020303
100000f6
c200090c
bf8c007f
10021e00
d2820000
0406010e
5e002300
f8001c0f
10001000
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[7].xyzz, CONST[6].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MUL TEMP[3], CONST[0], TEMP[1].xxxx
  8: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 10: ADD TEMP[0], TEMP[0], CONST[3]
 11: MOV TEMP[2].w, IMM[0].xxxx
 12: MOV TEMP[2].xyz, CONST[4].xyzx
 13: MUL TEMP[2], TEMP[2], IN[3]
 14: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[9].xyzz
 15: MAD TEMP[3].x, TEMP[0].zzzz, CONST[8].xxxx, CONST[8].yyyy
 16: MOV TEMP[1].w, TEMP[3].xxxx
 17: MAD TEMP[3].xy, IN[5].xyyy, CONST[5].xyyy, CONST[5].zwww
 18: MOV OUT[3], TEMP[3]
 19: MOV OUT[1], TEMP[2]
 20: MOV OUT[2], TEMP[1]
 21: MOV OUT[0], TEMP[0]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = extractelement <4 x float> %47, i32 3
  %52 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %5)
  %55 = extractelement <4 x float> %54, i32 0
  %56 = extractelement <4 x float> %54, i32 1
  %57 = extractelement <4 x float> %54, i32 2
  %58 = extractelement <4 x float> %54, i32 3
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = extractelement <4 x float> %68, i32 3
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = extractelement <4 x float> %75, i32 2
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fmul float %76, %37
  %85 = fadd float %84, %34
  %86 = fmul float %77, %38
  %87 = fadd float %86, %35
  %88 = fmul float %78, %39
  %89 = fadd float %88, %36
  %90 = fmul float %85, %48
  %91 = fmul float %87, %49
  %92 = fadd float %90, %91
  %93 = fmul float %89, %50
  %94 = fadd float %92, %93
  %95 = fmul float 1.000000e+00, %51
  %96 = fadd float %94, %95
  %97 = fmul float %85, %55
  %98 = fmul float %87, %56
  %99 = fadd float %97, %98
  %100 = fmul float %89, %57
  %101 = fadd float %99, %100
  %102 = fmul float 1.000000e+00, %58
  %103 = fadd float %101, %102
  %104 = fmul float %85, %62
  %105 = fmul float %87, %63
  %106 = fadd float %104, %105
  %107 = fmul float %89, %64
  %108 = fadd float %106, %107
  %109 = fmul float 1.000000e+00, %65
  %110 = fadd float %108, %109
  %111 = fmul float %11, %96
  %112 = fmul float %12, %96
  %113 = fmul float %13, %96
  %114 = fmul float %14, %96
  %115 = fmul float %15, %103
  %116 = fadd float %115, %111
  %117 = fmul float %16, %103
  %118 = fadd float %117, %112
  %119 = fmul float %17, %103
  %120 = fadd float %119, %113
  %121 = fmul float %18, %103
  %122 = fadd float %121, %114
  %123 = fmul float %19, %110
  %124 = fadd float %123, %116
  %125 = fmul float %20, %110
  %126 = fadd float %125, %118
  %127 = fmul float %21, %110
  %128 = fadd float %127, %120
  %129 = fmul float %22, %110
  %130 = fadd float %129, %122
  %131 = fadd float %124, %23
  %132 = fadd float %126, %24
  %133 = fadd float %128, %25
  %134 = fadd float %130, %26
  %135 = fmul float %27, %69
  %136 = fmul float %28, %70
  %137 = fmul float %29, %71
  %138 = fmul float 1.000000e+00, %72
  %139 = fsub float -0.000000e+00, %42
  %140 = fadd float %96, %139
  %141 = fsub float -0.000000e+00, %43
  %142 = fadd float %103, %141
  %143 = fsub float -0.000000e+00, %44
  %144 = fadd float %110, %143
  %145 = fmul float %133, %40
  %146 = fadd float %145, %41
  %147 = fmul float %82, %30
  %148 = fadd float %147, %32
  %149 = fmul float %83, %31
  %150 = fadd float %149, %33
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %135, float %136, float %137, float %138)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %140, float %142, float %144, float %146)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %148, float %150, float %113, float %114)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %131, float %132, float %133, float %134)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020500
c202011d
c2028119
bf8c0070
7e020205
d2820003
04040906
c0840704
bf8c007f
e00c2000
80020900
bf8c0770
10021503
c202011c
c2028118
bf8c007f
7e040205
d2820004
04080905
d2820001
04061304
c202011e
c202811a
bf8c007f
7e040205
d2820006
04080907
d2820001
04061706
06021901
c0840700
bf8c007f
e00c2000
80020700
bf8c0770
10041103
d2820002
040a0f04
d2820002
040a1306
06041502
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820007
04160204
c0840708
bf8c007f
e00c2000
80020800
bf8c0770
10061303
d2820003
040e1104
d2820003
040e1506
06061703
c202010a
bf8c007f
d2820004
041e0604
c202010e
bf8c007f
06080804
c2020120
c2028121
bf8c007f
7e0c0205
d2820006
04180904
c2020126
bf8c007f
0a0e0604
c2020125
bf8c007f
0a100204
c2020124
bf8c007f
0a120404
f800021f
06070809
c2020103
bf8c000f
100c0404
c0820714
bf8c007f
e00c2000
80010700
c2020115
c2028117
bf8c0070
7e000205
d2820000
04000908
c2020114
c2028116
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020107
bf8c000f
d2820000
041a0204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: DP4 TEMP[0].x, IMM[0].xxxx, CONST[4]
  1: ADD_SAT TEMP[0].x, TEMP[0].xxxx, CONST[2].yyyy
  2: LRP TEMP[0], TEMP[0].xxxx, IN[0], IMM[0].xxxx
  3: MOV TEMP[1].w, TEMP[0].wwww
  4: MAX TEMP[2].x, IN[1].wwww, CONST[0].wwww
  5: MOV_SAT TEMP[2].x, TEMP[2].xxxx
  6: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[0].xyzz
  7: MOV OUT[0], TEMP[1]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %36 = fmul float 1.000000e+00, %27
  %37 = fmul float 1.000000e+00, %28
  %38 = fadd float %36, %37
  %39 = fmul float 1.000000e+00, %29
  %40 = fadd float %38, %39
  %41 = fmul float 1.000000e+00, %30
  %42 = fadd float %40, %41
  %43 = fadd float %42, %26
  %44 = call float @llvm.AMDIL.clamp.(float %43, float 0.000000e+00, float 1.000000e+00)
  %45 = call float @llvm.AMDGPU.lrp(float %44, float %31, float 1.000000e+00)
  %46 = call float @llvm.AMDGPU.lrp(float %44, float %32, float 1.000000e+00)
  %47 = call float @llvm.AMDGPU.lrp(float %44, float %33, float 1.000000e+00)
  %48 = call float @llvm.AMDGPU.lrp(float %44, float %34, float 1.000000e+00)
  %49 = fcmp uge float %35, %25
  %50 = select i1 %49, float %35, float %25
  %51 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00)
  %52 = call float @llvm.AMDGPU.lrp(float %51, float %45, float %22)
  %53 = call float @llvm.AMDGPU.lrp(float %51, float %46, float %23)
  %54 = call float @llvm.AMDGPU.lrp(float %51, float %47, float %24)
  %55 = call i32 @llvm.SI.packf16(float %52, float %53)
  %56 = bitcast i32 %55 to float
  %57 = call i32 @llvm.SI.packf16(float %54, float %48)
  %58 = bitcast i32 %57 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080700
c8090701
c0800100
bf8c007f
c2020103
bf8c007f
d00c0008
02000902
7e060204
d2000002
00220503
d2060802
02010102
080604f2
c2020101
bf8c007f
100c0604
c81c0100
c81d0101
c2020110
c2028111
bf8c007f
7e080205
d2060004
02020804
c2020112
bf8c007f
06080804
c2020113
bf8c007f
06080804
c2020109
bf8c007f
06080804
d2060804
02010104
080a08f2
d2820007
04160f04
d2820006
041a0f02
c2020100
bf8c007f
100e0604
c8200000
c8210001
d2820008
04161104
d2820007
041e1102
5e0c0d07
c2000102
bf8c007f
10060600
c81c0200
c81d0201
d2820007
04160f04
d2820002
040e0f02
c80c0300
c80d0301
d2820000
04160704
5e000102
f8001c0f
00060006
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..12]
DCL TEMP[0..2], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[0].xyz, TEMP[0], CONST[3]
  9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[11].xyzz
 10: MAD TEMP[2].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 11: MOV TEMP[0].w, TEMP[2].xxxx
 12: MOV OUT[1], CONST[8]
 13: MOV OUT[2], TEMP[0]
 14: MOV OUT[0], TEMP[1]
 15: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %5)
  %57 = extractelement <4 x float> %56, i32 0
  %58 = extractelement <4 x float> %56, i32 1
  %59 = extractelement <4 x float> %56, i32 2
  %60 = fmul float %57, %46
  %61 = fadd float %60, %43
  %62 = fmul float %58, %47
  %63 = fadd float %62, %44
  %64 = fmul float %59, %48
  %65 = fadd float %64, %45
  %66 = fmul float %23, %61
  %67 = fmul float %24, %61
  %68 = fmul float %25, %61
  %69 = fmul float %26, %61
  %70 = fmul float %27, %63
  %71 = fadd float %70, %66
  %72 = fmul float %28, %63
  %73 = fadd float %72, %67
  %74 = fmul float %29, %63
  %75 = fadd float %74, %68
  %76 = fmul float %30, %63
  %77 = fadd float %76, %69
  %78 = fmul float %31, %65
  %79 = fadd float %78, %71
  %80 = fmul float %32, %65
  %81 = fadd float %80, %73
  %82 = fmul float %33, %65
  %83 = fadd float %82, %75
  %84 = fmul float %34, %65
  %85 = fadd float %84, %77
  %86 = fadd float %79, %35
  %87 = fadd float %81, %36
  %88 = fadd float %83, %37
  %89 = fadd float %85, %38
  %90 = fmul float %11, %61
  %91 = fmul float %12, %61
  %92 = fmul float %13, %61
  %93 = fmul float %14, %63
  %94 = fadd float %93, %90
  %95 = fmul float %15, %63
  %96 = fadd float %95, %91
  %97 = fmul float %16, %63
  %98 = fadd float %97, %92
  %99 = fmul float %17, %65
  %100 = fadd float %99, %94
  %101 = fmul float %18, %65
  %102 = fadd float %101, %96
  %103 = fmul float %19, %65
  %104 = fadd float %103, %98
  %105 = fadd float %100, %20
  %106 = fadd float %102, %21
  %107 = fadd float %104, %22
  %108 = fsub float -0.000000e+00, %49
  %109 = fadd float %105, %108
  %110 = fsub float -0.000000e+00, %50
  %111 = fadd float %106, %110
  %112 = fsub float -0.000000e+00, %51
  %113 = fadd float %107, %112
  %114 = fmul float %88, %52
  %115 = fadd float %114, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %109, float %111, float %113, float %115)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %86, float %87, float %88, float %89)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020112
bf8c007f
100c0204
c2020116
bf8c007f
d2820006
041a0004
c202012a
c2028126
bf8c007f
7e0e0205
d2820002
041c0904
c202011a
bf8c007f
d2820003
041a0404
c202011e
bf8c007f
06060604
c2020130
c2028131
bf8c007f
7e080205
d2820004
04100903
c2020102
bf8c007f
100a0204
c2020106
bf8c007f
d2820005
04160004
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c202012e
bf8c007f
0a0a0a04
c2020101
bf8c007f
100c0204
c2020105
bf8c007f
d2820006
041a0004
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c202012d
bf8c007f
0a0c0c04
c2020100
bf8c007f
100e0204
c2020104
bf8c007f
d2820007
041e0004
c2020108
bf8c007f
d2820007
041e0404
c202010c
bf8c007f
060e0e04
c202012c
bf8c007f
0a0e0e04
f800021f
04050607
c2020113
bf8c000f
10080204
c2020117
bf8c007f
d2820004
04120004
c202011b
bf8c007f
d2820004
04120404
c202011f
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
04030500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[12..13]
DCL CONST[5..11]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[13].xxxx, CONST[13].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: MOV TEMP[6].xy, IN[5].xyyy
 14: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 15: MAD TEMP[6].yw, IMM[0].zzzz, TEMP[6], IMM[0].xxxx
 16: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz
 17: RSQ TEMP[7].x, TEMP[7].xxxx
 18: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 19: DP2 TEMP[7].x, TEMP[6].ywww, TEMP[6].ywww
 20: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx
 21: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
 22: RSQ TEMP[8].x, TEMP[7].xxxx
 23: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx
 24: CMP TEMP[8].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].wwww
 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 26: DP3 TEMP[7].x, IN[6].yzww, IN[6].yzww
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[7].xyz, IN[6].yzww, TEMP[7].xxxx
 29: DP3 TEMP[8].x, IN[7].xyzz, IN[7].xyzz
 30: RSQ TEMP[8].x, TEMP[8].xxxx
 31: MUL TEMP[8].xyz, IN[7].xyzz, TEMP[8].xxxx
 32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].wwww
 33: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[6].yyyy, TEMP[8].xyzz
 34: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
 35: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 36: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 37: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 38: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 39: MOV TEMP[2].xyz, TEMP[2].xyzz
 40: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 41: DP4 TEMP[3].x, TEMP[4], CONST[11]
 42: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[8].zzzz
 43: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 44: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[4].xyzz
 45: DP4 TEMP[2].x, TEMP[4], CONST[10]
 46: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy
 47: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 48: MUL TEMP[2], TEMP[5], TEMP[2]
 49: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 50: MOV TEMP[3].xy, TEMP[3].xyyy
 51: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 52: DP4 TEMP[4].x, TEMP[4], CONST[9]
 53: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[8].xxxx
 54: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 55: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 56: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww
 57: RCP TEMP[5].x, TEMP[5].xxxx
 58: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 59: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 60: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 61: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[2].xxxx
 62: MOV TEMP[3].xy, IN[3].zwww
 63: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D
 64: MAD TEMP[2].xyz, CONST[12].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
 65: MAX TEMP[3].x, IN[4].wwww, CONST[6].wwww
 66: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 67: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[6].xyzz
 68: MOV OUT[0], TEMP[2]
 69: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %91 = fmul float %13, %46
  %92 = fadd float %91, %47
  %93 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %97 = bitcast float %93 to i32
  %98 = icmp ne i32 %97, 0
  %. = select i1 %98, float -1.000000e+00, float 1.000000e+00
  %99 = bitcast float %80 to i32
  %100 = bitcast float %81 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %49, <16 x i8> %51, i32 2)
  %104 = extractelement <4 x float> %103, i32 0
  %105 = extractelement <4 x float> %103, i32 1
  %106 = extractelement <4 x float> %103, i32 2
  %107 = extractelement <4 x float> %103, i32 3
  %108 = bitcast float %80 to i32
  %109 = bitcast float %81 to i32
  %110 = insertelement <2 x i32> undef, i32 %108, i32 0
  %111 = insertelement <2 x i32> %110, i32 %109, i32 1
  %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %53, <16 x i8> %55, i32 2)
  %113 = extractelement <4 x float> %112, i32 1
  %114 = extractelement <4 x float> %112, i32 3
  %115 = fmul float 2.000000e+00, %113
  %116 = fadd float %115, -1.000000e+00
  %117 = fmul float 2.000000e+00, %114
  %118 = fadd float %117, -1.000000e+00
  %119 = fmul float %82, %82
  %120 = fmul float %83, %83
  %121 = fadd float %120, %119
  %122 = fmul float %84, %84
  %123 = fadd float %121, %122
  %124 = call float @llvm.AMDGPU.rsq(float %123)
  %125 = fmul float %82, %124
  %126 = fmul float %83, %124
  %127 = fmul float %84, %124
  %128 = fmul float %116, %116
  %129 = fmul float %118, %118
  %130 = fadd float %128, %129
  %131 = fsub float -0.000000e+00, %130
  %132 = fadd float 1.000000e+00, %131
  %133 = fcmp uge float 0.000000e+00, %132
  %134 = select i1 %133, float 0.000000e+00, float %132
  %135 = call float @llvm.AMDGPU.rsq(float %134)
  %136 = fmul float %135, %134
  %137 = fsub float -0.000000e+00, %134
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %136, float 0.000000e+00)
  %139 = fmul float %125, %138
  %140 = fmul float %126, %138
  %141 = fmul float %127, %138
  %142 = fmul float %85, %85
  %143 = fmul float %86, %86
  %144 = fadd float %143, %142
  %145 = fmul float %87, %87
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %85, %147
  %149 = fmul float %86, %147
  %150 = fmul float %87, %147
  %151 = fmul float %88, %88
  %152 = fmul float %89, %89
  %153 = fadd float %152, %151
  %154 = fmul float %90, %90
  %155 = fadd float %153, %154
  %156 = call float @llvm.AMDGPU.rsq(float %155)
  %157 = fmul float %88, %156
  %158 = fmul float %89, %156
  %159 = fmul float %90, %156
  %160 = fmul float %157, %118
  %161 = fmul float %158, %118
  %162 = fmul float %159, %118
  %163 = fmul float %148, %116
  %164 = fadd float %163, %160
  %165 = fmul float %149, %116
  %166 = fadd float %165, %161
  %167 = fmul float %150, %116
  %168 = fadd float %167, %162
  %169 = fmul float %139, %.
  %170 = fadd float %169, %164
  %171 = fmul float %140, %.
  %172 = fadd float %171, %166
  %173 = fmul float %141, %.
  %174 = fadd float %173, %168
  %175 = fmul float %170, %76
  %176 = fmul float %172, %77
  %177 = fadd float %176, %175
  %178 = fmul float %174, %78
  %179 = fadd float %177, %178
  %180 = fmul float %179, %170
  %181 = fmul float %179, %172
  %182 = fmul float %179, %174
  %183 = fmul float 2.000000e+00, %180
  %184 = fmul float 2.000000e+00, %181
  %185 = fmul float 2.000000e+00, %182
  %186 = fsub float -0.000000e+00, %183
  %187 = fadd float %76, %186
  %188 = fsub float -0.000000e+00, %184
  %189 = fadd float %77, %188
  %190 = fsub float -0.000000e+00, %185
  %191 = fadd float %78, %190
  %192 = insertelement <4 x float> undef, float %187, i32 0
  %193 = insertelement <4 x float> %192, float %189, i32 1
  %194 = insertelement <4 x float> %193, float %191, i32 2
  %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 3
  %196 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %195)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = extractelement <4 x float> %196, i32 3
  %201 = call float @fabs(float %199)
  %202 = fdiv float 1.000000e+00, %201
  %203 = fmul float %197, %202
  %204 = fadd float %203, 1.500000e+00
  %205 = fmul float %198, %202
  %206 = fadd float %205, 1.500000e+00
  %207 = bitcast float %206 to i32
  %208 = bitcast float %204 to i32
  %209 = bitcast float %200 to i32
  %210 = insertelement <4 x i32> undef, i32 %207, i32 0
  %211 = insertelement <4 x i32> %210, i32 %208, i32 1
  %212 = insertelement <4 x i32> %211, i32 %209, i32 2
  %213 = insertelement <4 x i32> %212, i32 undef, i32 3
  %214 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %213, <32 x i8> %57, <16 x i8> %59, i32 4)
  %215 = extractelement <4 x float> %214, i32 0
  %216 = extractelement <4 x float> %214, i32 1
  %217 = extractelement <4 x float> %214, i32 2
  %218 = extractelement <4 x float> %214, i32 3
  %219 = fmul float %104, %39
  %220 = fmul float %105, %40
  %221 = fadd float %219, %220
  %222 = fmul float %106, %41
  %223 = fadd float %221, %222
  %224 = fmul float %107, %42
  %225 = fadd float %223, %224
  %226 = fadd float %225, %30
  %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00)
  %228 = fmul float %218, %227
  %229 = call float @llvm.AMDGPU.lrp(float %228, float %215, float %104)
  %230 = call float @llvm.AMDGPU.lrp(float %228, float %216, float %105)
  %231 = call float @llvm.AMDGPU.lrp(float %228, float %217, float %106)
  %232 = fmul float %104, %35
  %233 = fmul float %105, %36
  %234 = fadd float %232, %233
  %235 = fmul float %106, %37
  %236 = fadd float %234, %235
  %237 = fmul float %107, %38
  %238 = fadd float %236, %237
  %239 = fadd float %238, %29
  %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00)
  %241 = call float @llvm.AMDGPU.lrp(float %240, float %70, float 1.000000e+00)
  %242 = call float @llvm.AMDGPU.lrp(float %240, float %71, float 1.000000e+00)
  %243 = call float @llvm.AMDGPU.lrp(float %240, float %72, float 1.000000e+00)
  %244 = call float @llvm.AMDGPU.lrp(float %240, float %73, float 1.000000e+00)
  %245 = fmul float %229, %241
  %246 = fmul float %230, %242
  %247 = fmul float %231, %243
  %248 = fmul float %107, %244
  %249 = fmul float %12, %22
  %250 = fmul float %92, %23
  %251 = bitcast float %249 to i32
  %252 = bitcast float %250 to i32
  %253 = insertelement <2 x i32> undef, i32 %251, i32 0
  %254 = insertelement <2 x i32> %253, i32 %252, i32 1
  %255 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %65, <16 x i8> %67, i32 2)
  %256 = extractelement <4 x float> %255, i32 0
  %257 = extractelement <4 x float> %255, i32 1
  %258 = extractelement <4 x float> %255, i32 2
  %259 = extractelement <4 x float> %255, i32 3
  %260 = fmul float %104, %31
  %261 = fmul float %105, %32
  %262 = fadd float %260, %261
  %263 = fmul float %106, %33
  %264 = fadd float %262, %263
  %265 = fmul float %107, %34
  %266 = fadd float %264, %265
  %267 = fadd float %266, %28
  %268 = call float @llvm.AMDIL.clamp.(float %267, float 0.000000e+00, float 1.000000e+00)
  %269 = fmul float %268, %259
  %270 = fmul float %256, 0x3FCB367A00000000
  %271 = fmul float %257, 0x3FE6E2EB20000000
  %272 = fadd float %271, %270
  %273 = fmul float %258, 0x3FB27BB300000000
  %274 = fadd float %272, %273
  %275 = fcmp uge float %274, 0x3F50624DE0000000
  %276 = select i1 %275, float %274, float 0x3F50624DE0000000
  %277 = fdiv float 1.000000e+00, %276
  %278 = fmul float %256, %277
  %279 = fmul float %257, %277
  %280 = fmul float %258, %277
  %281 = fmul float %245, %256
  %282 = fmul float %246, %257
  %283 = fmul float %247, %258
  %284 = fmul float %269, %278
  %285 = fadd float %284, %281
  %286 = fmul float %269, %279
  %287 = fadd float %286, %282
  %288 = fmul float %269, %280
  %289 = fadd float %288, %283
  %290 = fmul float %285, 4.000000e+00
  %291 = fmul float %287, 4.000000e+00
  %292 = fmul float %289, 4.000000e+00
  %293 = bitcast float %74 to i32
  %294 = bitcast float %75 to i32
  %295 = insertelement <2 x i32> undef, i32 %293, i32 0
  %296 = insertelement <2 x i32> %295, i32 %294, i32 1
  %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %296, <32 x i8> %61, <16 x i8> %63, i32 2)
  %298 = extractelement <4 x float> %297, i32 0
  %299 = extractelement <4 x float> %297, i32 1
  %300 = extractelement <4 x float> %297, i32 2
  %301 = fmul float %43, %298
  %302 = fadd float %301, %290
  %303 = fmul float %44, %299
  %304 = fadd float %303, %291
  %305 = fmul float %45, %300
  %306 = fadd float %305, %292
  %307 = fcmp uge float %79, %27
  %308 = select i1 %307, float %79, float %27
  %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00)
  %310 = call float @llvm.AMDGPU.lrp(float %309, float %302, float %24)
  %311 = call float @llvm.AMDGPU.lrp(float %309, float %304, float %25)
  %312 = call float @llvm.AMDGPU.lrp(float %309, float %306, float %26)
  %313 = call i32 @llvm.SI.packf16(float %310, float %311)
  %314 = bitcast i32 %313 to float
  %315 = call i32 @llvm.SI.packf16(float %312, float %248)
  %316 = bitcast i32 %315 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %314, float %316, float %314, float %316)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
c0840300
c0c60500
bf8c0070
f0800f00
00430405
c0840100
bf8c0070
c200092d
bf8c007f
10180a00
c200092c
bf8c007f
d282000c
04300104
c200092e
bf8c007f
d282000c
04300106
c200092f
bf8c007f
d282000c
04300107
c2000922
bf8c007f
06181800
d206080c
0201010c
101c190b
081e1cf2
10180b0f
d2820010
0432130e
c2000929
bf8c007f
10180a00
c2000928
bf8c007f
d282000c
04300104
c200092a
bf8c007f
d282000c
04300106
c200092b
bf8c007f
d282000c
04300107
c2000921
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8440100
c8450101
d2820011
0436230c
10282310
c2000934
c2008935
bf8c007f
7e200201
d2820003
04400103
c2000915
bf8c007f
10220600
c2000914
bf8c007f
10200400
c0860310
c0c80520
bf8c007f
f0800f00
00641010
bf8c0770
10282314
100420ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0711
7e0602ff
3d93dd98
d2820002
040a0712
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
102a0511
c2000925
bf8c007f
10060a00
c2000924
bf8c007f
d2820003
040c0104
c2000926
bf8c007f
d2820003
040c0106
c2000927
bf8c007f
d2820003
040c0107
c2000920
bf8c007f
06060600
d2060803
02010103
10062703
d2820014
04522b03
102e28f6
c8540700
c8550701
c8500600
c8510601
c080030c
c0c60518
bf8c007f
f0800700
00031414
c2000931
bf8c0070
d2820019
045e2a00
c85c0b00
c85d0b01
c200091b
bf8c007f
d00c0002
02000117
7e300200
d2000017
000a2f18
d2060817
02010117
08302ef2
c2000919
bf8c007f
10343000
d2820019
046a3317
1034090f
d282001a
046a110e
c86c0000
c86d0001
d282001b
0436370c
1034371a
1034211a
10360510
d282001a
046a3703
103434f6
c2000930
bf8c007f
d282001a
046a2800
c2000918
bf8c007f
10363000
d282001a
046e3517
5e32331a
101e0d0f
d2820008
043e150e
c8240200
c8250201
d2820009
0436130c
10101308
10102508
10040512
d2820002
04220503
100404f6
c2000932
bf8c007f
d2820002
040a2c00
c200091a
bf8c007f
10063000
d2820002
040e0517
c80c0300
c80d0301
d2820000
0436070c
10000107
5e000102
f8001c0f
00190019
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL IN[7]
DCL IN[8]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..10]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MOV TEMP[4].w, IMM[0].yyyy
 10: MOV TEMP[4].xyz, IN[8].xyzx
 11: MUL TEMP[5], CONST[0], TEMP[1].xxxx
 12: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[5]
 13: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 14: ADD TEMP[0], TEMP[0], CONST[3]
 15: MOV TEMP[2].w, IMM[0].xxxx
 16: MOV TEMP[2].xyz, CONST[4].xyzx
 17: DP4 TEMP[5].x, TEMP[3], IN[0]
 18: DP4 TEMP[6].x, TEMP[3], IN[1]
 19: MOV TEMP[5].y, TEMP[6].xxxx
 20: DP4 TEMP[3].x, TEMP[3], IN[2]
 21: MOV TEMP[5].z, TEMP[3].xxxx
 22: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[6].wwww
 23: DP4 TEMP[5].x, TEMP[4], IN[0]
 24: DP4 TEMP[6].x, TEMP[4], IN[1]
 25: MOV TEMP[5].y, TEMP[6].xxxx
 26: DP4 TEMP[4].x, TEMP[4], IN[2]
 27: MOV TEMP[5].z, TEMP[4].xxxx
 28: MUL TEMP[4].xyz, TEMP[5].xyzz, CONST[6].wwww
 29: MUL TEMP[2], TEMP[2], IN[3]
 30: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 31: MAD TEMP[5].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 32: MOV TEMP[1].w, TEMP[5].xxxx
 33: MAD TEMP[5].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 34: MOV TEMP[5].zw, TEMP[3].yyxy
 35: MOV TEMP[6].x, TEMP[3].zzzz
 36: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 37: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 38: MOV TEMP[6].yzw, TEMP[3].yxyz
 39: MOV TEMP[3].xyz, TEMP[4].xyzx
 40: MOV OUT[1], TEMP[2]
 41: MOV OUT[2], IN[7]
 42: MOV OUT[4], TEMP[5]
 43: MOV OUT[6], TEMP[3]
 44: MOV OUT[5], TEMP[6]
 45: MOV OUT[3], TEMP[1]
 46: MOV OUT[0], TEMP[0]
 47: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = getelementptr <16 x i8> addrspace(2)* %3, i32 7
  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
  %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %5)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = extractelement <4 x float> %93, i32 3
  %98 = getelementptr <16 x i8> addrspace(2)* %3, i32 8
  %99 = load <16 x i8> addrspace(2)* %98, !tbaa !0
  %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %5)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = fmul float %77, %38
  %105 = fadd float %104, %35
  %106 = fmul float %78, %39
  %107 = fadd float %106, %36
  %108 = fmul float %79, %40
  %109 = fadd float %108, %37
  %110 = fmul float %105, %49
  %111 = fmul float %107, %50
  %112 = fadd float %110, %111
  %113 = fmul float %109, %51
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %52
  %116 = fadd float %114, %115
  %117 = fmul float %105, %56
  %118 = fmul float %107, %57
  %119 = fadd float %117, %118
  %120 = fmul float %109, %58
  %121 = fadd float %119, %120
  %122 = fmul float 1.000000e+00, %59
  %123 = fadd float %121, %122
  %124 = fmul float %105, %63
  %125 = fmul float %107, %64
  %126 = fadd float %124, %125
  %127 = fmul float %109, %65
  %128 = fadd float %126, %127
  %129 = fmul float 1.000000e+00, %66
  %130 = fadd float %128, %129
  %131 = fmul float %11, %116
  %132 = fmul float %12, %116
  %133 = fmul float %13, %116
  %134 = fmul float %14, %116
  %135 = fmul float %15, %123
  %136 = fadd float %135, %131
  %137 = fmul float %16, %123
  %138 = fadd float %137, %132
  %139 = fmul float %17, %123
  %140 = fadd float %139, %133
  %141 = fmul float %18, %123
  %142 = fadd float %141, %134
  %143 = fmul float %19, %130
  %144 = fadd float %143, %136
  %145 = fmul float %20, %130
  %146 = fadd float %145, %138
  %147 = fmul float %21, %130
  %148 = fadd float %147, %140
  %149 = fmul float %22, %130
  %150 = fadd float %149, %142
  %151 = fadd float %144, %23
  %152 = fadd float %146, %24
  %153 = fadd float %148, %25
  %154 = fadd float %150, %26
  %155 = fmul float %83, %49
  %156 = fmul float %84, %50
  %157 = fadd float %155, %156
  %158 = fmul float %85, %51
  %159 = fadd float %157, %158
  %160 = fmul float 0.000000e+00, %52
  %161 = fadd float %159, %160
  %162 = fmul float %83, %56
  %163 = fmul float %84, %57
  %164 = fadd float %162, %163
  %165 = fmul float %85, %58
  %166 = fadd float %164, %165
  %167 = fmul float 0.000000e+00, %59
  %168 = fadd float %166, %167
  %169 = fmul float %83, %63
  %170 = fmul float %84, %64
  %171 = fadd float %169, %170
  %172 = fmul float %85, %65
  %173 = fadd float %171, %172
  %174 = fmul float 0.000000e+00, %66
  %175 = fadd float %173, %174
  %176 = fmul float %161, %34
  %177 = fmul float %168, %34
  %178 = fmul float %175, %34
  %179 = fmul float %101, %49
  %180 = fmul float %102, %50
  %181 = fadd float %179, %180
  %182 = fmul float %103, %51
  %183 = fadd float %181, %182
  %184 = fmul float 0.000000e+00, %52
  %185 = fadd float %183, %184
  %186 = fmul float %101, %56
  %187 = fmul float %102, %57
  %188 = fadd float %186, %187
  %189 = fmul float %103, %58
  %190 = fadd float %188, %189
  %191 = fmul float 0.000000e+00, %59
  %192 = fadd float %190, %191
  %193 = fmul float %101, %63
  %194 = fmul float %102, %64
  %195 = fadd float %193, %194
  %196 = fmul float %103, %65
  %197 = fadd float %195, %196
  %198 = fmul float 0.000000e+00, %66
  %199 = fadd float %197, %198
  %200 = fmul float %185, %34
  %201 = fmul float %192, %34
  %202 = fmul float %199, %34
  %203 = fmul float %27, %70
  %204 = fmul float %28, %71
  %205 = fmul float %29, %72
  %206 = fmul float 1.000000e+00, %73
  %207 = fsub float -0.000000e+00, %43
  %208 = fadd float %116, %207
  %209 = fsub float -0.000000e+00, %44
  %210 = fadd float %123, %209
  %211 = fsub float -0.000000e+00, %45
  %212 = fadd float %130, %211
  %213 = fmul float %153, %41
  %214 = fadd float %213, %42
  %215 = fmul float %89, %30
  %216 = fadd float %215, %32
  %217 = fmul float %90, %31
  %218 = fadd float %217, %33
  %219 = fmul float %202, %177
  %220 = fmul float %200, %178
  %221 = fmul float %201, %176
  %222 = fsub float -0.000000e+00, %219
  %223 = fmul float %201, %178
  %224 = fadd float %223, %222
  %225 = fsub float -0.000000e+00, %220
  %226 = fmul float %202, %176
  %227 = fadd float %226, %225
  %228 = fsub float -0.000000e+00, %221
  %229 = fmul float %200, %177
  %230 = fadd float %229, %228
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %203, float %204, float %205, float %206)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %94, float %95, float %96, float %97)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %208, float %210, float %212, float %214)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %216, float %218, float %176, float %177)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %178, float %224, float %227, float %230)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %200, float %201, float %202, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %151, float %152, float %153, float %154)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c084071c
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
10041503
d2820002
040a1304
d2820002
040a1711
06041902
c2020102
bf8c007f
101a0404
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
10061d03
d2820003
040e1b04
d2820003
040e1f11
06062103
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800022f
11121314
c0840714
bf8c000f
e00c2000
80021200
bf8c0770
10220d13
d2820011
04460b12
d2820011
04460f14
d2820011
04450108
c202011b
bf8c007f
10222204
102c1513
d2820016
045a1312
d2820016
045a1714
d2820016
0459010c
102c2c04
c0840718
bf8c007f
e00c2000
80021700
c2028115
c2040117
bf8c0070
7e360208
d282001b
046c0b18
c2028114
c2040116
bf8c007f
7e380208
d2820017
04700b17
f800023f
11161b17
c0840720
bf8c000f
e00c2000
80021700
bf8c0770
10000d18
d2820000
04020b17
d2820000
04020f19
d2820000
04010108
10000004
100c2d00
100a1518
d2820005
04161317
d2820005
04161719
d2820005
0415010c
100a0a04
100e2305
080c0d07
100e1d13
d2820007
041e1b12
d2820007
041e1f14
d2820007
041d0110
100e0e04
10120f05
10101d18
d2820008
04221b17
d2820008
04221f19
d2820008
04210110
10101004
10142d08
0812130a
10142308
10160f00
0814150b
f800024f
06090a07
bf8c070f
7e0c0280
f800025f
06080005
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[14..15]
DCL CONST[5..13]
DCL TEMP[0..1]
DCL TEMP[2..6], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,    -0.0100}
IMM[1] FLT32 {    0.0000,    -0.5000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[15].xxxx, CONST[15].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: UIF TEMP[1].xxxx :3
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :3
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[4].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: DP3 TEMP[4].x, IN[5].xyzz, IN[5].xyzz
 11: RSQ TEMP[4].x, TEMP[4].xxxx
 12: MUL TEMP[4].xyz, IN[5].xyzz, TEMP[4].xxxx
 13: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx
 14: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz
 15: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz
 16: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 17: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 18: MOV TEMP[2].xyz, TEMP[2].xyzz
 19: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 20: DP4 TEMP[4].x, TEMP[3], CONST[13]
 21: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[11].yyyy
 22: LRP TEMP[4], TEMP[4].xxxx, IN[2], IMM[0].yyyy
 23: MUL TEMP[4], TEMP[3], TEMP[4]
 24: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[2].wwww
 25: DP4 TEMP[3].x, TEMP[3], CONST[14]
 26: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[11].zzzz
 27: MAD TEMP[2].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 28: MAX TEMP[3].x, IN[3].wwww, CONST[7].wwww
 29: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 30: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[7].xyzz
 31: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 32: MOV TEMP[5].xy, TEMP[3].xyyy
 33: TEX TEMP[5].x, TEMP[5], SAMP[2], 2D
 34: MAD TEMP[5].x, TEMP[5].xxxx, CONST[6].zzzz, CONST[6].wwww
 35: RCP TEMP[5].x, TEMP[5].xxxx
 36: ADD TEMP[5].x, TEMP[5].xxxx, -IN[4].zzzz
 37: MUL_SAT TEMP[5].x, TEMP[5].xxxx, IN[4].wwww
 38: MUL TEMP[4].x, TEMP[4].wwww, TEMP[5].xxxx
 39: MOV TEMP[2].w, TEMP[4].xxxx
 40: MAD TEMP[5].xy, CONST[9].xzzz, TEMP[4].xxxx, CONST[9].ywww
 41: ADD TEMP[6].x, TEMP[5].xxxx, IMM[0].wwww
 42: SLT TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx
 43: F2I TEMP[6].x, -TEMP[6]
 44: UIF TEMP[6].xxxx :3
 45:   KILL
 46: ENDIF
 47: MUL TEMP[6].xy, TEMP[3].xyyy, CONST[8].yzzz
 48: MOV TEMP[6].xy, TEMP[6].xyyy
 49: TEX TEMP[6].xy, TEMP[6], SAMP[3], 2D
 50: ADD TEMP[6].xy, TEMP[6].xyyy, IMM[1].yyyy
 51: MUL TEMP[4].x, CONST[8].xxxx, TEMP[4].xxxx
 52: MAD TEMP[3].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[3].xyyy
 53: MOV TEMP[3].xy, TEMP[3].xyyy
 54: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 55: MUL TEMP[2], TEMP[2], TEMP[5].xxxx
 56: MAD TEMP[2], TEMP[3], TEMP[5].yyyy, TEMP[2]
 57: MOV OUT[0], TEMP[2]
 58: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = fcmp ugt float %16, 0.000000e+00
  %70 = select i1 %69, float 1.000000e+00, float 0.000000e+00
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %86 = fmul float %13, %47
  %87 = fadd float %86, %48
  %88 = call float @llvm.AMDIL.clamp.(float %70, float 0.000000e+00, float 1.000000e+00)
  %89 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %90 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %91 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %92 = bitcast float %88 to i32
  %93 = icmp ne i32 %92, 0
  %. = select i1 %93, float -1.000000e+00, float 1.000000e+00
  %94 = bitcast float %79 to i32
  %95 = bitcast float %80 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %50, <16 x i8> %52, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %83, %83
  %104 = fmul float %84, %84
  %105 = fadd float %104, %103
  %106 = fmul float %85, %85
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %83, %108
  %110 = fmul float %84, %108
  %111 = fmul float %85, %108
  %112 = fmul float %109, %.
  %113 = fmul float %110, %.
  %114 = fmul float %111, %.
  %115 = fmul float %112, %75
  %116 = fmul float %113, %76
  %117 = fadd float %116, %115
  %118 = fmul float %114, %77
  %119 = fadd float %117, %118
  %120 = fmul float %119, %112
  %121 = fmul float %119, %113
  %122 = fmul float %119, %114
  %123 = fmul float 2.000000e+00, %120
  %124 = fmul float 2.000000e+00, %121
  %125 = fmul float 2.000000e+00, %122
  %126 = fsub float -0.000000e+00, %123
  %127 = fadd float %75, %126
  %128 = fsub float -0.000000e+00, %124
  %129 = fadd float %76, %128
  %130 = fsub float -0.000000e+00, %125
  %131 = fadd float %77, %130
  %132 = insertelement <4 x float> undef, float %127, i32 0
  %133 = insertelement <4 x float> %132, float %129, i32 1
  %134 = insertelement <4 x float> %133, float %131, i32 2
  %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3
  %136 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %135)
  %137 = extractelement <4 x float> %136, i32 0
  %138 = extractelement <4 x float> %136, i32 1
  %139 = extractelement <4 x float> %136, i32 2
  %140 = extractelement <4 x float> %136, i32 3
  %141 = call float @fabs(float %139)
  %142 = fdiv float 1.000000e+00, %141
  %143 = fmul float %137, %142
  %144 = fadd float %143, 1.500000e+00
  %145 = fmul float %138, %142
  %146 = fadd float %145, 1.500000e+00
  %147 = bitcast float %146 to i32
  %148 = bitcast float %144 to i32
  %149 = bitcast float %140 to i32
  %150 = insertelement <4 x i32> undef, i32 %147, i32 0
  %151 = insertelement <4 x i32> %150, i32 %148, i32 1
  %152 = insertelement <4 x i32> %151, i32 %149, i32 2
  %153 = insertelement <4 x i32> %152, i32 undef, i32 3
  %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %153, <32 x i8> %54, <16 x i8> %56, i32 4)
  %155 = extractelement <4 x float> %154, i32 0
  %156 = extractelement <4 x float> %154, i32 1
  %157 = extractelement <4 x float> %154, i32 2
  %158 = extractelement <4 x float> %154, i32 3
  %159 = fmul float %99, %39
  %160 = fmul float %100, %40
  %161 = fadd float %159, %160
  %162 = fmul float %101, %41
  %163 = fadd float %161, %162
  %164 = fmul float %102, %42
  %165 = fadd float %163, %164
  %166 = fadd float %165, %37
  %167 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00)
  %168 = call float @llvm.AMDGPU.lrp(float %167, float %71, float 1.000000e+00)
  %169 = call float @llvm.AMDGPU.lrp(float %167, float %72, float 1.000000e+00)
  %170 = call float @llvm.AMDGPU.lrp(float %167, float %73, float 1.000000e+00)
  %171 = call float @llvm.AMDGPU.lrp(float %167, float %74, float 1.000000e+00)
  %172 = fmul float %99, %168
  %173 = fmul float %100, %169
  %174 = fmul float %101, %170
  %175 = fmul float %102, %171
  %176 = fmul float %155, %158
  %177 = fmul float %156, %158
  %178 = fmul float %157, %158
  %179 = fmul float %99, %43
  %180 = fmul float %100, %44
  %181 = fadd float %179, %180
  %182 = fmul float %101, %45
  %183 = fadd float %181, %182
  %184 = fmul float %102, %46
  %185 = fadd float %183, %184
  %186 = fadd float %185, %38
  %187 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00)
  %188 = fmul float %176, %187
  %189 = fadd float %188, %172
  %190 = fmul float %177, %187
  %191 = fadd float %190, %173
  %192 = fmul float %178, %187
  %193 = fadd float %192, %174
  %194 = fcmp uge float %78, %29
  %195 = select i1 %194, float %78, float %29
  %196 = call float @llvm.AMDIL.clamp.(float %195, float 0.000000e+00, float 1.000000e+00)
  %197 = call float @llvm.AMDGPU.lrp(float %196, float %189, float %26)
  %198 = call float @llvm.AMDGPU.lrp(float %196, float %191, float %27)
  %199 = call float @llvm.AMDGPU.lrp(float %196, float %193, float %28)
  %200 = fmul float %12, %22
  %201 = fmul float %87, %23
  %202 = bitcast float %200 to i32
  %203 = bitcast float %201 to i32
  %204 = insertelement <2 x i32> undef, i32 %202, i32 0
  %205 = insertelement <2 x i32> %204, i32 %203, i32 1
  %206 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %205, <32 x i8> %58, <16 x i8> %60, i32 2)
  %207 = extractelement <4 x float> %206, i32 0
  %208 = fmul float %207, %24
  %209 = fadd float %208, %25
  %210 = fdiv float 1.000000e+00, %209
  %211 = fsub float -0.000000e+00, %81
  %212 = fadd float %210, %211
  %213 = fmul float %212, %82
  %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00)
  %215 = fmul float %175, %214
  %216 = fmul float %33, %215
  %217 = fadd float %216, %34
  %218 = fmul float %35, %215
  %219 = fadd float %218, %36
  %220 = fadd float %217, 0xBF847AE140000000
  %221 = fcmp ult float %220, 0.000000e+00
  %222 = select i1 %221, float 1.000000e+00, float 0.000000e+00
  %223 = fsub float -0.000000e+00, %222
  %224 = fptosi float %223 to i32
  %225 = bitcast i32 %224 to float
  %226 = bitcast float %225 to i32
  %227 = icmp ne i32 %226, 0
  br i1 %227, label %IF29, label %ENDIF28

IF29:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF28

ENDIF28:                                          ; preds = %main_body, %IF29
  %228 = fmul float %200, %31
  %229 = fmul float %201, %32
  %230 = bitcast float %228 to i32
  %231 = bitcast float %229 to i32
  %232 = insertelement <2 x i32> undef, i32 %230, i32 0
  %233 = insertelement <2 x i32> %232, i32 %231, i32 1
  %234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %62, <16 x i8> %64, i32 2)
  %235 = extractelement <4 x float> %234, i32 0
  %236 = extractelement <4 x float> %234, i32 1
  %237 = fadd float %235, -5.000000e-01
  %238 = fadd float %236, -5.000000e-01
  %239 = fmul float %30, %215
  %240 = fmul float %237, %239
  %241 = fadd float %240, %200
  %242 = fmul float %238, %239
  %243 = fadd float %242, %201
  %244 = bitcast float %241 to i32
  %245 = bitcast float %243 to i32
  %246 = insertelement <2 x i32> undef, i32 %244, i32 0
  %247 = insertelement <2 x i32> %246, i32 %245, i32 1
  %248 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %247, <32 x i8> %66, <16 x i8> %68, i32 2)
  %249 = extractelement <4 x float> %248, i32 0
  %250 = extractelement <4 x float> %248, i32 1
  %251 = extractelement <4 x float> %248, i32 2
  %252 = extractelement <4 x float> %248, i32 3
  %253 = fmul float %197, %217
  %254 = fmul float %198, %217
  %255 = fmul float %199, %217
  %256 = fmul float %215, %217
  %257 = fmul float %249, %219
  %258 = fadd float %257, %253
  %259 = fmul float %250, %219
  %260 = fadd float %259, %254
  %261 = fmul float %251, %219
  %262 = fadd float %261, %255
  %263 = fmul float %252, %219
  %264 = fadd float %263, %256
  %265 = call i32 @llvm.SI.packf16(float %258, float %260)
  %266 = bitcast i32 %265 to float
  %267 = call i32 @llvm.SI.packf16(float %262, float %264)
  %268 = bitcast i32 %267 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %266, float %268, float %266, float %268)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c81c0c00
c81d0c01
100a0f07
d2820008
04160d06
c8140e00
c8150e01
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0400
c81d0401
10160f04
c8280500
c8290501
d282000b
042e1506
100a1105
100a1305
c8200600
c8210601
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430605
bf8c0770
10081308
c82c0900
c82d0901
c8280800
c8290801
c0840300
c0c60500
bf8c007f
f0800f00
00430a0a
c0840100
bf8c0070
c2000935
bf8c007f
100a1600
c2000934
bf8c007f
d2820005
0414010a
c2000936
bf8c007f
d2820005
0414010c
c2000937
bf8c007f
d2820005
0414010d
c200092d
bf8c007f
060a0a00
d206080e
02010105
081e1cf2
c8140200
c8150201
d2820005
043e0b0e
100a0b0c
c2000939
bf8c007f
10201600
c2000938
bf8c007f
d2820010
0440010a
c200093a
bf8c007f
d2820010
0440010c
c200093b
bf8c007f
d2820010
0440010d
c200092e
bf8c007f
06202000
d2060810
02010110
d2820004
04162104
c8140700
c8150701
c200091f
bf8c007f
d00c000c
02000105
7e220200
d2000005
00320b11
d2060811
02010105
082422f2
c200091e
bf8c007f
100a2400
d2820004
04160911
100a1307
c84c0100
c84d0101
d2820013
043e270e
1026270b
d2820005
044e2105
c200091d
bf8c007f
10262400
d2820005
044e0b11
100c1306
c81c0000
c81d0001
d2820007
043e0f0e
100e0f0a
d2820006
041e2106
c200091c
bf8c007f
100e2400
d2820006
041e0d11
c81c0300
c81d0301
d2820007
043e0f0e
10120f0d
c8280a00
c8290a01
c200093c
c200893d
bf8c007f
7e0e0201
d2820003
041c0103
c2000915
bf8c007f
10100600
c2000914
bf8c007f
100e0400
c0860308
c0c80510
bf8c007f
f0800100
00640207
c200091a
c200891b
bf8c0070
7e060201
d2820002
040c0102
7e045502
08041502
c80c0b00
c80d0b01
10000702
d2060800
02010100
10040109
c2000926
c2008927
bf8c007f
7e000201
d2820000
04020400
c2000924
c2008925
bf8c007f
7e020201
d2820001
04060400
060602ff
bc23d70a
d0020000
02010103
d2000003
0001e480
d2060003
22010103
7e061103
d10a0006
02010103
c0860310
c0c80520
c080030c
c0cc0518
c2020922
c2028921
c2040920
bf8c007f
7e140204
7e120205
7e060208
be842406
8984047e
7e1602f3
7c261680
88fe047e
10161508
10141307
f0800300
00060b0a
bf8c0770
061218f1
10060503
d282000a
04220709
061616f1
d2820009
041e070b
f0800f00
00640709
10040302
bf8c0770
d2820002
040a010a
10060304
d2820003
040e0109
5e040503
10060305
d2820003
040e0108
10020306
d2820000
04060107
5e000700
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..18]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[15].xyzz, CONST[14].xyzz
  1: MUL TEMP[1], CONST[8], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[9], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[10], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[11]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[18].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: MUL TEMP[3], CONST[0], TEMP[0].xxxx
 10: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
 11: MAD TEMP[3], CONST[2], TEMP[0].zzzz, TEMP[3]
 12: ADD TEMP[3].xyz, TEMP[3], CONST[3]
 13: ADD TEMP[3].xyz, TEMP[3].xyzz, -CONST[16].xyzz
 14: MAD TEMP[4].x, TEMP[1].zzzz, CONST[17].xxxx, CONST[17].yyyy
 15: MOV TEMP[3].w, TEMP[4].xxxx
 16: MUL TEMP[4], CONST[4], TEMP[0].xxxx
 17: MAD TEMP[4], CONST[5], TEMP[0].yyyy, TEMP[4]
 18: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[4]
 19: ADD TEMP[0].z, TEMP[0], CONST[7]
 20: MAD TEMP[0].x, TEMP[0].zzzz, CONST[18].xxxx, CONST[18].yyyy
 21: MOV TEMP[0].y, CONST[18].zzzz
 22: MAD TEMP[4].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
 23: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 24: RSQ TEMP[5].x, TEMP[5].xxxx
 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 26: MOV TEMP[4].zw, TEMP[0].yyxy
 27: MOV OUT[4], TEMP[2]
 28: MOV OUT[3], TEMP[4]
 29: MOV OUT[1], CONST[12]
 30: MOV OUT[2], TEMP[3]
 31: MOV OUT[0], TEMP[1]
 32: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 300)
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %5)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = extractelement <4 x float> %74, i32 1
  %77 = extractelement <4 x float> %74, i32 2
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = fmul float %69, %54
  %84 = fadd float %83, %51
  %85 = fmul float %70, %55
  %86 = fadd float %85, %52
  %87 = fmul float %71, %56
  %88 = fadd float %87, %53
  %89 = fmul float %27, %84
  %90 = fmul float %28, %84
  %91 = fmul float %29, %84
  %92 = fmul float %30, %84
  %93 = fmul float %31, %86
  %94 = fadd float %93, %89
  %95 = fmul float %32, %86
  %96 = fadd float %95, %90
  %97 = fmul float %33, %86
  %98 = fadd float %97, %91
  %99 = fmul float %34, %86
  %100 = fadd float %99, %92
  %101 = fmul float %35, %88
  %102 = fadd float %101, %94
  %103 = fmul float %36, %88
  %104 = fadd float %103, %96
  %105 = fmul float %37, %88
  %106 = fadd float %105, %98
  %107 = fmul float %38, %88
  %108 = fadd float %107, %100
  %109 = fadd float %102, %39
  %110 = fadd float %104, %40
  %111 = fadd float %106, %41
  %112 = fadd float %108, %42
  %113 = fmul float %75, %65
  %114 = fmul float %76, %65
  %115 = fmul float %77, %65
  %116 = fmul float %11, %113
  %117 = fmul float %12, %113
  %118 = fmul float %13, %113
  %119 = fmul float %14, %114
  %120 = fadd float %119, %116
  %121 = fmul float %15, %114
  %122 = fadd float %121, %117
  %123 = fmul float %16, %114
  %124 = fadd float %123, %118
  %125 = fmul float %17, %115
  %126 = fadd float %125, %120
  %127 = fmul float %18, %115
  %128 = fadd float %127, %122
  %129 = fmul float %19, %115
  %130 = fadd float %129, %124
  %131 = fmul float %11, %84
  %132 = fmul float %12, %84
  %133 = fmul float %13, %84
  %134 = fmul float %14, %86
  %135 = fadd float %134, %131
  %136 = fmul float %15, %86
  %137 = fadd float %136, %132
  %138 = fmul float %16, %86
  %139 = fadd float %138, %133
  %140 = fmul float %17, %88
  %141 = fadd float %140, %135
  %142 = fmul float %18, %88
  %143 = fadd float %142, %137
  %144 = fmul float %19, %88
  %145 = fadd float %144, %139
  %146 = fadd float %141, %20
  %147 = fadd float %143, %21
  %148 = fadd float %145, %22
  %149 = fsub float -0.000000e+00, %57
  %150 = fadd float %146, %149
  %151 = fsub float -0.000000e+00, %58
  %152 = fadd float %147, %151
  %153 = fsub float -0.000000e+00, %59
  %154 = fadd float %148, %153
  %155 = fmul float %111, %60
  %156 = fadd float %155, %61
  %157 = fmul float %23, %84
  %158 = fmul float %24, %86
  %159 = fadd float %158, %157
  %160 = fmul float %25, %88
  %161 = fadd float %160, %159
  %162 = fadd float %161, %26
  %163 = fmul float %162, %62
  %164 = fadd float %163, %63
  %165 = fmul float %81, %47
  %166 = fadd float %165, %49
  %167 = fmul float %82, %48
  %168 = fadd float %167, %50
  %169 = fmul float %126, %126
  %170 = fmul float %128, %128
  %171 = fadd float %170, %169
  %172 = fmul float %130, %130
  %173 = fadd float %171, %172
  %174 = call float @llvm.AMDGPU.rsq(float %173)
  %175 = fmul float %126, %174
  %176 = fmul float %128, %174
  %177 = fmul float %130, %174
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %43, float %44, float %45, float %46)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %150, float %152, float %154, float %156)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %166, float %168, float %164, float %64)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %175, float %176, float %177, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %109, float %110, float %111, float %112)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020133
c2028132
c2040131
c2048130
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202013d
c2028139
bf8c0070
7e020205
d2820001
04040904
c202013c
c2028138
bf8c007f
7e040205
d2820002
04080903
c2020122
bf8c007f
100e0404
c2020126
bf8c007f
d2820007
041e0204
c202013e
c202813a
bf8c007f
7e100205
d2820003
04200905
c202012a
bf8c007f
d2820004
041e0604
c202012e
bf8c007f
06080804
c2020144
c2028145
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c2048142
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060141
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078140
bf8c007f
0a10100f
f800021f
05060708
c2078112
bf8c000f
100a040f
c2078116
bf8c007f
d2820005
0416020f
c207811a
bf8c007f
d2820005
0416060f
c207811e
bf8c007f
060a0a0f
c2078148
c2080149
bf8c007f
7e0c0210
d2820005
04181f05
c0880708
bf8c007f
e00c2000
80040600
c2078135
c2080137
bf8c0070
7e140210
d282000a
04281f07
c2078134
c2080136
bf8c007f
7e160210
d2820006
042c1f06
c207814a
bf8c007f
7e0e020f
f800022f
07050a06
c0880704
bf8c000f
e00c2000
80040700
c203014b
bf8c0070
10001006
100c0e06
100a0c09
d2820005
0416000b
10101206
d2820005
0416100a
100e0c0c
d2820007
041e000d
d2820007
041e100e
10120f07
d2820009
04260b05
100c0c04
d2820000
041a0005
d2820000
04021008
d2820006
04260100
7e0c5b06
10000d00
100a0d05
100c0d07
7e0e0280
f800023f
07000506
c2020123
bf8c000f
10000404
c2020127
bf8c007f
d2820000
04020204
c202012b
bf8c007f
d2820000
04020604
c202012f
bf8c007f
06000004
c2020121
bf8c007f
100a0404
c2020125
bf8c007f
d2820005
04160204
c2020129
bf8c007f
d2820005
04160604
c202012d
bf8c007f
060a0a04
c2020120
bf8c007f
10040404
c2020124
bf8c007f
d2820001
040a0204
c2020128
bf8c007f
d2820001
04060604
c200012c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..1], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], IN[0], TEMP[0]
  3: MAX TEMP[1].x, IN[2].xxxx, CONST[1].wwww
  4: MOV_SAT TEMP[1].x, TEMP[1].xxxx
  5: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz
  6: MOV OUT[0], TEMP[0]
  7: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28)
  %26 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0
  %28 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0
  %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %37 = bitcast float %34 to i32
  %38 = bitcast float %35 to i32
  %39 = insertelement <2 x i32> undef, i32 %37, i32 0
  %40 = insertelement <2 x i32> %39, i32 %38, i32 1
  %41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %27, <16 x i8> %29, i32 2)
  %42 = extractelement <4 x float> %41, i32 0
  %43 = extractelement <4 x float> %41, i32 1
  %44 = extractelement <4 x float> %41, i32 2
  %45 = extractelement <4 x float> %41, i32 3
  %46 = fmul float %30, %42
  %47 = fmul float %31, %43
  %48 = fmul float %32, %44
  %49 = fmul float %33, %45
  %50 = fcmp uge float %36, %25
  %51 = select i1 %50, float %36, float %25
  %52 = call float @llvm.AMDIL.clamp.(float %51, float 0.000000e+00, float 1.000000e+00)
  %53 = call float @llvm.AMDGPU.lrp(float %52, float %46, float %22)
  %54 = call float @llvm.AMDGPU.lrp(float %52, float %47, float %23)
  %55 = call float @llvm.AMDGPU.lrp(float %52, float %48, float %24)
  %56 = call i32 @llvm.SI.packf16(float %53, float %54)
  %57 = bitcast i32 %56 to float
  %58 = call i32 @llvm.SI.packf16(float %55, float %49)
  %59 = bitcast i32 %58 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080800
c8090801
c0840100
bf8c007f
c2000907
bf8c007f
d00c000c
02000102
7e060200
d2000002
00320503
d2060802
02010102
080604f2
c2000905
bf8c007f
10100600
c8140500
c8150501
c8100400
c8110401
c0800300
c0c60500
bf8c007f
f0800f00
00030404
c8240100
c8250101
bf8c0770
10120b09
d2820008
04221302
c2000904
bf8c007f
10120600
c8280000
c8290001
1014090a
d2820009
04261502
5e101109
c2000906
bf8c007f
10060600
c8240200
c8250201
10120d09
d2820002
040e1302
c80c0300
c80d0301
10000f03
5e000102
f8001c0f
00080008
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..19]
DCL TEMP[0..5], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {0, 1, 2, 3}
  0: SGE TEMP[0].x, IN[0].wwww, CONST[6].yyyy
  1: F2I TEMP[0].x, -TEMP[0]
  2: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
  3: SGE TEMP[1].x, IN[0].wwww, CONST[6].zzzz
  4: F2I TEMP[1].x, -TEMP[1]
  5: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx
  6: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
  7: F2I TEMP[0].x, TEMP[0].xxxx
  8: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
  9: I2F TEMP[2].y, TEMP[1].yyyy
 10: CMP TEMP[2].x, TEMP[2].yyyy, CONST[15].yyyy, CONST[15].xxxx
 11: I2F TEMP[3].z, TEMP[1].zzzz
 12: CMP TEMP[2].x, TEMP[3].zzzz, CONST[15].zzzz, TEMP[2].xxxx
 13: I2F TEMP[1].w, TEMP[1].wwww
 14: CMP TEMP[2].x, TEMP[1].wwww, CONST[15].wwww, TEMP[2].xxxx
 15: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
 16: I2F TEMP[4].y, TEMP[1].yyyy
 17: CMP TEMP[3].x, TEMP[4].yyyy, CONST[6].yyyy, CONST[6].xxxx
 18: I2F TEMP[4].z, TEMP[1].zzzz
 19: CMP TEMP[3].x, TEMP[4].zzzz, CONST[6].zzzz, TEMP[3].xxxx
 20: I2F TEMP[1].w, TEMP[1].wwww
 21: CMP TEMP[3].x, TEMP[1].wwww, CONST[6].wwww, TEMP[3].xxxx
 22: ADD TEMP[1].x, IN[0].wwww, -TEMP[3].xxxx
 23: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 24: USEQ TEMP[2].yzw, TEMP[0].xxxx, IMM[1]
 25: I2F TEMP[4].y, TEMP[2].yyyy
 26: CMP TEMP[3].x, TEMP[4].yyyy, CONST[11].yyyy, CONST[11].xxxx
 27: I2F TEMP[4].z, TEMP[2].zzzz
 28: CMP TEMP[3].x, TEMP[4].zzzz, CONST[11].zzzz, TEMP[3].xxxx
 29: I2F TEMP[2].w, TEMP[2].wwww
 30: CMP TEMP[3].x, TEMP[2].wwww, CONST[11].wwww, TEMP[3].xxxx
 31: UADD TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy
 32: USEQ TEMP[2].yzw, TEMP[2].xxxx, IMM[1]
 33: I2F TEMP[5].y, TEMP[2].yyyy
 34: CMP TEMP[4].x, TEMP[5].yyyy, CONST[11].yyyy, CONST[11].xxxx
 35: I2F TEMP[5].z, TEMP[2].zzzz
 36: CMP TEMP[4].x, TEMP[5].zzzz, CONST[11].zzzz, TEMP[4].xxxx
 37: I2F TEMP[2].w, TEMP[2].wwww
 38: CMP TEMP[4].x, TEMP[2].wwww, CONST[11].wwww, TEMP[4].xxxx
 39: ADD TEMP[2].xy, IN[1].xyyy, -CONST[12].xyyy
 40: LRP TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx
 41: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xxxx
 42: ADD TEMP[3].x, IMM[0].xxxx, -IN[2].xxxx
 43: MUL TEMP[3].xy, CONST[13].xyyy, TEMP[3].xxxx
 44: MAD TEMP[3].xy, CONST[13].zwww, IN[2].xxxx, TEMP[3].xyyy
 45: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy
 46: ADD TEMP[3].xyz, IN[0].xyzz, -CONST[16].xyzz
 47: MUL TEMP[4].xyz, IN[3].zxyy, TEMP[3].yzxx
 48: MAD TEMP[3].xyz, IN[3].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz
 49: DP3 TEMP[4].x, IN[3].xyzz, IN[3].xyzz
 50: RSQ TEMP[4].x, TEMP[4].xxxx
 51: MUL TEMP[4].xyz, IN[3].xyzz, TEMP[4].xxxx
 52: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
 53: RSQ TEMP[5].x, TEMP[5].xxxx
 54: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 55: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[2].xxxx
 56: MAD TEMP[3].xyz, -TEMP[4].xyzz, TEMP[2].yyyy, TEMP[3].xyzz
 57: ADD TEMP[2].xyz, IN[0].xyzz, TEMP[3].xyzz
 58: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 59: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 60: MAD TEMP[2], CONST[2], TEMP[2].zzzz, TEMP[3]
 61: ADD TEMP[2], TEMP[2], CONST[3]
 62: UADD TEMP[3].x, TEMP[0].xxxx, IMM[1].yyyy
 63: UADD TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz
 64: UARL ADDR[0].x, TEMP[3].xxxx
 65: UARL ADDR[0].x, TEMP[3].xxxx
 66: MOV TEMP[3], CONST[ADDR[0].x+6]
 67: UARL ADDR[0].x, TEMP[0].xxxx
 68: LRP TEMP[0], TEMP[1].xxxx, CONST[ADDR[0].x+6], TEMP[3]
 69: MUL TEMP[0], TEMP[0], CONST[4]
 70: MUL TEMP[1].x, IN[2].wwww, CONST[14].zzzz
 71: FLR TEMP[1].xy, TEMP[1].xxxx
 72: MUL TEMP[1].xy, CONST[14].xxxx, TEMP[1].xyyy
 73: FLR TEMP[3].xy, TEMP[1].xyyy
 74: MOV TEMP[4].yw, TEMP[3].yxyy
 75: ADD TEMP[1].xy, TEMP[1].xyyy, -TEMP[3].xyyy
 76: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[14].wwww
 77: FLR TEMP[1].xy, TEMP[1].xyyy
 78: MOV TEMP[4].xz, TEMP[1].xxyx
 79: ADD TEMP[1], IN[1].xyxy, TEMP[4]
 80: MUL TEMP[1], TEMP[1], CONST[14].xyxy
 81: MAD TEMP[3].x, TEMP[2].zzzz, CONST[5].xxxx, CONST[5].yyyy
 82: MOV OUT[2], TEMP[1]
 83: MOV OUT[3], TEMP[3]
 84: MOV OUT[1], TEMP[0]
 85: MOV OUT[0], TEMP[2]
 86: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = extractelement <4 x float> %60, i32 3
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 3
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = fcmp uge float %64, %34
  %82 = select i1 %81, float 1.000000e+00, float 0.000000e+00
  %83 = fsub float -0.000000e+00, %82
  %84 = fptosi float %83 to i32
  %85 = bitcast i32 %84 to float
  %86 = bitcast float %85 to i32
  %87 = and i32 %86, 1065353216
  %88 = bitcast i32 %87 to float
  %89 = fcmp uge float %64, %35
  %90 = select i1 %89, float 1.000000e+00, float 0.000000e+00
  %91 = fsub float -0.000000e+00, %90
  %92 = fptosi float %91 to i32
  %93 = bitcast i32 %92 to float
  %94 = bitcast float %93 to i32
  %95 = and i32 %94, 1065353216
  %96 = bitcast i32 %95 to float
  %97 = fadd float %88, %96
  %98 = fptosi float %97 to i32
  %99 = bitcast i32 %98 to float
  %100 = bitcast float %99 to i32
  %101 = icmp eq i32 %100, 1
  %102 = sext i1 %101 to i32
  %103 = bitcast float %99 to i32
  %104 = icmp eq i32 %103, 2
  %105 = sext i1 %104 to i32
  %106 = bitcast float %99 to i32
  %107 = icmp eq i32 %106, 3
  %108 = sext i1 %107 to i32
  %109 = bitcast i32 %102 to float
  %110 = bitcast i32 %105 to float
  %111 = bitcast i32 %108 to float
  %112 = bitcast float %109 to i32
  %113 = sitofp i32 %112 to float
  %114 = call float @llvm.AMDGPU.cndlt(float %113, float %52, float %51)
  %115 = bitcast float %110 to i32
  %116 = sitofp i32 %115 to float
  %117 = call float @llvm.AMDGPU.cndlt(float %116, float %53, float %114)
  %118 = bitcast float %111 to i32
  %119 = sitofp i32 %118 to float
  %120 = call float @llvm.AMDGPU.cndlt(float %119, float %54, float %117)
  %121 = bitcast float %99 to i32
  %122 = icmp eq i32 %121, 1
  %123 = sext i1 %122 to i32
  %124 = bitcast float %99 to i32
  %125 = icmp eq i32 %124, 2
  %126 = sext i1 %125 to i32
  %127 = bitcast float %99 to i32
  %128 = icmp eq i32 %127, 3
  %129 = sext i1 %128 to i32
  %130 = bitcast i32 %123 to float
  %131 = bitcast i32 %126 to float
  %132 = bitcast i32 %129 to float
  %133 = bitcast float %130 to i32
  %134 = sitofp i32 %133 to float
  %135 = call float @llvm.AMDGPU.cndlt(float %134, float %34, float %33)
  %136 = bitcast float %131 to i32
  %137 = sitofp i32 %136 to float
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %35, float %135)
  %139 = bitcast float %132 to i32
  %140 = sitofp i32 %139 to float
  %141 = call float @llvm.AMDGPU.cndlt(float %140, float %36, float %138)
  %142 = fsub float -0.000000e+00, %141
  %143 = fadd float %64, %142
  %144 = fmul float %143, %120
  %145 = call float @llvm.AMDIL.clamp.(float %144, float 0.000000e+00, float 1.000000e+00)
  %146 = bitcast float %99 to i32
  %147 = icmp eq i32 %146, 1
  %148 = sext i1 %147 to i32
  %149 = bitcast float %99 to i32
  %150 = icmp eq i32 %149, 2
  %151 = sext i1 %150 to i32
  %152 = bitcast float %99 to i32
  %153 = icmp eq i32 %152, 3
  %154 = sext i1 %153 to i32
  %155 = bitcast i32 %148 to float
  %156 = bitcast i32 %151 to float
  %157 = bitcast i32 %154 to float
  %158 = bitcast float %155 to i32
  %159 = sitofp i32 %158 to float
  %160 = call float @llvm.AMDGPU.cndlt(float %159, float %38, float %37)
  %161 = bitcast float %156 to i32
  %162 = sitofp i32 %161 to float
  %163 = call float @llvm.AMDGPU.cndlt(float %162, float %39, float %160)
  %164 = bitcast float %157 to i32
  %165 = sitofp i32 %164 to float
  %166 = call float @llvm.AMDGPU.cndlt(float %165, float %40, float %163)
  %167 = bitcast float %99 to i32
  %168 = add i32 %167, 1
  %169 = bitcast i32 %168 to float
  %170 = bitcast float %169 to i32
  %171 = icmp eq i32 %170, 1
  %172 = sext i1 %171 to i32
  %173 = bitcast float %169 to i32
  %174 = icmp eq i32 %173, 2
  %175 = sext i1 %174 to i32
  %176 = bitcast float %169 to i32
  %177 = icmp eq i32 %176, 3
  %178 = sext i1 %177 to i32
  %179 = bitcast i32 %172 to float
  %180 = bitcast i32 %175 to float
  %181 = bitcast i32 %178 to float
  %182 = bitcast float %179 to i32
  %183 = sitofp i32 %182 to float
  %184 = call float @llvm.AMDGPU.cndlt(float %183, float %38, float %37)
  %185 = bitcast float %180 to i32
  %186 = sitofp i32 %185 to float
  %187 = call float @llvm.AMDGPU.cndlt(float %186, float %39, float %184)
  %188 = bitcast float %181 to i32
  %189 = sitofp i32 %188 to float
  %190 = call float @llvm.AMDGPU.cndlt(float %189, float %40, float %187)
  %191 = fsub float -0.000000e+00, %41
  %192 = fadd float %68, %191
  %193 = fsub float -0.000000e+00, %42
  %194 = fadd float %69, %193
  %195 = call float @llvm.AMDGPU.lrp(float %145, float %190, float %166)
  %196 = fmul float %192, %195
  %197 = fmul float %194, %195
  %198 = fsub float -0.000000e+00, %73
  %199 = fadd float 1.000000e+00, %198
  %200 = fmul float %43, %199
  %201 = fmul float %44, %199
  %202 = fmul float %45, %73
  %203 = fadd float %202, %200
  %204 = fmul float %46, %73
  %205 = fadd float %204, %201
  %206 = fmul float %196, %203
  %207 = fmul float %197, %205
  %208 = fsub float -0.000000e+00, %55
  %209 = fadd float %61, %208
  %210 = fsub float -0.000000e+00, %56
  %211 = fadd float %62, %210
  %212 = fsub float -0.000000e+00, %57
  %213 = fadd float %63, %212
  %214 = fmul float %80, %211
  %215 = fmul float %78, %213
  %216 = fmul float %79, %209
  %217 = fsub float -0.000000e+00, %214
  %218 = fmul float %79, %213
  %219 = fadd float %218, %217
  %220 = fsub float -0.000000e+00, %215
  %221 = fmul float %80, %209
  %222 = fadd float %221, %220
  %223 = fsub float -0.000000e+00, %216
  %224 = fmul float %78, %211
  %225 = fadd float %224, %223
  %226 = fmul float %78, %78
  %227 = fmul float %79, %79
  %228 = fadd float %227, %226
  %229 = fmul float %80, %80
  %230 = fadd float %228, %229
  %231 = call float @llvm.AMDGPU.rsq(float %230)
  %232 = fmul float %78, %231
  %233 = fmul float %79, %231
  %234 = fmul float %80, %231
  %235 = fmul float %219, %219
  %236 = fmul float %222, %222
  %237 = fadd float %236, %235
  %238 = fmul float %225, %225
  %239 = fadd float %237, %238
  %240 = call float @llvm.AMDGPU.rsq(float %239)
  %241 = fmul float %219, %240
  %242 = fmul float %222, %240
  %243 = fmul float %225, %240
  %244 = fmul float %241, %206
  %245 = fmul float %242, %206
  %246 = fmul float %243, %206
  %247 = fsub float -0.000000e+00, %232
  %248 = fmul float %247, %207
  %249 = fadd float %248, %244
  %250 = fsub float -0.000000e+00, %233
  %251 = fmul float %250, %207
  %252 = fadd float %251, %245
  %253 = fsub float -0.000000e+00, %234
  %254 = fmul float %253, %207
  %255 = fadd float %254, %246
  %256 = fadd float %61, %249
  %257 = fadd float %62, %252
  %258 = fadd float %63, %255
  %259 = fmul float %11, %256
  %260 = fmul float %12, %256
  %261 = fmul float %13, %256
  %262 = fmul float %14, %256
  %263 = fmul float %15, %257
  %264 = fadd float %263, %259
  %265 = fmul float %16, %257
  %266 = fadd float %265, %260
  %267 = fmul float %17, %257
  %268 = fadd float %267, %261
  %269 = fmul float %18, %257
  %270 = fadd float %269, %262
  %271 = fmul float %19, %258
  %272 = fadd float %271, %264
  %273 = fmul float %20, %258
  %274 = fadd float %273, %266
  %275 = fmul float %21, %258
  %276 = fadd float %275, %268
  %277 = fmul float %22, %258
  %278 = fadd float %277, %270
  %279 = fadd float %272, %23
  %280 = fadd float %274, %24
  %281 = fadd float %276, %25
  %282 = fadd float %278, %26
  %283 = bitcast float %99 to i32
  %284 = add i32 %283, 1
  %285 = bitcast i32 %284 to float
  %286 = bitcast float %99 to i32
  %287 = add i32 %286, 2
  %288 = bitcast i32 %287 to float
  %289 = bitcast float %285 to i32
  %290 = shl i32 %289, 4
  %291 = add i32 %290, 96
  %292 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %291)
  %293 = shl i32 %289, 4
  %294 = add i32 %293, 100
  %295 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %294)
  %296 = shl i32 %289, 4
  %297 = add i32 %296, 104
  %298 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %297)
  %299 = shl i32 %289, 4
  %300 = add i32 %299, 108
  %301 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %300)
  %302 = bitcast float %288 to i32
  %303 = shl i32 %302, 4
  %304 = add i32 %303, 96
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = call float @llvm.AMDGPU.lrp(float %145, float %305, float %292)
  %307 = shl i32 %302, 4
  %308 = add i32 %307, 100
  %309 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %308)
  %310 = call float @llvm.AMDGPU.lrp(float %145, float %309, float %295)
  %311 = shl i32 %302, 4
  %312 = add i32 %311, 104
  %313 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %312)
  %314 = call float @llvm.AMDGPU.lrp(float %145, float %313, float %298)
  %315 = shl i32 %302, 4
  %316 = add i32 %315, 108
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = call float @llvm.AMDGPU.lrp(float %145, float %317, float %301)
  %319 = fmul float %306, %27
  %320 = fmul float %310, %28
  %321 = fmul float %314, %29
  %322 = fmul float %318, %30
  %323 = fmul float %74, %49
  %324 = call float @floor(float %323)
  %325 = call float @floor(float %323)
  %326 = fmul float %47, %324
  %327 = fmul float %47, %325
  %328 = call float @floor(float %326)
  %329 = call float @floor(float %327)
  %330 = fsub float -0.000000e+00, %328
  %331 = fadd float %326, %330
  %332 = fsub float -0.000000e+00, %329
  %333 = fadd float %327, %332
  %334 = fmul float %331, %50
  %335 = fmul float %333, %50
  %336 = call float @floor(float %334)
  %337 = call float @floor(float %335)
  %338 = fadd float %68, %336
  %339 = fadd float %69, %328
  %340 = fadd float %68, %337
  %341 = fadd float %69, %329
  %342 = fmul float %338, %47
  %343 = fmul float %339, %48
  %344 = fmul float %340, %47
  %345 = fmul float %341, %48
  %346 = fmul float %281, %31
  %347 = fadd float %346, %32
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %319, float %320, float %321, float %322)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %342, float %343, float %344, float %345)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %347, float %329, float %298, float %301)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %279, float %280, float %281, float %282)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readonly
declare float @floor(float) #3

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c206011a
bf8c007f
d00c0004
02001904
d2000005
0011e480
d2060005
22010105
7e0a1105
360a0af2
c2068119
bf8c007f
d00c0004
02001b04
d2000006
0011e480
d2060006
22010106
7e0c1106
360c0cf2
060a0b06
7e0c1105
d1040004
02010306
d2000005
00118280
7e0a0b05
d0080004
02020a80
c204013d
bf8c007f
7e0a0208
c204013c
bf8c007f
7e0e0208
d2000005
00120b07
d1040008
02010506
d2000007
00218280
7e0e0b07
d0080008
02020e80
c205013e
bf8c007f
7e0e020a
d2000005
00220f05
d104000a
02010706
d2000007
00298280
7e0e0b07
d008000a
02020e80
c207013f
bf8c007f
7e0e020e
d2000005
002a0f05
7e0e020d
c2068118
bf8c007f
7e10020d
d2000007
00120f08
7e10020c
d2000007
00221107
c206011b
bf8c007f
7e10020c
d2000007
002a1107
080e0f04
100a0b07
d2060808
02010105
081c10f2
4a120c81
340e1284
4a0a0eff
0000006c
e0301000
80000505
bf8c0770
10160b0e
4a0c0c82
34140c84
4a0c14ff
0000006c
e0301000
80000606
bf8c0770
d2820006
042e0d08
c2060113
bf8c007f
10160c0c
4a0c0eff
00000068
e0301000
80000606
bf8c0770
10180d0e
4a1a14ff
00000068
e0301000
80000d0d
bf8c0770
d282000c
04321b08
c2060112
bf8c007f
1018180c
4a1a0eff
00000064
e0301000
80000d0d
bf8c0770
101a1b0e
4a1e14ff
00000064
e0301000
80000f0f
bf8c0770
d282000d
04361f08
c2060111
bf8c007f
101a1a0c
4a0e0eff
00000060
e0301000
80000707
bf8c0770
100e0f0e
4a1414ff
00000060
e0301000
80000a0a
bf8c0770
d2820007
041e1508
c2060110
bf8c007f
100e0e0c
f800020f
0b0c0d07
c0860704
bf8c000f
e00c2000
80030f00
c0860708
bf8c0070
e00c2000
80030a00
c206013a
bf8c0070
100e1a0c
7e0e4907
c2060138
bf8c007f
10280e0c
7e0e4914
06260f10
c2068139
bf8c007f
1026260d
08280f14
c206813b
bf8c007f
1028280d
7e284914
0628290f
1028280c
f800021f
13141314
c206012d
bf8c000f
7e28020c
c206012c
bf8c007f
7e2c020c
d2000013
00122916
c202012e
bf8c007f
7e2a0204
d2000017
00222b13
c202012f
bf8c007f
7e260204
d2000017
002a2717
101c2f0e
d1040004
02010309
d2000017
00118280
7e2e0b17
d0080004
02022e80
d2000014
00122916
d1040004
02010509
d2000016
00118280
7e2c0b16
d0080004
02022c80
d2000014
00122b14
d1040004
02010709
d2000009
00118280
7e120b09
d0080004
02021280
d2000009
00122714
d282000e
043a1308
c2020131
bf8c007f
0a102004
10101d08
082e14f2
c2020135
bf8c007f
10122e04
c2020137
bf8c007f
d2820009
04261404
10101308
c082070c
bf8c007f
e00c2000
80011300
bf8c0770
10002713
d2820000
04022914
d2820000
04022b15
7e005b00
10120114
10121109
c2020130
bf8c007f
0a1e1e04
101c1d0f
c2020134
bf8c007f
101e2e04
c2020136
bf8c007f
d282000a
043e1404
1014150e
c2020142
bf8c007f
0a180604
10161913
c2020140
bf8c007f
0a1a0204
101c1b15
0816170e
c2020141
bf8c007f
0a1e0404
101c1f15
10181914
081c1d0c
10181d0e
d2820010
0432170b
10181b14
101a1f13
0818190d
d282000d
0442190c
7e1a5b0d
10161b0b
1016150b
0812130b
06121302
10160113
1016110b
101c1b0e
101c150e
0816170e
06161701
c2020102
bf8c007f
101c1604
c2020106
bf8c007f
d282000e
043a1204
10000115
10001100
10101b0c
10101508
08000108
06000103
c202010a
bf8c007f
d2820001
043a0004
c202010e
bf8c007f
06020204
c2020114
c2028115
bf8c007f
7e040205
d2820002
04080901
f800022f
05060702
c2020103
bf8c000f
10041604
c2020107
bf8c007f
d2820002
040a1204
c202010b
bf8c007f
d2820002
040a0004
c202010f
bf8c007f
06040404
c2020101
bf8c007f
10061604
c2020105
bf8c007f
d2820003
040e1204
c2020109
bf8c007f
d2820003
040e0004
c202010d
bf8c007f
06060604
c2020100
bf8c007f
10081604
c2020104
bf8c007f
d2820004
04121204
c2020108
bf8c007f
d2820000
04120004
c200010c
bf8c007f
06000000
f80008cf
02010300
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[10..11]
DCL CONST[3..9]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MAD TEMP[4].yw, IMM[0].zzzz, TEMP[4], IMM[0].xxxx
 13: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 14: RSQ TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 16: DP2 TEMP[5].x, TEMP[4].ywww, TEMP[4].ywww
 17: ADD TEMP[5].x, IMM[0].yyyy, -TEMP[5].xxxx
 18: MAX TEMP[5].x, IMM[0].wwww, TEMP[5].xxxx
 19: RSQ TEMP[6].x, TEMP[5].xxxx
 20: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 21: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 23: DP3 TEMP[5].x, IN[5].yzww, IN[5].yzww
 24: RSQ TEMP[5].x, TEMP[5].xxxx
 25: MUL TEMP[5].xyz, IN[5].yzww, TEMP[5].xxxx
 26: DP3 TEMP[6].x, IN[6].xyzz, IN[6].xyzz
 27: RSQ TEMP[6].x, TEMP[6].xxxx
 28: MUL TEMP[6].xyz, IN[6].xyzz, TEMP[6].xxxx
 29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[4].wwww
 30: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].yyyy, TEMP[6].xyzz
 31: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 32: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 33: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 34: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 35: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 36: MOV TEMP[2].xyz, TEMP[2].xyzz
 37: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 38: DP4 TEMP[3].x, IMM[0].yyyy, CONST[8]
 39: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[6].yyyy
 40: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 41: MOV TEMP[4].w, TEMP[3].wwww
 42: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[3].xyyy
 43: MOV TEMP[5].xy, TEMP[5].xyyy
 44: TEX TEMP[5], TEMP[5], SAMP[2], 2D
 45: DP4 TEMP[6].x, IMM[0].yyyy, CONST[7]
 46: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[6].xxxx
 47: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 48: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 49: MAX TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
 50: RCP TEMP[7].x, TEMP[7].xxxx
 51: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 52: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 53: MAD TEMP[3].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[3].xyzz
 54: MUL TEMP[4].xyz, TEMP[3].xyzz, IMM[2].xxxx
 55: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 56: DP4 TEMP[3].x, IMM[0].yyyy, CONST[9]
 57: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[6].zzzz
 58: MAD TEMP[4].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 59: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 60: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 61: LRP TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz, CONST[4].xyzz
 62: MOV OUT[0], TEMP[4]
 63: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = fcmp ugt float %16, 0.000000e+00
  %58 = select i1 %57, float 1.000000e+00, float 0.000000e+00
  %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %66 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %78 = fmul float %13, %43
  %79 = fadd float %78, %44
  %80 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %81 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %82 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %83 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %84 = bitcast float %80 to i32
  %85 = icmp ne i32 %84, 0
  %. = select i1 %85, float -1.000000e+00, float 1.000000e+00
  %86 = bitcast float %67 to i32
  %87 = bitcast float %68 to i32
  %88 = insertelement <2 x i32> undef, i32 %86, i32 0
  %89 = insertelement <2 x i32> %88, i32 %87, i32 1
  %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %46, <16 x i8> %48, i32 2)
  %91 = extractelement <4 x float> %90, i32 1
  %92 = extractelement <4 x float> %90, i32 3
  %93 = fmul float 2.000000e+00, %91
  %94 = fadd float %93, -1.000000e+00
  %95 = fmul float 2.000000e+00, %92
  %96 = fadd float %95, -1.000000e+00
  %97 = fmul float %69, %69
  %98 = fmul float %70, %70
  %99 = fadd float %98, %97
  %100 = fmul float %71, %71
  %101 = fadd float %99, %100
  %102 = call float @llvm.AMDGPU.rsq(float %101)
  %103 = fmul float %69, %102
  %104 = fmul float %70, %102
  %105 = fmul float %71, %102
  %106 = fmul float %94, %94
  %107 = fmul float %96, %96
  %108 = fadd float %106, %107
  %109 = fsub float -0.000000e+00, %108
  %110 = fadd float 1.000000e+00, %109
  %111 = fcmp uge float 0.000000e+00, %110
  %112 = select i1 %111, float 0.000000e+00, float %110
  %113 = call float @llvm.AMDGPU.rsq(float %112)
  %114 = fmul float %113, %112
  %115 = fsub float -0.000000e+00, %112
  %116 = call float @llvm.AMDGPU.cndlt(float %115, float %114, float 0.000000e+00)
  %117 = fmul float %103, %116
  %118 = fmul float %104, %116
  %119 = fmul float %105, %116
  %120 = fmul float %72, %72
  %121 = fmul float %73, %73
  %122 = fadd float %121, %120
  %123 = fmul float %74, %74
  %124 = fadd float %122, %123
  %125 = call float @llvm.AMDGPU.rsq(float %124)
  %126 = fmul float %72, %125
  %127 = fmul float %73, %125
  %128 = fmul float %74, %125
  %129 = fmul float %75, %75
  %130 = fmul float %76, %76
  %131 = fadd float %130, %129
  %132 = fmul float %77, %77
  %133 = fadd float %131, %132
  %134 = call float @llvm.AMDGPU.rsq(float %133)
  %135 = fmul float %75, %134
  %136 = fmul float %76, %134
  %137 = fmul float %77, %134
  %138 = fmul float %135, %96
  %139 = fmul float %136, %96
  %140 = fmul float %137, %96
  %141 = fmul float %126, %94
  %142 = fadd float %141, %138
  %143 = fmul float %127, %94
  %144 = fadd float %143, %139
  %145 = fmul float %128, %94
  %146 = fadd float %145, %140
  %147 = fmul float %117, %.
  %148 = fadd float %147, %142
  %149 = fmul float %118, %.
  %150 = fadd float %149, %144
  %151 = fmul float %119, %.
  %152 = fadd float %151, %146
  %153 = fmul float %148, %63
  %154 = fmul float %150, %64
  %155 = fadd float %154, %153
  %156 = fmul float %152, %65
  %157 = fadd float %155, %156
  %158 = fmul float %157, %148
  %159 = fmul float %157, %150
  %160 = fmul float %157, %152
  %161 = fmul float 2.000000e+00, %158
  %162 = fmul float 2.000000e+00, %159
  %163 = fmul float 2.000000e+00, %160
  %164 = fsub float -0.000000e+00, %161
  %165 = fadd float %63, %164
  %166 = fsub float -0.000000e+00, %162
  %167 = fadd float %64, %166
  %168 = fsub float -0.000000e+00, %163
  %169 = fadd float %65, %168
  %170 = insertelement <4 x float> undef, float %165, i32 0
  %171 = insertelement <4 x float> %170, float %167, i32 1
  %172 = insertelement <4 x float> %171, float %169, i32 2
  %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 3
  %174 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %173)
  %175 = extractelement <4 x float> %174, i32 0
  %176 = extractelement <4 x float> %174, i32 1
  %177 = extractelement <4 x float> %174, i32 2
  %178 = extractelement <4 x float> %174, i32 3
  %179 = call float @fabs(float %177)
  %180 = fdiv float 1.000000e+00, %179
  %181 = fmul float %175, %180
  %182 = fadd float %181, 1.500000e+00
  %183 = fmul float %176, %180
  %184 = fadd float %183, 1.500000e+00
  %185 = bitcast float %184 to i32
  %186 = bitcast float %182 to i32
  %187 = bitcast float %178 to i32
  %188 = insertelement <4 x i32> undef, i32 %185, i32 0
  %189 = insertelement <4 x i32> %188, i32 %186, i32 1
  %190 = insertelement <4 x i32> %189, i32 %187, i32 2
  %191 = insertelement <4 x i32> %190, i32 undef, i32 3
  %192 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %191, <32 x i8> %50, <16 x i8> %52, i32 4)
  %193 = extractelement <4 x float> %192, i32 0
  %194 = extractelement <4 x float> %192, i32 1
  %195 = extractelement <4 x float> %192, i32 2
  %196 = extractelement <4 x float> %192, i32 3
  %197 = fmul float 1.000000e+00, %35
  %198 = fmul float 1.000000e+00, %36
  %199 = fadd float %197, %198
  %200 = fmul float 1.000000e+00, %37
  %201 = fadd float %199, %200
  %202 = fmul float 1.000000e+00, %38
  %203 = fadd float %201, %202
  %204 = fadd float %203, %29
  %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00)
  %206 = call float @llvm.AMDGPU.lrp(float %205, float %59, float 1.000000e+00)
  %207 = call float @llvm.AMDGPU.lrp(float %205, float %60, float 1.000000e+00)
  %208 = call float @llvm.AMDGPU.lrp(float %205, float %61, float 1.000000e+00)
  %209 = call float @llvm.AMDGPU.lrp(float %205, float %62, float 1.000000e+00)
  %210 = fmul float %12, %22
  %211 = fmul float %79, %23
  %212 = bitcast float %210 to i32
  %213 = bitcast float %211 to i32
  %214 = insertelement <2 x i32> undef, i32 %212, i32 0
  %215 = insertelement <2 x i32> %214, i32 %213, i32 1
  %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %54, <16 x i8> %56, i32 2)
  %217 = extractelement <4 x float> %216, i32 0
  %218 = extractelement <4 x float> %216, i32 1
  %219 = extractelement <4 x float> %216, i32 2
  %220 = extractelement <4 x float> %216, i32 3
  %221 = fmul float 1.000000e+00, %31
  %222 = fmul float 1.000000e+00, %32
  %223 = fadd float %221, %222
  %224 = fmul float 1.000000e+00, %33
  %225 = fadd float %223, %224
  %226 = fmul float 1.000000e+00, %34
  %227 = fadd float %225, %226
  %228 = fadd float %227, %28
  %229 = call float @llvm.AMDIL.clamp.(float %228, float 0.000000e+00, float 1.000000e+00)
  %230 = fmul float %229, %220
  %231 = fmul float %217, 0x3FCB367A00000000
  %232 = fmul float %218, 0x3FE6E2EB20000000
  %233 = fadd float %232, %231
  %234 = fmul float %219, 0x3FB27BB300000000
  %235 = fadd float %233, %234
  %236 = fcmp uge float %235, 0x3F50624DE0000000
  %237 = select i1 %236, float %235, float 0x3F50624DE0000000
  %238 = fdiv float 1.000000e+00, %237
  %239 = fmul float %217, %238
  %240 = fmul float %218, %238
  %241 = fmul float %219, %238
  %242 = fmul float %206, %217
  %243 = fmul float %207, %218
  %244 = fmul float %208, %219
  %245 = fmul float %230, %239
  %246 = fadd float %245, %242
  %247 = fmul float %230, %240
  %248 = fadd float %247, %243
  %249 = fmul float %230, %241
  %250 = fadd float %249, %244
  %251 = fmul float %246, 4.000000e+00
  %252 = fmul float %248, 4.000000e+00
  %253 = fmul float %250, 4.000000e+00
  %254 = fmul float %193, %196
  %255 = fmul float %194, %196
  %256 = fmul float %195, %196
  %257 = fmul float 1.000000e+00, %39
  %258 = fmul float 1.000000e+00, %40
  %259 = fadd float %257, %258
  %260 = fmul float 1.000000e+00, %41
  %261 = fadd float %259, %260
  %262 = fmul float 1.000000e+00, %42
  %263 = fadd float %261, %262
  %264 = fadd float %263, %30
  %265 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00)
  %266 = fmul float %254, %265
  %267 = fadd float %266, %251
  %268 = fmul float %255, %265
  %269 = fadd float %268, %252
  %270 = fmul float %256, %265
  %271 = fadd float %270, %253
  %272 = fcmp uge float %66, %27
  %273 = select i1 %272, float %66, float %27
  %274 = call float @llvm.AMDIL.clamp.(float %273, float 0.000000e+00, float 1.000000e+00)
  %275 = call float @llvm.AMDGPU.lrp(float %274, float %267, float %24)
  %276 = call float @llvm.AMDGPU.lrp(float %274, float %269, float %25)
  %277 = call float @llvm.AMDGPU.lrp(float %274, float %271, float %26)
  %278 = call i32 @llvm.SI.packf16(float %275, float %276)
  %279 = bitcast i32 %278 to float
  %280 = call i32 @llvm.SI.packf16(float %277, float %209)
  %281 = bitcast i32 %280 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %279, float %281, float %279, float %281)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840300
c0c60500
bf8c007f
f0800a00
00430605
bf8c0770
060a0d06
060a0af3
060c0f07
060c0cf3
100e0d06
d2820007
041e0b05
080e0ef2
d0060008
02010107
d2000007
00210107
7e105b07
10100f08
d2060007
22010107
d0080008
02020e80
d2000007
00221080
c82c0b00
c82d0b01
c8280a00
c8290a01
1010150a
d2820009
0422170b
c8200c00
c8210c01
d2820009
04261108
7e125b09
1016130b
101a0f0b
c83c1100
c83d1101
c8381000
c8391001
10161d0e
d282000c
042e1f0f
c82c1200
c82d1201
d282000c
0432170b
7e185b0c
101e190f
10220d0f
c84c0e00
c84d0e01
c8480d00
c8490d01
101e2512
d2820010
043e2713
c83c0f00
c83d0f01
d2820010
04421f0f
7e205b10
10262113
d2820013
04460b13
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000011
0021e6f2
d2820004
044e230d
1014130a
10140f0a
101a190e
101a0d0d
101c2112
d282000d
04360b0e
d282000a
0436230a
c8340400
c8350401
10241b0a
c8380500
c8390501
d2820012
044a1d04
10101308
100e0f08
1010190b
100c0d08
1010210f
d2820005
041a0b08
d2820005
04162307
c8180600
c8190601
d2820007
044a0d05
10100907
d2820004
04220907
081e090e
10081507
d2820004
04121507
081c090d
10080b07
d2820004
04120b07
08200906
7e220280
d28a0005
04421f0e
d28c0004
04421f0e
d28e0006
04421f0e
d2880007
04421f0e
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430405
bf8c0770
101a0f05
c0840100
bf8c007f
c200092c
c200892d
bf8c007f
7e100201
d2820003
04200103
c200090d
bf8c007f
10120600
c200090c
bf8c007f
10100400
c0800308
c0c60510
bf8c007f
f0800f00
00030808
bf8c0770
100410ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0709
7e0602ff
3d93dd98
d2820002
040a070a
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e185502
101e1909
c8380100
c8390101
c2000920
c2008921
bf8c007f
7e040201
d2060002
02020400
c2000922
bf8c007f
06040400
c2000923
bf8c007f
06040400
c2000919
bf8c007f
06040400
d2060802
02010102
080604f2
d282000e
040e1d02
1020130e
c200091c
c200891d
bf8c007f
7e1c0201
d206000e
02021c00
c200091e
bf8c007f
061c1c00
c200091f
bf8c007f
061c1c00
c2000918
bf8c007f
061c1c00
d206080e
0201010e
101c170e
d282000f
04421f0e
10201ef6
c2000924
c2008925
bf8c007f
7e1e0201
d206000f
02021e00
c2000926
bf8c007f
061e1e00
c2000927
bf8c007f
061e1e00
c200091a
bf8c007f
061e1e00
d206080f
0201010f
d2820011
04421f0d
c8340700
c8350701
c2000913
bf8c007f
d00c0002
0200010d
7e200200
d200000d
000a1b10
d206080d
0201010d
08201af2
c2000911
bf8c007f
10242000
d2820011
044a230d
10240f04
10261908
c8500000
c8510001
d2820014
040e2902
10281114
d2820013
0452270e
102626f6
d2820012
044e1f12
c2000910
bf8c007f
10262000
d2820012
044e250d
5e222312
10080f06
100a190a
c8180200
c8190201
d2820006
040e0d02
100c1506
d2820005
041a0b0e
100a0af6
d2820004
04161f04
c2000912
bf8c007f
100a2000
d2820004
0416090d
c8140300
c8150301
d2820000
040e0b02
5e000104
f8001c0f
00110011
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL IN[7]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..10]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MOV TEMP[4].w, IMM[0].yyyy
 10: MOV TEMP[4].xyz, IN[7].xyzx
 11: MUL TEMP[5], CONST[0], TEMP[1].xxxx
 12: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[5]
 13: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 14: ADD TEMP[0], TEMP[0], CONST[3]
 15: MOV TEMP[2].w, IMM[0].xxxx
 16: MOV TEMP[2].xyz, CONST[4].xyzx
 17: DP4 TEMP[5].x, TEMP[3], IN[0]
 18: DP4 TEMP[6].x, TEMP[3], IN[1]
 19: MOV TEMP[5].y, TEMP[6].xxxx
 20: DP4 TEMP[3].x, TEMP[3], IN[2]
 21: MOV TEMP[5].z, TEMP[3].xxxx
 22: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[6].wwww
 23: DP4 TEMP[5].x, TEMP[4], IN[0]
 24: DP4 TEMP[6].x, TEMP[4], IN[1]
 25: MOV TEMP[5].y, TEMP[6].xxxx
 26: DP4 TEMP[4].x, TEMP[4], IN[2]
 27: MOV TEMP[5].z, TEMP[4].xxxx
 28: MUL TEMP[4].xyz, TEMP[5].xyzz, CONST[6].wwww
 29: MUL TEMP[2], TEMP[2], IN[3]
 30: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 31: MAD TEMP[5].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 32: MOV TEMP[1].w, TEMP[5].xxxx
 33: MAD TEMP[5].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 34: MOV TEMP[5].zw, TEMP[3].yyxy
 35: MOV TEMP[6].x, TEMP[3].zzzz
 36: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 37: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 38: MOV TEMP[6].yzw, TEMP[3].yxyz
 39: MOV TEMP[3].xyz, TEMP[4].xyzx
 40: MOV OUT[1], TEMP[2]
 41: MOV OUT[3], TEMP[5]
 42: MOV OUT[5], TEMP[3]
 43: MOV OUT[4], TEMP[6]
 44: MOV OUT[2], TEMP[1]
 45: MOV OUT[0], TEMP[0]
 46: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = getelementptr <16 x i8> addrspace(2)* %3, i32 7
  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
  %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %5)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = fmul float %77, %38
  %98 = fadd float %97, %35
  %99 = fmul float %78, %39
  %100 = fadd float %99, %36
  %101 = fmul float %79, %40
  %102 = fadd float %101, %37
  %103 = fmul float %98, %49
  %104 = fmul float %100, %50
  %105 = fadd float %103, %104
  %106 = fmul float %102, %51
  %107 = fadd float %105, %106
  %108 = fmul float 1.000000e+00, %52
  %109 = fadd float %107, %108
  %110 = fmul float %98, %56
  %111 = fmul float %100, %57
  %112 = fadd float %110, %111
  %113 = fmul float %102, %58
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %59
  %116 = fadd float %114, %115
  %117 = fmul float %98, %63
  %118 = fmul float %100, %64
  %119 = fadd float %117, %118
  %120 = fmul float %102, %65
  %121 = fadd float %119, %120
  %122 = fmul float 1.000000e+00, %66
  %123 = fadd float %121, %122
  %124 = fmul float %11, %109
  %125 = fmul float %12, %109
  %126 = fmul float %13, %109
  %127 = fmul float %14, %109
  %128 = fmul float %15, %116
  %129 = fadd float %128, %124
  %130 = fmul float %16, %116
  %131 = fadd float %130, %125
  %132 = fmul float %17, %116
  %133 = fadd float %132, %126
  %134 = fmul float %18, %116
  %135 = fadd float %134, %127
  %136 = fmul float %19, %123
  %137 = fadd float %136, %129
  %138 = fmul float %20, %123
  %139 = fadd float %138, %131
  %140 = fmul float %21, %123
  %141 = fadd float %140, %133
  %142 = fmul float %22, %123
  %143 = fadd float %142, %135
  %144 = fadd float %137, %23
  %145 = fadd float %139, %24
  %146 = fadd float %141, %25
  %147 = fadd float %143, %26
  %148 = fmul float %83, %49
  %149 = fmul float %84, %50
  %150 = fadd float %148, %149
  %151 = fmul float %85, %51
  %152 = fadd float %150, %151
  %153 = fmul float 0.000000e+00, %52
  %154 = fadd float %152, %153
  %155 = fmul float %83, %56
  %156 = fmul float %84, %57
  %157 = fadd float %155, %156
  %158 = fmul float %85, %58
  %159 = fadd float %157, %158
  %160 = fmul float 0.000000e+00, %59
  %161 = fadd float %159, %160
  %162 = fmul float %83, %63
  %163 = fmul float %84, %64
  %164 = fadd float %162, %163
  %165 = fmul float %85, %65
  %166 = fadd float %164, %165
  %167 = fmul float 0.000000e+00, %66
  %168 = fadd float %166, %167
  %169 = fmul float %154, %34
  %170 = fmul float %161, %34
  %171 = fmul float %168, %34
  %172 = fmul float %94, %49
  %173 = fmul float %95, %50
  %174 = fadd float %172, %173
  %175 = fmul float %96, %51
  %176 = fadd float %174, %175
  %177 = fmul float 0.000000e+00, %52
  %178 = fadd float %176, %177
  %179 = fmul float %94, %56
  %180 = fmul float %95, %57
  %181 = fadd float %179, %180
  %182 = fmul float %96, %58
  %183 = fadd float %181, %182
  %184 = fmul float 0.000000e+00, %59
  %185 = fadd float %183, %184
  %186 = fmul float %94, %63
  %187 = fmul float %95, %64
  %188 = fadd float %186, %187
  %189 = fmul float %96, %65
  %190 = fadd float %188, %189
  %191 = fmul float 0.000000e+00, %66
  %192 = fadd float %190, %191
  %193 = fmul float %178, %34
  %194 = fmul float %185, %34
  %195 = fmul float %192, %34
  %196 = fmul float %27, %70
  %197 = fmul float %28, %71
  %198 = fmul float %29, %72
  %199 = fmul float 1.000000e+00, %73
  %200 = fsub float -0.000000e+00, %43
  %201 = fadd float %109, %200
  %202 = fsub float -0.000000e+00, %44
  %203 = fadd float %116, %202
  %204 = fsub float -0.000000e+00, %45
  %205 = fadd float %123, %204
  %206 = fmul float %146, %41
  %207 = fadd float %206, %42
  %208 = fmul float %89, %30
  %209 = fadd float %208, %32
  %210 = fmul float %90, %31
  %211 = fadd float %210, %33
  %212 = fmul float %195, %170
  %213 = fmul float %193, %171
  %214 = fmul float %194, %169
  %215 = fsub float -0.000000e+00, %212
  %216 = fmul float %194, %171
  %217 = fadd float %216, %215
  %218 = fsub float -0.000000e+00, %213
  %219 = fmul float %195, %169
  %220 = fadd float %219, %218
  %221 = fsub float -0.000000e+00, %214
  %222 = fmul float %193, %170
  %223 = fadd float %222, %221
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %196, float %197, float %198, float %199)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %201, float %203, float %205, float %207)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %209, float %211, float %169, float %170)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %217, float %220, float %223)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %193, float %194, float %195, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %144, float %145, float %146, float %147)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
10041503
d2820002
040a1304
d2820002
040a1711
06041902
c2020102
bf8c007f
101a0404
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
10061d03
d2820003
040e1b04
d2820003
040e1f11
06062103
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800021f
11121314
c0840714
bf8c000f
e00c2000
80021200
bf8c0770
10220d13
d2820011
04460b12
d2820011
04460f14
d2820011
04450108
c202011b
bf8c007f
10222204
102c1513
d2820016
045a1312
d2820016
045a1714
d2820016
0459010c
102c2c04
c0840718
bf8c007f
e00c2000
80021700
c2028115
c2040117
bf8c0070
7e360208
d282001b
046c0b18
c2028114
c2040116
bf8c007f
7e380208
d2820017
04700b17
f800022f
11161b17
c084071c
bf8c000f
e00c2000
80021700
bf8c0770
10000d18
d2820000
04020b17
d2820000
04020f19
d2820000
04010108
10000004
100c2d00
100a1518
d2820005
04161317
d2820005
04161719
d2820005
0415010c
100a0a04
100e2305
080c0d07
100e1d13
d2820007
041e1b12
d2820007
041e1f14
d2820007
041d0110
100e0e04
10120f05
10101d18
d2820008
04221b17
d2820008
04221f19
d2820008
04210110
10101004
10142d08
0812130a
10142308
10160f00
0814150b
f800023f
06090a07
bf8c070f
7e0c0280
f800024f
06080005
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[4..5]
DCL CONST[2..3]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[5].xxxx, CONST[5].yyyy
  2: MOV TEMP[1].xy, IN[2].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MUL TEMP[1], IN[1], TEMP[1]
  5: MAX TEMP[2].x, IN[3].zzzz, CONST[3].wwww
  6: MOV_SAT TEMP[2].x, TEMP[2].xxxx
  7: MUL TEMP[1], TEMP[1], TEMP[2].xxxx
  8: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[2].xy, TEMP[2].xyyy
 10: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
 11: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].zzzz, CONST[4].wwww
 12: RCP TEMP[2].x, TEMP[2].xxxx
 13: ADD TEMP[2].x, TEMP[2].xxxx, -IN[3].xxxx
 14: MUL_SAT TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy
 15: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx
 16: MOV TEMP[1].w, TEMP[2].xxxx
 17: MOV OUT[0], TEMP[1]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %29 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %30 = load <32 x i8> addrspace(2)* %29, !tbaa !0
  %31 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %34 = load <32 x i8> addrspace(2)* %33, !tbaa !0
  %35 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %46 = fmul float %13, %27
  %47 = fadd float %46, %28
  %48 = bitcast float %41 to i32
  %49 = bitcast float %42 to i32
  %50 = insertelement <2 x i32> undef, i32 %48, i32 0
  %51 = insertelement <2 x i32> %50, i32 %49, i32 1
  %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %30, <16 x i8> %32, i32 2)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = fmul float %37, %53
  %58 = fmul float %38, %54
  %59 = fmul float %39, %55
  %60 = fmul float %40, %56
  %61 = fcmp uge float %45, %24
  %62 = select i1 %61, float %45, float %24
  %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
  %64 = fmul float %57, %63
  %65 = fmul float %58, %63
  %66 = fmul float %59, %63
  %67 = fmul float %60, %63
  %68 = fmul float %12, %22
  %69 = fmul float %47, %23
  %70 = bitcast float %68 to i32
  %71 = bitcast float %69 to i32
  %72 = insertelement <2 x i32> undef, i32 %70, i32 0
  %73 = insertelement <2 x i32> %72, i32 %71, i32 1
  %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %34, <16 x i8> %36, i32 2)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = fmul float %75, %25
  %77 = fadd float %76, %26
  %78 = fdiv float 1.000000e+00, %77
  %79 = fsub float -0.000000e+00, %43
  %80 = fadd float %78, %79
  %81 = fmul float %80, %44
  %82 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00)
  %83 = fmul float %67, %82
  %84 = call i32 @llvm.SI.packf16(float %64, float %65)
  %85 = bitcast i32 %84 to float
  %86 = call i32 @llvm.SI.packf16(float %66, float %83)
  %87 = bitcast i32 %86 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %85, float %87, float %85, float %87)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140500
c8150501
c8100400
c8110401
c0840300
c0c60500
bf8c007f
f0800f00
00430504
c8100100
c8110101
bf8c0770
10080d04
c8240a00
c8250a01
c0840100
bf8c007f
c200090f
bf8c007f
d00c000c
02000109
7e140200
d2000009
0032130a
d206080a
02010109
10081504
c8240000
c8250001
10120b09
10121509
5e080909
c8240200
c8250201
10120f09
10121509
c82c0300
c82d0301
100a110b
100a1505
c8180800
c8190801
c2000914
c2008915
bf8c007f
7e0e0201
d2820003
041c0103
c2000909
bf8c007f
10100600
c2000908
bf8c007f
100e0400
c0800304
c0c60508
bf8c007f
f0800100
00030207
c2000912
c2008913
bf8c0070
7e060201
d2820002
040c0102
7e045502
08040d02
c80c0900
c80d0901
10000702
d2060800
02010100
10000105
5e000109
f8001c0f
00040004
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..24]
DCL TEMP[0..5], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     6.2820,    -1.0000,     0.0000}
IMM[1] INT32 {0, 1, 2, 3}
  0: SGE TEMP[0].x, IN[0].wwww, CONST[11].yyyy
  1: F2I TEMP[0].x, -TEMP[0]
  2: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
  3: SGE TEMP[1].x, IN[0].wwww, CONST[11].zzzz
  4: F2I TEMP[1].x, -TEMP[1]
  5: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx
  6: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
  7: F2I TEMP[0].x, TEMP[0].xxxx
  8: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
  9: I2F TEMP[2].y, TEMP[1].yyyy
 10: CMP TEMP[2].x, TEMP[2].yyyy, CONST[20].yyyy, CONST[20].xxxx
 11: I2F TEMP[3].z, TEMP[1].zzzz
 12: CMP TEMP[2].x, TEMP[3].zzzz, CONST[20].zzzz, TEMP[2].xxxx
 13: I2F TEMP[1].w, TEMP[1].wwww
 14: CMP TEMP[2].x, TEMP[1].wwww, CONST[20].wwww, TEMP[2].xxxx
 15: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
 16: I2F TEMP[4].y, TEMP[1].yyyy
 17: CMP TEMP[3].x, TEMP[4].yyyy, CONST[11].yyyy, CONST[11].xxxx
 18: I2F TEMP[4].z, TEMP[1].zzzz
 19: CMP TEMP[3].x, TEMP[4].zzzz, CONST[11].zzzz, TEMP[3].xxxx
 20: I2F TEMP[1].w, TEMP[1].wwww
 21: CMP TEMP[3].x, TEMP[1].wwww, CONST[11].wwww, TEMP[3].xxxx
 22: ADD TEMP[1].x, IN[0].wwww, -TEMP[3].xxxx
 23: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 24: USEQ TEMP[2].yzw, TEMP[0].xxxx, IMM[1]
 25: I2F TEMP[4].y, TEMP[2].yyyy
 26: CMP TEMP[3].x, TEMP[4].yyyy, CONST[16].yyyy, CONST[16].xxxx
 27: I2F TEMP[4].z, TEMP[2].zzzz
 28: CMP TEMP[3].x, TEMP[4].zzzz, CONST[16].zzzz, TEMP[3].xxxx
 29: I2F TEMP[2].w, TEMP[2].wwww
 30: CMP TEMP[3].x, TEMP[2].wwww, CONST[16].wwww, TEMP[3].xxxx
 31: UADD TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy
 32: USEQ TEMP[2].yzw, TEMP[2].xxxx, IMM[1]
 33: I2F TEMP[5].y, TEMP[2].yyyy
 34: CMP TEMP[4].x, TEMP[5].yyyy, CONST[16].yyyy, CONST[16].xxxx
 35: I2F TEMP[5].z, TEMP[2].zzzz
 36: CMP TEMP[4].x, TEMP[5].zzzz, CONST[16].zzzz, TEMP[4].xxxx
 37: I2F TEMP[2].w, TEMP[2].wwww
 38: CMP TEMP[4].x, TEMP[2].wwww, CONST[16].wwww, TEMP[4].xxxx
 39: ADD TEMP[2].xy, IN[1].xyyy, -CONST[17].xyyy
 40: LRP TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx
 41: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xxxx
 42: ADD TEMP[3].x, IMM[0].xxxx, -IN[2].xxxx
 43: MUL TEMP[3].xy, CONST[18].xyyy, TEMP[3].xxxx
 44: MAD TEMP[3].xy, CONST[18].zwww, IN[2].xxxx, TEMP[3].xyyy
 45: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy
 46: LRP TEMP[3].x, IN[2].zzzz, CONST[17].wwww, CONST[17].zzzz
 47: MUL TEMP[4].x, IMM[0].yyyy, IN[2].yyyy
 48: MAD TEMP[3].x, TEMP[3].xxxx, IN[0].wwww, TEMP[4].xxxx
 49: SIN TEMP[4].x, TEMP[3].xxxx
 50: COS TEMP[3].x, TEMP[3].xxxx
 51: MOV TEMP[4].y, TEMP[3].xxxx
 52: MUL TEMP[5].xy, TEMP[4].xyyy, IMM[0].zzzz
 53: MOV TEMP[4].z, TEMP[5].yyxy
 54: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xxxx
 55: MAD TEMP[2].xy, TEMP[2].yxxx, TEMP[4].xzzz, TEMP[3].xyyy
 56: MUL TEMP[3].xyz, CONST[21].xyzz, TEMP[2].xxxx
 57: MAD TEMP[2].xyz, CONST[22].xyzz, TEMP[2].yyyy, TEMP[3].xyzz
 58: ADD TEMP[2].xyz, IN[0].xyzz, TEMP[2].xyzz
 59: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 60: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 61: MAD TEMP[3], CONST[2], TEMP[2].zzzz, TEMP[3]
 62: ADD TEMP[3], TEMP[3], CONST[3]
 63: UADD TEMP[4].x, TEMP[0].xxxx, IMM[1].yyyy
 64: UADD TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz
 65: UARL ADDR[0].x, TEMP[4].xxxx
 66: UARL ADDR[0].x, TEMP[4].xxxx
 67: MOV TEMP[4], CONST[ADDR[0].x+11]
 68: UARL ADDR[0].x, TEMP[0].xxxx
 69: LRP TEMP[0], TEMP[1].xxxx, CONST[ADDR[0].x+11], TEMP[4]
 70: MUL TEMP[0], TEMP[0], CONST[9]
 71: MUL TEMP[1], CONST[4], TEMP[2].xxxx
 72: MAD TEMP[1], CONST[5], TEMP[2].yyyy, TEMP[1]
 73: MAD TEMP[1], CONST[6], TEMP[2].zzzz, TEMP[1]
 74: ADD TEMP[1].z, TEMP[1], CONST[7]
 75: MAD TEMP[1].x, TEMP[1].zzzz, CONST[8].xxxx, CONST[8].yyyy
 76: MOV TEMP[1].y, CONST[8].zzzz
 77: MAD TEMP[2].x, TEMP[3].zzzz, CONST[10].xxxx, CONST[10].yyyy
 78: MOV TEMP[2].z, TEMP[2].xxxx
 79: MOV TEMP[2].xy, TEMP[1].xyxx
 80: MOV OUT[2], IN[1].xyxy
 81: MOV OUT[3], TEMP[2]
 82: MOV OUT[1], TEMP[0]
 83: MOV OUT[0], TEMP[3]
 84: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 268)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 280)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 284)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 300)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 320)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 324)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 328)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 332)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 336)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 340)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 344)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 352)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 356)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 360)
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = extractelement <4 x float> %68, i32 3
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = fcmp uge float %72, %41
  %85 = select i1 %84, float 1.000000e+00, float 0.000000e+00
  %86 = fsub float -0.000000e+00, %85
  %87 = fptosi float %86 to i32
  %88 = bitcast i32 %87 to float
  %89 = bitcast float %88 to i32
  %90 = and i32 %89, 1065353216
  %91 = bitcast i32 %90 to float
  %92 = fcmp uge float %72, %42
  %93 = select i1 %92, float 1.000000e+00, float 0.000000e+00
  %94 = fsub float -0.000000e+00, %93
  %95 = fptosi float %94 to i32
  %96 = bitcast i32 %95 to float
  %97 = bitcast float %96 to i32
  %98 = and i32 %97, 1065353216
  %99 = bitcast i32 %98 to float
  %100 = fadd float %91, %99
  %101 = fptosi float %100 to i32
  %102 = bitcast i32 %101 to float
  %103 = bitcast float %102 to i32
  %104 = icmp eq i32 %103, 1
  %105 = sext i1 %104 to i32
  %106 = bitcast float %102 to i32
  %107 = icmp eq i32 %106, 2
  %108 = sext i1 %107 to i32
  %109 = bitcast float %102 to i32
  %110 = icmp eq i32 %109, 3
  %111 = sext i1 %110 to i32
  %112 = bitcast i32 %105 to float
  %113 = bitcast i32 %108 to float
  %114 = bitcast i32 %111 to float
  %115 = bitcast float %112 to i32
  %116 = sitofp i32 %115 to float
  %117 = call float @llvm.AMDGPU.cndlt(float %116, float %57, float %56)
  %118 = bitcast float %113 to i32
  %119 = sitofp i32 %118 to float
  %120 = call float @llvm.AMDGPU.cndlt(float %119, float %58, float %117)
  %121 = bitcast float %114 to i32
  %122 = sitofp i32 %121 to float
  %123 = call float @llvm.AMDGPU.cndlt(float %122, float %59, float %120)
  %124 = bitcast float %102 to i32
  %125 = icmp eq i32 %124, 1
  %126 = sext i1 %125 to i32
  %127 = bitcast float %102 to i32
  %128 = icmp eq i32 %127, 2
  %129 = sext i1 %128 to i32
  %130 = bitcast float %102 to i32
  %131 = icmp eq i32 %130, 3
  %132 = sext i1 %131 to i32
  %133 = bitcast i32 %126 to float
  %134 = bitcast i32 %129 to float
  %135 = bitcast i32 %132 to float
  %136 = bitcast float %133 to i32
  %137 = sitofp i32 %136 to float
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %41, float %40)
  %139 = bitcast float %134 to i32
  %140 = sitofp i32 %139 to float
  %141 = call float @llvm.AMDGPU.cndlt(float %140, float %42, float %138)
  %142 = bitcast float %135 to i32
  %143 = sitofp i32 %142 to float
  %144 = call float @llvm.AMDGPU.cndlt(float %143, float %43, float %141)
  %145 = fsub float -0.000000e+00, %144
  %146 = fadd float %72, %145
  %147 = fmul float %146, %123
  %148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00)
  %149 = bitcast float %102 to i32
  %150 = icmp eq i32 %149, 1
  %151 = sext i1 %150 to i32
  %152 = bitcast float %102 to i32
  %153 = icmp eq i32 %152, 2
  %154 = sext i1 %153 to i32
  %155 = bitcast float %102 to i32
  %156 = icmp eq i32 %155, 3
  %157 = sext i1 %156 to i32
  %158 = bitcast i32 %151 to float
  %159 = bitcast i32 %154 to float
  %160 = bitcast i32 %157 to float
  %161 = bitcast float %158 to i32
  %162 = sitofp i32 %161 to float
  %163 = call float @llvm.AMDGPU.cndlt(float %162, float %45, float %44)
  %164 = bitcast float %159 to i32
  %165 = sitofp i32 %164 to float
  %166 = call float @llvm.AMDGPU.cndlt(float %165, float %46, float %163)
  %167 = bitcast float %160 to i32
  %168 = sitofp i32 %167 to float
  %169 = call float @llvm.AMDGPU.cndlt(float %168, float %47, float %166)
  %170 = bitcast float %102 to i32
  %171 = add i32 %170, 1
  %172 = bitcast i32 %171 to float
  %173 = bitcast float %172 to i32
  %174 = icmp eq i32 %173, 1
  %175 = sext i1 %174 to i32
  %176 = bitcast float %172 to i32
  %177 = icmp eq i32 %176, 2
  %178 = sext i1 %177 to i32
  %179 = bitcast float %172 to i32
  %180 = icmp eq i32 %179, 3
  %181 = sext i1 %180 to i32
  %182 = bitcast i32 %175 to float
  %183 = bitcast i32 %178 to float
  %184 = bitcast i32 %181 to float
  %185 = bitcast float %182 to i32
  %186 = sitofp i32 %185 to float
  %187 = call float @llvm.AMDGPU.cndlt(float %186, float %45, float %44)
  %188 = bitcast float %183 to i32
  %189 = sitofp i32 %188 to float
  %190 = call float @llvm.AMDGPU.cndlt(float %189, float %46, float %187)
  %191 = bitcast float %184 to i32
  %192 = sitofp i32 %191 to float
  %193 = call float @llvm.AMDGPU.cndlt(float %192, float %47, float %190)
  %194 = fsub float -0.000000e+00, %48
  %195 = fadd float %76, %194
  %196 = fsub float -0.000000e+00, %49
  %197 = fadd float %77, %196
  %198 = call float @llvm.AMDGPU.lrp(float %148, float %193, float %169)
  %199 = fmul float %195, %198
  %200 = fmul float %197, %198
  %201 = fsub float -0.000000e+00, %81
  %202 = fadd float 1.000000e+00, %201
  %203 = fmul float %52, %202
  %204 = fmul float %53, %202
  %205 = fmul float %54, %81
  %206 = fadd float %205, %203
  %207 = fmul float %55, %81
  %208 = fadd float %207, %204
  %209 = fmul float %199, %206
  %210 = fmul float %200, %208
  %211 = call float @llvm.AMDGPU.lrp(float %83, float %51, float %50)
  %212 = fmul float 0x401920C4A0000000, %82
  %213 = fmul float %211, %72
  %214 = fadd float %213, %212
  %215 = call float @llvm.sin.f32(float %214)
  %216 = call float @llvm.cos.f32(float %214)
  %217 = fmul float %215, -1.000000e+00
  %218 = fmul float %209, %216
  %219 = fmul float %210, %216
  %220 = fmul float %210, %215
  %221 = fadd float %220, %218
  %222 = fmul float %209, %217
  %223 = fadd float %222, %219
  %224 = fmul float %60, %221
  %225 = fmul float %61, %221
  %226 = fmul float %62, %221
  %227 = fmul float %63, %223
  %228 = fadd float %227, %224
  %229 = fmul float %64, %223
  %230 = fadd float %229, %225
  %231 = fmul float %65, %223
  %232 = fadd float %231, %226
  %233 = fadd float %69, %228
  %234 = fadd float %70, %230
  %235 = fadd float %71, %232
  %236 = fmul float %11, %233
  %237 = fmul float %12, %233
  %238 = fmul float %13, %233
  %239 = fmul float %14, %233
  %240 = fmul float %15, %234
  %241 = fadd float %240, %236
  %242 = fmul float %16, %234
  %243 = fadd float %242, %237
  %244 = fmul float %17, %234
  %245 = fadd float %244, %238
  %246 = fmul float %18, %234
  %247 = fadd float %246, %239
  %248 = fmul float %19, %235
  %249 = fadd float %248, %241
  %250 = fmul float %20, %235
  %251 = fadd float %250, %243
  %252 = fmul float %21, %235
  %253 = fadd float %252, %245
  %254 = fmul float %22, %235
  %255 = fadd float %254, %247
  %256 = fadd float %249, %23
  %257 = fadd float %251, %24
  %258 = fadd float %253, %25
  %259 = fadd float %255, %26
  %260 = bitcast float %102 to i32
  %261 = add i32 %260, 1
  %262 = bitcast i32 %261 to float
  %263 = bitcast float %102 to i32
  %264 = add i32 %263, 2
  %265 = bitcast i32 %264 to float
  %266 = bitcast float %262 to i32
  %267 = shl i32 %266, 4
  %268 = add i32 %267, 176
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = shl i32 %266, 4
  %271 = add i32 %270, 180
  %272 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %271)
  %273 = shl i32 %266, 4
  %274 = add i32 %273, 184
  %275 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %274)
  %276 = shl i32 %266, 4
  %277 = add i32 %276, 188
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = bitcast float %265 to i32
  %280 = shl i32 %279, 4
  %281 = add i32 %280, 176
  %282 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %281)
  %283 = call float @llvm.AMDGPU.lrp(float %148, float %282, float %269)
  %284 = shl i32 %279, 4
  %285 = add i32 %284, 180
  %286 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %285)
  %287 = call float @llvm.AMDGPU.lrp(float %148, float %286, float %272)
  %288 = shl i32 %279, 4
  %289 = add i32 %288, 184
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = call float @llvm.AMDGPU.lrp(float %148, float %290, float %275)
  %292 = shl i32 %279, 4
  %293 = add i32 %292, 188
  %294 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %293)
  %295 = call float @llvm.AMDGPU.lrp(float %148, float %294, float %278)
  %296 = fmul float %283, %34
  %297 = fmul float %287, %35
  %298 = fmul float %291, %36
  %299 = fmul float %295, %37
  %300 = fmul float %27, %233
  %301 = fmul float %28, %234
  %302 = fadd float %301, %300
  %303 = fmul float %29, %235
  %304 = fadd float %303, %302
  %305 = fadd float %304, %30
  %306 = fmul float %305, %31
  %307 = fadd float %306, %32
  %308 = fmul float %258, %38
  %309 = fadd float %308, %39
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %296, float %297, float %298, float %299)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %76, float %77, float %76, float %77)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %307, float %33, float %309, float %192)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %256, float %257, float %258, float %259)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.sin.f32(float) #3

; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #3

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c206012e
bf8c007f
d00c0004
02001904
d2000005
0011e480
d2060005
22010105
7e0a1105
360a0af2
c206812d
bf8c007f
d00c0004
02001b04
d2000006
0011e480
d2060006
22010106
7e0c1106
360c0cf2
060a0b06
7e0e1105
d1040004
02010307
d2000005
00118280
7e0a0b05
d0080004
02020a80
c2040151
bf8c007f
7e0a0208
c2040150
bf8c007f
7e0c0208
d2000005
00120b06
d1040008
02010507
d2000006
00218280
7e0c0b06
d0080008
02020c80
c2050152
bf8c007f
7e0c020a
d2000005
00220d05
d104000a
02010707
d2000006
00298280
7e0c0b06
d008000a
02020c80
c2070153
bf8c007f
7e0c020e
d2000005
002a0d05
7e0c020d
c206812c
bf8c007f
7e10020d
d2000006
00120d08
7e10020c
d2000006
00221106
c206012f
bf8c007f
7e10020c
d2000006
002a1106
080c0d04
100a0b06
d2060806
02010105
08160cf2
4a0a0e81
34100a84
4a1210ff
000000bc
e0301000
80000909
bf8c0770
1012130b
4a0e0e82
340e0e84
4a140eff
000000bc
e0301000
80000a0a
bf8c0770
d2820009
04261506
c2060127
bf8c007f
1012120c
4a1410ff
000000b8
e0301000
80000a0a
bf8c0770
1014150b
4a180eff
000000b8
e0301000
80000c0c
bf8c0770
d282000a
042a1906
c2060126
bf8c007f
1014140c
4a1810ff
000000b4
e0301000
80000c0c
bf8c0770
1018190b
4a1a0eff
000000b4
e0301000
80000d0d
bf8c0770
d282000c
04321b06
c2060125
bf8c007f
1018180c
4a1010ff
000000b0
e0301000
80000808
bf8c0770
1010110b
4a0e0eff
000000b0
e0301000
80000707
bf8c0770
d2820007
04220f06
c2060124
bf8c007f
100e0e0c
f800020f
090a0c07
c0860704
bf8c000f
e00c2000
80030700
bf8c0770
f800021f
08070807
c2060141
bf8c000f
7e1a020c
c2060140
bf8c007f
7e1e020c
d200000c
00121b0f
c2020142
bf8c007f
7e1c0204
d2000010
00221d0c
c2020143
bf8c007f
7e180204
d2000010
002a1910
1016210b
d1040004
02010305
d2000010
00118280
7e200b10
d0080004
02022080
d200000d
00121b0f
d1040004
02010505
d200000f
00118280
7e1e0b0f
d0080004
02021e80
d200000d
00121d0d
d1040004
02010705
d2000005
00118280
7e0a0b05
d0080004
02020a80
d200000c
0012190d
d2820006
042e1906
c2020144
bf8c007f
0a160e04
10200d0b
c0820708
bf8c007f
e00c2000
80010b00
bf8c0770
081e16f2
c2020148
bf8c007f
10001e04
c202014a
bf8c007f
d2820000
04021604
10000110
08201af2
c2020146
bf8c007f
10202004
c2020147
bf8c007f
d2820010
0440090d
102218ff
40c90625
d2820010
04460910
102220ff
3e22f983
7e206b11
10242100
c2020145
bf8c007f
0a0e1004
100c0d07
c2020149
bf8c007f
100e1e04
c202014b
bf8c007f
d2820007
041e1604
100c0f06
7e106d11
100e1106
080e2507
10001100
d2820008
04022106
c2020155
bf8c007f
10001004
c2020159
bf8c007f
d2820000
04020e04
06000102
c2020154
bf8c007f
100c1004
c2020158
bf8c007f
d2820006
041a0e04
060c0d01
c2020102
bf8c007f
10120c04
c2020106
bf8c007f
d2820009
04260004
c2020156
bf8c007f
10101004
c202015a
bf8c007f
d2820007
04220e04
06020f03
c202010a
bf8c007f
d2820002
04260204
c202010e
bf8c007f
06040404
c2020128
c2028129
bf8c007f
7e060205
d2820003
040c0902
c2020112
bf8c007f
10080c04
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120204
c202011e
bf8c007f
06080804
c2020120
c2028121
bf8c007f
7e0e0205
d2820004
041c0904
c2020122
bf8c007f
7e0e0204
f800022f
05030704
c2020103
bf8c000f
10060c04
c2020107
bf8c007f
d2820003
040e0004
c202010b
bf8c007f
d2820003
040e0204
c202010f
bf8c007f
06060604
c2020101
bf8c007f
10080c04
c2020105
bf8c007f
d2820004
04120004
c2020109
bf8c007f
d2820004
04120204
c202010d
bf8c007f
06080804
c2020100
bf8c007f
100a0c04
c2020104
bf8c007f
d2820000
04160004
c2020108
bf8c007f
d2820000
04020204
c200010c
bf8c007f
06000000
f80008cf
03020400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[10..11]
DCL CONST[3..9]
DCL TEMP[0..1]
DCL TEMP[2..6], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0010}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 14: RSQ TEMP[6].x, TEMP[6].xxxx
 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 16: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 17: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 18: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 19: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 20: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 21: MOV TEMP[2].xyz, TEMP[2].xyzz
 22: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 23: DP4 TEMP[3].x, TEMP[4], CONST[9]
 24: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[6].zzzz
 25: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 26: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[4].xyzz
 27: DP4 TEMP[2].x, TEMP[4], CONST[8]
 28: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].yyyy
 29: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 30: MUL TEMP[2], TEMP[5], TEMP[2]
 31: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[3].xyyy
 32: MOV TEMP[3].xy, TEMP[3].xyyy
 33: TEX TEMP[3], TEMP[3], SAMP[2], 2D
 34: DP4 TEMP[4].x, TEMP[4], CONST[7]
 35: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[6].xxxx
 36: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 37: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 38: MAX TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 39: RCP TEMP[5].x, TEMP[5].xxxx
 40: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 41: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 42: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 43: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[1].wwww
 44: MAX TEMP[3].x, IN[3].wwww, CONST[4].wwww
 45: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 46: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[4].xyzz
 47: MOV OUT[0], TEMP[2]
 48: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = fcmp ugt float %16, 0.000000e+00
  %58 = select i1 %57, float 1.000000e+00, float 0.000000e+00
  %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %66 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %72 = fmul float %13, %43
  %73 = fadd float %72, %44
  %74 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %75 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %76 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %77 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %78 = bitcast float %74 to i32
  %79 = icmp ne i32 %78, 0
  %. = select i1 %79, float -1.000000e+00, float 1.000000e+00
  %80 = bitcast float %67 to i32
  %81 = bitcast float %68 to i32
  %82 = insertelement <2 x i32> undef, i32 %80, i32 0
  %83 = insertelement <2 x i32> %82, i32 %81, i32 1
  %84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %46, <16 x i8> %48, i32 2)
  %85 = extractelement <4 x float> %84, i32 0
  %86 = extractelement <4 x float> %84, i32 1
  %87 = extractelement <4 x float> %84, i32 2
  %88 = extractelement <4 x float> %84, i32 3
  %89 = fmul float %69, %69
  %90 = fmul float %70, %70
  %91 = fadd float %90, %89
  %92 = fmul float %71, %71
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %69, %94
  %96 = fmul float %70, %94
  %97 = fmul float %71, %94
  %98 = fmul float %95, %.
  %99 = fmul float %96, %.
  %100 = fmul float %97, %.
  %101 = fmul float %98, %63
  %102 = fmul float %99, %64
  %103 = fadd float %102, %101
  %104 = fmul float %100, %65
  %105 = fadd float %103, %104
  %106 = fmul float %105, %98
  %107 = fmul float %105, %99
  %108 = fmul float %105, %100
  %109 = fmul float 2.000000e+00, %106
  %110 = fmul float 2.000000e+00, %107
  %111 = fmul float 2.000000e+00, %108
  %112 = fsub float -0.000000e+00, %109
  %113 = fadd float %63, %112
  %114 = fsub float -0.000000e+00, %110
  %115 = fadd float %64, %114
  %116 = fsub float -0.000000e+00, %111
  %117 = fadd float %65, %116
  %118 = insertelement <4 x float> undef, float %113, i32 0
  %119 = insertelement <4 x float> %118, float %115, i32 1
  %120 = insertelement <4 x float> %119, float %117, i32 2
  %121 = insertelement <4 x float> %120, float 0.000000e+00, i32 3
  %122 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %121)
  %123 = extractelement <4 x float> %122, i32 0
  %124 = extractelement <4 x float> %122, i32 1
  %125 = extractelement <4 x float> %122, i32 2
  %126 = extractelement <4 x float> %122, i32 3
  %127 = call float @fabs(float %125)
  %128 = fdiv float 1.000000e+00, %127
  %129 = fmul float %123, %128
  %130 = fadd float %129, 1.500000e+00
  %131 = fmul float %124, %128
  %132 = fadd float %131, 1.500000e+00
  %133 = bitcast float %132 to i32
  %134 = bitcast float %130 to i32
  %135 = bitcast float %126 to i32
  %136 = insertelement <4 x i32> undef, i32 %133, i32 0
  %137 = insertelement <4 x i32> %136, i32 %134, i32 1
  %138 = insertelement <4 x i32> %137, i32 %135, i32 2
  %139 = insertelement <4 x i32> %138, i32 undef, i32 3
  %140 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %139, <32 x i8> %50, <16 x i8> %52, i32 4)
  %141 = extractelement <4 x float> %140, i32 0
  %142 = extractelement <4 x float> %140, i32 1
  %143 = extractelement <4 x float> %140, i32 2
  %144 = extractelement <4 x float> %140, i32 3
  %145 = fmul float %85, %39
  %146 = fmul float %86, %40
  %147 = fadd float %145, %146
  %148 = fmul float %87, %41
  %149 = fadd float %147, %148
  %150 = fmul float %88, %42
  %151 = fadd float %149, %150
  %152 = fadd float %151, %30
  %153 = call float @llvm.AMDIL.clamp.(float %152, float 0.000000e+00, float 1.000000e+00)
  %154 = fmul float %144, %153
  %155 = call float @llvm.AMDGPU.lrp(float %154, float %141, float %85)
  %156 = call float @llvm.AMDGPU.lrp(float %154, float %142, float %86)
  %157 = call float @llvm.AMDGPU.lrp(float %154, float %143, float %87)
  %158 = fmul float %85, %35
  %159 = fmul float %86, %36
  %160 = fadd float %158, %159
  %161 = fmul float %87, %37
  %162 = fadd float %160, %161
  %163 = fmul float %88, %38
  %164 = fadd float %162, %163
  %165 = fadd float %164, %29
  %166 = call float @llvm.AMDIL.clamp.(float %165, float 0.000000e+00, float 1.000000e+00)
  %167 = call float @llvm.AMDGPU.lrp(float %166, float %59, float 1.000000e+00)
  %168 = call float @llvm.AMDGPU.lrp(float %166, float %60, float 1.000000e+00)
  %169 = call float @llvm.AMDGPU.lrp(float %166, float %61, float 1.000000e+00)
  %170 = call float @llvm.AMDGPU.lrp(float %166, float %62, float 1.000000e+00)
  %171 = fmul float %155, %167
  %172 = fmul float %156, %168
  %173 = fmul float %157, %169
  %174 = fmul float %88, %170
  %175 = fmul float %12, %22
  %176 = fmul float %73, %23
  %177 = bitcast float %175 to i32
  %178 = bitcast float %176 to i32
  %179 = insertelement <2 x i32> undef, i32 %177, i32 0
  %180 = insertelement <2 x i32> %179, i32 %178, i32 1
  %181 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %180, <32 x i8> %54, <16 x i8> %56, i32 2)
  %182 = extractelement <4 x float> %181, i32 0
  %183 = extractelement <4 x float> %181, i32 1
  %184 = extractelement <4 x float> %181, i32 2
  %185 = extractelement <4 x float> %181, i32 3
  %186 = fmul float %85, %31
  %187 = fmul float %86, %32
  %188 = fadd float %186, %187
  %189 = fmul float %87, %33
  %190 = fadd float %188, %189
  %191 = fmul float %88, %34
  %192 = fadd float %190, %191
  %193 = fadd float %192, %28
  %194 = call float @llvm.AMDIL.clamp.(float %193, float 0.000000e+00, float 1.000000e+00)
  %195 = fmul float %194, %185
  %196 = fmul float %182, 0x3FCB367A00000000
  %197 = fmul float %183, 0x3FE6E2EB20000000
  %198 = fadd float %197, %196
  %199 = fmul float %184, 0x3FB27BB300000000
  %200 = fadd float %198, %199
  %201 = fcmp uge float %200, 0x3F50624DE0000000
  %202 = select i1 %201, float %200, float 0x3F50624DE0000000
  %203 = fdiv float 1.000000e+00, %202
  %204 = fmul float %182, %203
  %205 = fmul float %183, %203
  %206 = fmul float %184, %203
  %207 = fmul float %171, %182
  %208 = fmul float %172, %183
  %209 = fmul float %173, %184
  %210 = fmul float %195, %204
  %211 = fadd float %210, %207
  %212 = fmul float %195, %205
  %213 = fadd float %212, %208
  %214 = fmul float %195, %206
  %215 = fadd float %214, %209
  %216 = fmul float %211, 4.000000e+00
  %217 = fmul float %213, 4.000000e+00
  %218 = fmul float %215, 4.000000e+00
  %219 = fcmp uge float %66, %27
  %220 = select i1 %219, float %66, float %27
  %221 = call float @llvm.AMDIL.clamp.(float %220, float 0.000000e+00, float 1.000000e+00)
  %222 = call float @llvm.AMDGPU.lrp(float %221, float %216, float %24)
  %223 = call float @llvm.AMDGPU.lrp(float %221, float %217, float %25)
  %224 = call float @llvm.AMDGPU.lrp(float %221, float %218, float %26)
  %225 = call i32 @llvm.SI.packf16(float %222, float %223)
  %226 = bitcast i32 %225 to float
  %227 = call i32 @llvm.SI.packf16(float %224, float %174)
  %228 = bitcast i32 %227 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %226, float %228, float %226, float %228)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180b00
c8190b01
c81c0a00
c81d0a01
100a0f07
d2820008
04160d06
c8140c00
c8150c01
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0400
c81d0401
10160f04
c8280500
c8290501
d282000b
042e1506
100a1105
100a1305
c8200600
c8210601
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430805
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c0070
f0800f00
00430404
c0840100
bf8c0070
c2000925
bf8c007f
10180a00
c2000924
bf8c007f
d282000c
04300104
c2000926
bf8c007f
d282000c
04300106
c2000927
bf8c007f
d282000c
04300107
c200091a
bf8c007f
06181800
d206080c
0201010c
101c190b
081e1cf2
10180b0f
d2820010
0432130e
c2000921
bf8c007f
10180a00
c2000920
bf8c007f
d282000c
04300104
c2000922
bf8c007f
d282000c
04300106
c2000923
bf8c007f
d282000c
04300107
c2000919
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8440100
c8450101
d2820011
0436230c
10282310
c200092c
c200892d
bf8c007f
7e200201
d2820003
04400103
c200090d
bf8c007f
10220600
c200090c
bf8c007f
10200400
c0800308
c0c60510
bf8c007f
f0800f00
00031010
bf8c0770
10282314
100420ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0711
7e0602ff
3d93dd98
d2820002
040a0712
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
102a0511
c200091d
bf8c007f
10060a00
c200091c
bf8c007f
d2820003
040c0104
c200091e
bf8c007f
d2820003
040c0106
c200091f
bf8c007f
d2820003
040c0107
c2000918
bf8c007f
06060600
d2060803
02010103
10062703
d2820014
04522b03
102c28f6
c8500700
c8510701
c2000913
bf8c007f
d00c0002
02000114
7e2a0200
d2000014
000a2915
d2060814
02010114
082a28f2
c2000911
bf8c007f
102e2a00
d2820016
045e2d14
102e090f
d2820017
045e110e
c8600000
c8610001
d2820018
0436310c
102e3117
102e2117
10300510
d2820017
045e3103
102e2ef6
c2000910
bf8c007f
10302a00
d2820017
04622f14
5e2c2d17
101e0d0f
d2820008
043e150e
c8240200
c8250201
d2820009
0436130c
10101308
10102508
10040512
d2820002
04220503
100404f6
c2000912
bf8c007f
10062a00
d2820002
040e0514
c80c0300
c80d0301
d2820000
0436070c
10000107
5e000102
f8001c0f
00160016
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..10]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MUL TEMP[4], CONST[0], TEMP[1].xxxx
 10: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[4]
 11: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 12: ADD TEMP[0], TEMP[0], CONST[3]
 13: MOV TEMP[2].w, IMM[0].xxxx
 14: MOV TEMP[2].xyz, CONST[4].xyzx
 15: DP4 TEMP[4].x, TEMP[3], IN[0]
 16: DP4 TEMP[5].x, TEMP[3], IN[1]
 17: MOV TEMP[4].y, TEMP[5].xxxx
 18: DP4 TEMP[3].x, TEMP[3], IN[2]
 19: MOV TEMP[4].z, TEMP[3].xxxx
 20: MUL TEMP[3].xyz, TEMP[4].xyzz, CONST[6].wwww
 21: MUL TEMP[2], TEMP[2], IN[3]
 22: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 23: MAD TEMP[4].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 24: MOV TEMP[1].w, TEMP[4].xxxx
 25: MAD TEMP[4].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 26: MOV TEMP[4].zw, TEMP[3].yyxy
 27: MOV TEMP[3].x, TEMP[3].zzzz
 28: MOV OUT[1], TEMP[2]
 29: MOV OUT[3], TEMP[4]
 30: MOV OUT[4], TEMP[3]
 31: MOV OUT[2], TEMP[1]
 32: MOV OUT[0], TEMP[0]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = fmul float %77, %38
  %92 = fadd float %91, %35
  %93 = fmul float %78, %39
  %94 = fadd float %93, %36
  %95 = fmul float %79, %40
  %96 = fadd float %95, %37
  %97 = fmul float %92, %49
  %98 = fmul float %94, %50
  %99 = fadd float %97, %98
  %100 = fmul float %96, %51
  %101 = fadd float %99, %100
  %102 = fmul float 1.000000e+00, %52
  %103 = fadd float %101, %102
  %104 = fmul float %92, %56
  %105 = fmul float %94, %57
  %106 = fadd float %104, %105
  %107 = fmul float %96, %58
  %108 = fadd float %106, %107
  %109 = fmul float 1.000000e+00, %59
  %110 = fadd float %108, %109
  %111 = fmul float %92, %63
  %112 = fmul float %94, %64
  %113 = fadd float %111, %112
  %114 = fmul float %96, %65
  %115 = fadd float %113, %114
  %116 = fmul float 1.000000e+00, %66
  %117 = fadd float %115, %116
  %118 = fmul float %11, %103
  %119 = fmul float %12, %103
  %120 = fmul float %13, %103
  %121 = fmul float %14, %103
  %122 = fmul float %15, %110
  %123 = fadd float %122, %118
  %124 = fmul float %16, %110
  %125 = fadd float %124, %119
  %126 = fmul float %17, %110
  %127 = fadd float %126, %120
  %128 = fmul float %18, %110
  %129 = fadd float %128, %121
  %130 = fmul float %19, %117
  %131 = fadd float %130, %123
  %132 = fmul float %20, %117
  %133 = fadd float %132, %125
  %134 = fmul float %21, %117
  %135 = fadd float %134, %127
  %136 = fmul float %22, %117
  %137 = fadd float %136, %129
  %138 = fadd float %131, %23
  %139 = fadd float %133, %24
  %140 = fadd float %135, %25
  %141 = fadd float %137, %26
  %142 = fmul float %83, %49
  %143 = fmul float %84, %50
  %144 = fadd float %142, %143
  %145 = fmul float %85, %51
  %146 = fadd float %144, %145
  %147 = fmul float 0.000000e+00, %52
  %148 = fadd float %146, %147
  %149 = fmul float %83, %56
  %150 = fmul float %84, %57
  %151 = fadd float %149, %150
  %152 = fmul float %85, %58
  %153 = fadd float %151, %152
  %154 = fmul float 0.000000e+00, %59
  %155 = fadd float %153, %154
  %156 = fmul float %83, %63
  %157 = fmul float %84, %64
  %158 = fadd float %156, %157
  %159 = fmul float %85, %65
  %160 = fadd float %158, %159
  %161 = fmul float 0.000000e+00, %66
  %162 = fadd float %160, %161
  %163 = fmul float %148, %34
  %164 = fmul float %155, %34
  %165 = fmul float %162, %34
  %166 = fmul float %27, %70
  %167 = fmul float %28, %71
  %168 = fmul float %29, %72
  %169 = fmul float 1.000000e+00, %73
  %170 = fsub float -0.000000e+00, %43
  %171 = fadd float %103, %170
  %172 = fsub float -0.000000e+00, %44
  %173 = fadd float %110, %172
  %174 = fsub float -0.000000e+00, %45
  %175 = fadd float %117, %174
  %176 = fmul float %140, %41
  %177 = fadd float %176, %42
  %178 = fmul float %89, %30
  %179 = fadd float %178, %32
  %180 = fmul float %90, %31
  %181 = fadd float %180, %33
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %166, float %167, float %168, float %169)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %171, float %173, float %175, float %177)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %179, float %181, float %163, float %164)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %165, float %164, float %165, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %138, float %139, float %140, float %141)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020d00
bf8c0770
10041d03
d2820002
040a1b04
d2820002
040a1f11
06042102
c2020102
bf8c007f
10120404
c2020106
bf8c007f
d2820012
04260204
c0840708
bf8c007f
e00c2000
80020900
bf8c0770
10061503
d2820003
040e1304
d2820003
040e1711
06061903
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800021f
11121314
c0840714
bf8c000f
e00c2000
80021100
bf8c0770
102a0d12
d2820015
04560b11
d2820015
04560f13
d2820005
04550108
c202011b
bf8c007f
100a0a04
100c1d12
d2820006
041a1b11
d2820006
041a1f13
d2820006
04190110
100c0c04
c0840718
bf8c007f
e00c2000
80020d00
c2028115
c2030117
bf8c0070
7e000206
d2820000
04000b0e
c2028114
c2030116
bf8c007f
7e0e0206
d2820007
041c0b0d
f800022f
05060007
bf8c070f
10001512
d2820000
04021311
d2820000
04021713
d2820000
0401010c
10000004
7e0c0280
f800023f
06000500
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[13..14]
DCL CONST[5..12]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: MOV TEMP[6].xy, IN[5].xyyy
 14: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 15: MAD TEMP[6].yw, IMM[0].zzzz, TEMP[6], IMM[0].xxxx
 16: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz
 17: RSQ TEMP[7].x, TEMP[7].xxxx
 18: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 19: DP2 TEMP[7].x, TEMP[6].ywww, TEMP[6].ywww
 20: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx
 21: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
 22: RSQ TEMP[8].x, TEMP[7].xxxx
 23: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx
 24: CMP TEMP[8].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].wwww
 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 26: DP3 TEMP[7].x, IN[6].yzww, IN[6].yzww
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[7].xyz, IN[6].yzww, TEMP[7].xxxx
 29: DP3 TEMP[8].x, IN[7].xyzz, IN[7].xyzz
 30: RSQ TEMP[8].x, TEMP[8].xxxx
 31: MUL TEMP[8].xyz, IN[7].xyzz, TEMP[8].xxxx
 32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].wwww
 33: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[6].yyyy, TEMP[8].xyzz
 34: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
 35: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 36: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 37: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 38: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 39: MOV TEMP[2].xyz, TEMP[2].xyzz
 40: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 41: DP4 TEMP[3].x, TEMP[4], CONST[12]
 42: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[9].zzzz
 43: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 44: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[4].xyzz
 45: DP4 TEMP[2].x, TEMP[4], CONST[11]
 46: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[9].yyyy
 47: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 48: MUL TEMP[2], TEMP[5], TEMP[2]
 49: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 50: MOV TEMP[3].xy, TEMP[3].xyyy
 51: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 52: DP4 TEMP[4].x, TEMP[4], CONST[10]
 53: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[9].xxxx
 54: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 55: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 56: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww
 57: RCP TEMP[5].x, TEMP[5].xxxx
 58: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 59: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 60: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 61: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[2].xxxx
 62: MOV TEMP[3].xy, IN[3].zwww
 63: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D
 64: MAD TEMP[2].xyz, CONST[13].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
 65: MAX TEMP[3].x, IN[4].wwww, CONST[6].wwww
 66: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 67: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[6].xyzz
 68: MAD TEMP[3].x, TEMP[2].wwww, CONST[7].yyyy, CONST[7].zzzz
 69: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 70: F2I TEMP[3].x, -TEMP[3]
 71: UIF TEMP[3].xxxx :3
 72:   KILL
 73: ENDIF
 74: MOV OUT[0], TEMP[2]
 75: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %50 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %51 = load <32 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %55 = load <32 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %59 = load <32 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %63 = load <32 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %67 = load <32 x i8> addrspace(2)* %66, !tbaa !0
  %68 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0
  %70 = fcmp ugt float %16, 0.000000e+00
  %71 = select i1 %70, float 1.000000e+00, float 0.000000e+00
  %72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %93 = fmul float %13, %48
  %94 = fadd float %93, %49
  %95 = call float @llvm.AMDIL.clamp.(float %71, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %97 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %98 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %99 = bitcast float %95 to i32
  %100 = icmp ne i32 %99, 0
  %. = select i1 %100, float -1.000000e+00, float 1.000000e+00
  %101 = bitcast float %82 to i32
  %102 = bitcast float %83 to i32
  %103 = insertelement <2 x i32> undef, i32 %101, i32 0
  %104 = insertelement <2 x i32> %103, i32 %102, i32 1
  %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %51, <16 x i8> %53, i32 2)
  %106 = extractelement <4 x float> %105, i32 0
  %107 = extractelement <4 x float> %105, i32 1
  %108 = extractelement <4 x float> %105, i32 2
  %109 = extractelement <4 x float> %105, i32 3
  %110 = bitcast float %82 to i32
  %111 = bitcast float %83 to i32
  %112 = insertelement <2 x i32> undef, i32 %110, i32 0
  %113 = insertelement <2 x i32> %112, i32 %111, i32 1
  %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %55, <16 x i8> %57, i32 2)
  %115 = extractelement <4 x float> %114, i32 1
  %116 = extractelement <4 x float> %114, i32 3
  %117 = fmul float 2.000000e+00, %115
  %118 = fadd float %117, -1.000000e+00
  %119 = fmul float 2.000000e+00, %116
  %120 = fadd float %119, -1.000000e+00
  %121 = fmul float %84, %84
  %122 = fmul float %85, %85
  %123 = fadd float %122, %121
  %124 = fmul float %86, %86
  %125 = fadd float %123, %124
  %126 = call float @llvm.AMDGPU.rsq(float %125)
  %127 = fmul float %84, %126
  %128 = fmul float %85, %126
  %129 = fmul float %86, %126
  %130 = fmul float %118, %118
  %131 = fmul float %120, %120
  %132 = fadd float %130, %131
  %133 = fsub float -0.000000e+00, %132
  %134 = fadd float 1.000000e+00, %133
  %135 = fcmp uge float 0.000000e+00, %134
  %136 = select i1 %135, float 0.000000e+00, float %134
  %137 = call float @llvm.AMDGPU.rsq(float %136)
  %138 = fmul float %137, %136
  %139 = fsub float -0.000000e+00, %136
  %140 = call float @llvm.AMDGPU.cndlt(float %139, float %138, float 0.000000e+00)
  %141 = fmul float %127, %140
  %142 = fmul float %128, %140
  %143 = fmul float %129, %140
  %144 = fmul float %87, %87
  %145 = fmul float %88, %88
  %146 = fadd float %145, %144
  %147 = fmul float %89, %89
  %148 = fadd float %146, %147
  %149 = call float @llvm.AMDGPU.rsq(float %148)
  %150 = fmul float %87, %149
  %151 = fmul float %88, %149
  %152 = fmul float %89, %149
  %153 = fmul float %90, %90
  %154 = fmul float %91, %91
  %155 = fadd float %154, %153
  %156 = fmul float %92, %92
  %157 = fadd float %155, %156
  %158 = call float @llvm.AMDGPU.rsq(float %157)
  %159 = fmul float %90, %158
  %160 = fmul float %91, %158
  %161 = fmul float %92, %158
  %162 = fmul float %159, %120
  %163 = fmul float %160, %120
  %164 = fmul float %161, %120
  %165 = fmul float %150, %118
  %166 = fadd float %165, %162
  %167 = fmul float %151, %118
  %168 = fadd float %167, %163
  %169 = fmul float %152, %118
  %170 = fadd float %169, %164
  %171 = fmul float %141, %.
  %172 = fadd float %171, %166
  %173 = fmul float %142, %.
  %174 = fadd float %173, %168
  %175 = fmul float %143, %.
  %176 = fadd float %175, %170
  %177 = fmul float %172, %78
  %178 = fmul float %174, %79
  %179 = fadd float %178, %177
  %180 = fmul float %176, %80
  %181 = fadd float %179, %180
  %182 = fmul float %181, %172
  %183 = fmul float %181, %174
  %184 = fmul float %181, %176
  %185 = fmul float 2.000000e+00, %182
  %186 = fmul float 2.000000e+00, %183
  %187 = fmul float 2.000000e+00, %184
  %188 = fsub float -0.000000e+00, %185
  %189 = fadd float %78, %188
  %190 = fsub float -0.000000e+00, %186
  %191 = fadd float %79, %190
  %192 = fsub float -0.000000e+00, %187
  %193 = fadd float %80, %192
  %194 = insertelement <4 x float> undef, float %189, i32 0
  %195 = insertelement <4 x float> %194, float %191, i32 1
  %196 = insertelement <4 x float> %195, float %193, i32 2
  %197 = insertelement <4 x float> %196, float 0.000000e+00, i32 3
  %198 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %197)
  %199 = extractelement <4 x float> %198, i32 0
  %200 = extractelement <4 x float> %198, i32 1
  %201 = extractelement <4 x float> %198, i32 2
  %202 = extractelement <4 x float> %198, i32 3
  %203 = call float @fabs(float %201)
  %204 = fdiv float 1.000000e+00, %203
  %205 = fmul float %199, %204
  %206 = fadd float %205, 1.500000e+00
  %207 = fmul float %200, %204
  %208 = fadd float %207, 1.500000e+00
  %209 = bitcast float %208 to i32
  %210 = bitcast float %206 to i32
  %211 = bitcast float %202 to i32
  %212 = insertelement <4 x i32> undef, i32 %209, i32 0
  %213 = insertelement <4 x i32> %212, i32 %210, i32 1
  %214 = insertelement <4 x i32> %213, i32 %211, i32 2
  %215 = insertelement <4 x i32> %214, i32 undef, i32 3
  %216 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %215, <32 x i8> %59, <16 x i8> %61, i32 4)
  %217 = extractelement <4 x float> %216, i32 0
  %218 = extractelement <4 x float> %216, i32 1
  %219 = extractelement <4 x float> %216, i32 2
  %220 = extractelement <4 x float> %216, i32 3
  %221 = fmul float %106, %41
  %222 = fmul float %107, %42
  %223 = fadd float %221, %222
  %224 = fmul float %108, %43
  %225 = fadd float %223, %224
  %226 = fmul float %109, %44
  %227 = fadd float %225, %226
  %228 = fadd float %227, %32
  %229 = call float @llvm.AMDIL.clamp.(float %228, float 0.000000e+00, float 1.000000e+00)
  %230 = fmul float %220, %229
  %231 = call float @llvm.AMDGPU.lrp(float %230, float %217, float %106)
  %232 = call float @llvm.AMDGPU.lrp(float %230, float %218, float %107)
  %233 = call float @llvm.AMDGPU.lrp(float %230, float %219, float %108)
  %234 = fmul float %106, %37
  %235 = fmul float %107, %38
  %236 = fadd float %234, %235
  %237 = fmul float %108, %39
  %238 = fadd float %236, %237
  %239 = fmul float %109, %40
  %240 = fadd float %238, %239
  %241 = fadd float %240, %31
  %242 = call float @llvm.AMDIL.clamp.(float %241, float 0.000000e+00, float 1.000000e+00)
  %243 = call float @llvm.AMDGPU.lrp(float %242, float %72, float 1.000000e+00)
  %244 = call float @llvm.AMDGPU.lrp(float %242, float %73, float 1.000000e+00)
  %245 = call float @llvm.AMDGPU.lrp(float %242, float %74, float 1.000000e+00)
  %246 = call float @llvm.AMDGPU.lrp(float %242, float %75, float 1.000000e+00)
  %247 = fmul float %231, %243
  %248 = fmul float %232, %244
  %249 = fmul float %233, %245
  %250 = fmul float %109, %246
  %251 = fmul float %12, %22
  %252 = fmul float %94, %23
  %253 = bitcast float %251 to i32
  %254 = bitcast float %252 to i32
  %255 = insertelement <2 x i32> undef, i32 %253, i32 0
  %256 = insertelement <2 x i32> %255, i32 %254, i32 1
  %257 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %256, <32 x i8> %67, <16 x i8> %69, i32 2)
  %258 = extractelement <4 x float> %257, i32 0
  %259 = extractelement <4 x float> %257, i32 1
  %260 = extractelement <4 x float> %257, i32 2
  %261 = extractelement <4 x float> %257, i32 3
  %262 = fmul float %106, %33
  %263 = fmul float %107, %34
  %264 = fadd float %262, %263
  %265 = fmul float %108, %35
  %266 = fadd float %264, %265
  %267 = fmul float %109, %36
  %268 = fadd float %266, %267
  %269 = fadd float %268, %30
  %270 = call float @llvm.AMDIL.clamp.(float %269, float 0.000000e+00, float 1.000000e+00)
  %271 = fmul float %270, %261
  %272 = fmul float %258, 0x3FCB367A00000000
  %273 = fmul float %259, 0x3FE6E2EB20000000
  %274 = fadd float %273, %272
  %275 = fmul float %260, 0x3FB27BB300000000
  %276 = fadd float %274, %275
  %277 = fcmp uge float %276, 0x3F50624DE0000000
  %278 = select i1 %277, float %276, float 0x3F50624DE0000000
  %279 = fdiv float 1.000000e+00, %278
  %280 = fmul float %258, %279
  %281 = fmul float %259, %279
  %282 = fmul float %260, %279
  %283 = fmul float %247, %258
  %284 = fmul float %248, %259
  %285 = fmul float %249, %260
  %286 = fmul float %271, %280
  %287 = fadd float %286, %283
  %288 = fmul float %271, %281
  %289 = fadd float %288, %284
  %290 = fmul float %271, %282
  %291 = fadd float %290, %285
  %292 = fmul float %287, 4.000000e+00
  %293 = fmul float %289, 4.000000e+00
  %294 = fmul float %291, 4.000000e+00
  %295 = bitcast float %76 to i32
  %296 = bitcast float %77 to i32
  %297 = insertelement <2 x i32> undef, i32 %295, i32 0
  %298 = insertelement <2 x i32> %297, i32 %296, i32 1
  %299 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %298, <32 x i8> %63, <16 x i8> %65, i32 2)
  %300 = extractelement <4 x float> %299, i32 0
  %301 = extractelement <4 x float> %299, i32 1
  %302 = extractelement <4 x float> %299, i32 2
  %303 = fmul float %45, %300
  %304 = fadd float %303, %292
  %305 = fmul float %46, %301
  %306 = fadd float %305, %293
  %307 = fmul float %47, %302
  %308 = fadd float %307, %294
  %309 = fcmp uge float %81, %27
  %310 = select i1 %309, float %81, float %27
  %311 = call float @llvm.AMDIL.clamp.(float %310, float 0.000000e+00, float 1.000000e+00)
  %312 = call float @llvm.AMDGPU.lrp(float %311, float %304, float %24)
  %313 = call float @llvm.AMDGPU.lrp(float %311, float %306, float %25)
  %314 = call float @llvm.AMDGPU.lrp(float %311, float %308, float %26)
  %315 = fmul float %250, %28
  %316 = fadd float %315, %29
  %317 = fcmp ult float %316, 0.000000e+00
  %318 = select i1 %317, float 1.000000e+00, float 0.000000e+00
  %319 = fsub float -0.000000e+00, %318
  %320 = fptosi float %319 to i32
  %321 = bitcast i32 %320 to float
  %322 = bitcast float %321 to i32
  %323 = icmp ne i32 %322, 0
  br i1 %323, label %IF37, label %ENDIF36

IF37:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF36

ENDIF36:                                          ; preds = %main_body, %IF37
  %324 = call i32 @llvm.SI.packf16(float %312, float %313)
  %325 = bitcast i32 %324 to float
  %326 = call i32 @llvm.SI.packf16(float %314, float %250)
  %327 = bitcast i32 %326 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %325, float %327, float %325, float %327)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
c0840300
c0c60500
bf8c0070
f0800f00
00430405
c0840100
bf8c0070
c2000931
bf8c007f
10180a00
c2000930
bf8c007f
d282000c
04300104
c2000932
bf8c007f
d282000c
04300106
c2000933
bf8c007f
d282000c
04300107
c2000926
bf8c007f
06181800
d206080c
0201010c
101c190b
081e1cf2
10180d0f
d2820010
0432150e
c200092d
bf8c007f
10180a00
c200092c
bf8c007f
d282000c
04300104
c200092e
bf8c007f
d282000c
04300106
c200092f
bf8c007f
d282000c
04300107
c2000925
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8440200
c8450201
d2820011
0436230c
10282310
c2000938
c2008939
bf8c007f
7e200201
d2820003
04400103
c2000915
bf8c007f
10220600
c2000914
bf8c007f
10200400
c0860310
c0c80520
bf8c007f
f0800f00
00641010
bf8c0770
10282514
100420ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0711
7e0602ff
3d93dd98
d2820002
040a0712
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
102a0512
c2000929
bf8c007f
10060a00
c2000928
bf8c007f
d2820003
040c0104
c200092a
bf8c007f
d2820003
040c0106
c200092b
bf8c007f
d2820003
040c0107
c2000924
bf8c007f
06060600
d2060803
02010103
10062703
d2820014
04522b03
102828f6
c8580700
c8590701
c8540600
c8550601
c080030c
c0c60518
bf8c007f
f0800700
00031515
c2000936
bf8c0070
d2820014
04522e00
c8600b00
c8610b01
c200091b
bf8c007f
d00c0002
02000118
7e320200
d2000018
000a3119
d2060819
02010118
083432f2
c200091a
bf8c007f
10303400
d2820014
04622919
10300b0f
d2820018
0462130e
c86c0100
c86d0101
d282001b
0436370c
10303718
10302318
10360511
d2820018
04623703
103030f6
c2000935
bf8c007f
d2820018
04622c00
c2000919
bf8c007f
10363400
d2820018
046e3119
101e090f
d2820008
043e110e
c8240000
c8250001
d2820009
0436130c
10101308
10102108
10040510
d2820002
04220503
100404f6
c2000934
bf8c007f
d2820002
040a2a00
c2000918
bf8c007f
10063400
d2820002
040e0519
c80c0300
c80d0301
d2820000
0436070c
10000107
c200091d
c200891e
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000114
5e023102
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL IN[7]
DCL IN[8]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..10]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MOV TEMP[4].w, IMM[0].yyyy
 10: MOV TEMP[4].xyz, IN[8].xyzx
 11: MUL TEMP[5], CONST[0], TEMP[1].xxxx
 12: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[5]
 13: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 14: ADD TEMP[0], TEMP[0], CONST[3]
 15: MOV TEMP[2].w, IMM[0].xxxx
 16: MOV TEMP[2].xyz, CONST[4].xyzx
 17: DP4 TEMP[5].x, TEMP[3], IN[0]
 18: DP4 TEMP[6].x, TEMP[3], IN[1]
 19: MOV TEMP[5].y, TEMP[6].xxxx
 20: DP4 TEMP[3].x, TEMP[3], IN[2]
 21: MOV TEMP[5].z, TEMP[3].xxxx
 22: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[6].wwww
 23: DP4 TEMP[5].x, TEMP[4], IN[0]
 24: DP4 TEMP[6].x, TEMP[4], IN[1]
 25: MOV TEMP[5].y, TEMP[6].xxxx
 26: DP4 TEMP[4].x, TEMP[4], IN[2]
 27: MOV TEMP[5].z, TEMP[4].xxxx
 28: MUL TEMP[4].xyz, TEMP[5].xyzz, CONST[6].wwww
 29: MUL TEMP[2], TEMP[2], IN[3]
 30: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 31: MAD TEMP[5].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 32: MOV TEMP[1].w, TEMP[5].xxxx
 33: MAD TEMP[5].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 34: MOV TEMP[5].zw, TEMP[3].yyxy
 35: MOV TEMP[6].x, TEMP[3].zzzz
 36: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 37: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 38: MOV TEMP[6].yzw, TEMP[3].yxyz
 39: MOV TEMP[3].xyz, TEMP[4].xyzx
 40: MOV OUT[1], TEMP[2]
 41: MOV OUT[2], IN[7]
 42: MOV OUT[4], TEMP[5]
 43: MOV OUT[6], TEMP[3]
 44: MOV OUT[5], TEMP[6]
 45: MOV OUT[3], TEMP[1]
 46: MOV OUT[0], TEMP[0]
 47: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = getelementptr <16 x i8> addrspace(2)* %3, i32 7
  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
  %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %5)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = extractelement <4 x float> %93, i32 3
  %98 = getelementptr <16 x i8> addrspace(2)* %3, i32 8
  %99 = load <16 x i8> addrspace(2)* %98, !tbaa !0
  %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %5)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = fmul float %77, %38
  %105 = fadd float %104, %35
  %106 = fmul float %78, %39
  %107 = fadd float %106, %36
  %108 = fmul float %79, %40
  %109 = fadd float %108, %37
  %110 = fmul float %105, %49
  %111 = fmul float %107, %50
  %112 = fadd float %110, %111
  %113 = fmul float %109, %51
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %52
  %116 = fadd float %114, %115
  %117 = fmul float %105, %56
  %118 = fmul float %107, %57
  %119 = fadd float %117, %118
  %120 = fmul float %109, %58
  %121 = fadd float %119, %120
  %122 = fmul float 1.000000e+00, %59
  %123 = fadd float %121, %122
  %124 = fmul float %105, %63
  %125 = fmul float %107, %64
  %126 = fadd float %124, %125
  %127 = fmul float %109, %65
  %128 = fadd float %126, %127
  %129 = fmul float 1.000000e+00, %66
  %130 = fadd float %128, %129
  %131 = fmul float %11, %116
  %132 = fmul float %12, %116
  %133 = fmul float %13, %116
  %134 = fmul float %14, %116
  %135 = fmul float %15, %123
  %136 = fadd float %135, %131
  %137 = fmul float %16, %123
  %138 = fadd float %137, %132
  %139 = fmul float %17, %123
  %140 = fadd float %139, %133
  %141 = fmul float %18, %123
  %142 = fadd float %141, %134
  %143 = fmul float %19, %130
  %144 = fadd float %143, %136
  %145 = fmul float %20, %130
  %146 = fadd float %145, %138
  %147 = fmul float %21, %130
  %148 = fadd float %147, %140
  %149 = fmul float %22, %130
  %150 = fadd float %149, %142
  %151 = fadd float %144, %23
  %152 = fadd float %146, %24
  %153 = fadd float %148, %25
  %154 = fadd float %150, %26
  %155 = fmul float %83, %49
  %156 = fmul float %84, %50
  %157 = fadd float %155, %156
  %158 = fmul float %85, %51
  %159 = fadd float %157, %158
  %160 = fmul float 0.000000e+00, %52
  %161 = fadd float %159, %160
  %162 = fmul float %83, %56
  %163 = fmul float %84, %57
  %164 = fadd float %162, %163
  %165 = fmul float %85, %58
  %166 = fadd float %164, %165
  %167 = fmul float 0.000000e+00, %59
  %168 = fadd float %166, %167
  %169 = fmul float %83, %63
  %170 = fmul float %84, %64
  %171 = fadd float %169, %170
  %172 = fmul float %85, %65
  %173 = fadd float %171, %172
  %174 = fmul float 0.000000e+00, %66
  %175 = fadd float %173, %174
  %176 = fmul float %161, %34
  %177 = fmul float %168, %34
  %178 = fmul float %175, %34
  %179 = fmul float %101, %49
  %180 = fmul float %102, %50
  %181 = fadd float %179, %180
  %182 = fmul float %103, %51
  %183 = fadd float %181, %182
  %184 = fmul float 0.000000e+00, %52
  %185 = fadd float %183, %184
  %186 = fmul float %101, %56
  %187 = fmul float %102, %57
  %188 = fadd float %186, %187
  %189 = fmul float %103, %58
  %190 = fadd float %188, %189
  %191 = fmul float 0.000000e+00, %59
  %192 = fadd float %190, %191
  %193 = fmul float %101, %63
  %194 = fmul float %102, %64
  %195 = fadd float %193, %194
  %196 = fmul float %103, %65
  %197 = fadd float %195, %196
  %198 = fmul float 0.000000e+00, %66
  %199 = fadd float %197, %198
  %200 = fmul float %185, %34
  %201 = fmul float %192, %34
  %202 = fmul float %199, %34
  %203 = fmul float %27, %70
  %204 = fmul float %28, %71
  %205 = fmul float %29, %72
  %206 = fmul float 1.000000e+00, %73
  %207 = fsub float -0.000000e+00, %43
  %208 = fadd float %116, %207
  %209 = fsub float -0.000000e+00, %44
  %210 = fadd float %123, %209
  %211 = fsub float -0.000000e+00, %45
  %212 = fadd float %130, %211
  %213 = fmul float %153, %41
  %214 = fadd float %213, %42
  %215 = fmul float %89, %30
  %216 = fadd float %215, %32
  %217 = fmul float %90, %31
  %218 = fadd float %217, %33
  %219 = fmul float %202, %177
  %220 = fmul float %200, %178
  %221 = fmul float %201, %176
  %222 = fsub float -0.000000e+00, %219
  %223 = fmul float %201, %178
  %224 = fadd float %223, %222
  %225 = fsub float -0.000000e+00, %220
  %226 = fmul float %202, %176
  %227 = fadd float %226, %225
  %228 = fsub float -0.000000e+00, %221
  %229 = fmul float %200, %177
  %230 = fadd float %229, %228
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %203, float %204, float %205, float %206)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %94, float %95, float %96, float %97)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %208, float %210, float %212, float %214)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %216, float %218, float %176, float %177)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %178, float %224, float %227, float %230)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %200, float %201, float %202, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %151, float %152, float %153, float %154)
  ret void
}

; External declarations, attribute groups and metadata for the shader module
; whose @main ends just above.
;
; NOTE(review): no call to llvm.SI.load.const is visible in the tail of @main
; shown in this part of the log; its call sites are further up. From the
; signature it takes a 16-byte vector and an i32 and returns one float.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Called above with a <16 x i8> value loaded through the %3 pointer argument,
; the immediate 0 and %5; each call yields a <4 x float> whose lanes are then
; extracted individually.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Called at the end of @main with five i32 immediates followed by four floats.
; Carries no attribute group, unlike the readnone intrinsics above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; #0 is attached to @main. The fragment-shader modules in this log use
; "ShaderType"="0"; presumably "1" marks a vertex shader (this module uses
; llvm.SI.vs.load.input) -- confirm against the backend.
attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

; !0 is the !tbaa tag placed on the loads through addrspace(2) pointers.
!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c084071c
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
10041503
d2820002
040a1304
d2820002
040a1711
06041902
c2020102
bf8c007f
101a0404
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
10061d03
d2820003
040e1b04
d2820003
040e1f11
06062103
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800022f
11121314
c0840714
bf8c000f
e00c2000
80021200
bf8c0770
10220d13
d2820011
04460b12
d2820011
04460f14
d2820011
04450108
c202011b
bf8c007f
10222204
102c1513
d2820016
045a1312
d2820016
045a1714
d2820016
0459010c
102c2c04
c0840718
bf8c007f
e00c2000
80021700
c2028115
c2040117
bf8c0070
7e360208
d282001b
046c0b18
c2028114
c2040116
bf8c007f
7e380208
d2820017
04700b17
f800023f
11161b17
c0840720
bf8c000f
e00c2000
80021700
bf8c0770
10000d18
d2820000
04020b17
d2820000
04020f19
d2820000
04010108
10000004
100c2d00
100a1518
d2820005
04161317
d2820005
04161719
d2820005
0415010c
100a0a04
100e2305
080c0d07
100e1d13
d2820007
041e1b12
d2820007
041e1f14
d2820007
041d0110
100e0e04
10120f05
10101d18
d2820008
04221b17
d2820008
04221f19
d2820008
04210110
10101004
10142d08
0812130a
10142308
10160f00
0814150b
f800024f
06090a07
bf8c070f
7e0c0280
f800025f
06080005
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[20..21]
DCL CONST[5..19]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    4.0000,    -0.0100,    -0.5000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[21].xxxx, CONST[21].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: UIF TEMP[1].xxxx :3
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :3
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[4].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: DP4 TEMP[4].x, TEMP[3], CONST[18]
 11: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[17].xxxx
 12: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz
 13: RSQ TEMP[5].x, TEMP[5].xxxx
 14: MUL TEMP[5].xyz, IN[5].xyzz, TEMP[5].xxxx
 15: MUL TEMP[2].xyz, TEMP[5].xyzz, TEMP[2].xxxx
 16: DP4 TEMP[5].x, TEMP[3], CONST[19]
 17: ADD_SAT TEMP[5].x, TEMP[5].xxxx, CONST[17].yyyy
 18: LRP TEMP[5], TEMP[5].xxxx, IN[2], IMM[0].yyyy
 19: MUL TEMP[3], TEMP[3], TEMP[5]
 20: DP3 TEMP[5].x, TEMP[2].xyzz, CONST[9].xyzz
 21: ADD TEMP[5].x, TEMP[5].xxxx, CONST[13].wwww
 22: MOV_SAT TEMP[6].x, TEMP[5].xxxx
 23: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[11].xyzz, CONST[12].xyzz
 24: MOV_SAT TEMP[7].x, -TEMP[5].xxxx
 25: LRP TEMP[7].xyz, TEMP[7].xxxx, CONST[13].xyzz, CONST[12].xyzz
 26: SLT TEMP[8].x, TEMP[5].xxxx, IMM[0].zzzz
 27: F2I TEMP[8].x, -TEMP[8]
 28: UIF TEMP[8].xxxx :3
 29:   MOV TEMP[7].xyz, TEMP[7].xyzx
 30: ELSE :3
 31:   MOV TEMP[7].xyz, TEMP[6].xyzx
 32: ENDIF
 33: DP3 TEMP[6].x, IN[3].xyzz, IN[3].xyzz
 34: RSQ TEMP[6].x, TEMP[6].xxxx
 35: MUL TEMP[6].xyz, IN[3].xyzz, TEMP[6].xxxx
 36: ADD TEMP[6].xyz, CONST[10].xyzz, -TEMP[6].xyzz
 37: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz
 38: RSQ TEMP[8].x, TEMP[8].xxxx
 39: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xxxx
 40: DP3_SAT TEMP[6].x, TEMP[6].xyzz, TEMP[2].xyzz
 41: POW TEMP[6].x, TEMP[6].xxxx, CONST[16].xxxx
 42: SGE TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 43: F2I TEMP[5].x, -TEMP[5]
 44: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
 45: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx
 46: MUL TEMP[5].xyz, CONST[14].xyzz, TEMP[5].xxxx
 47: MOV TEMP[2].xyz, TEMP[2].xyzz
 48: TEX TEMP[2].xyz, TEMP[2], SAMP[2], CUBE
 49: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, TEMP[7].xyzz
 50: MOV TEMP[6].xy, IN[4].zwww
 51: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 52: MUL TEMP[6], TEMP[6], CONST[15]
 53: LRP TEMP[2].xyz, TEMP[6].wwww, TEMP[6].xyzz, TEMP[2].xyzz
 54: LRP TEMP[5].xyz, TEMP[6].wwww, IMM[0].zzzz, TEMP[5].xyzz
 55: MUL TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz
 56: MAD TEMP[3].xyz, TEMP[5].xyzz, TEMP[4].xxxx, TEMP[2].xyzz
 57: MAX TEMP[2].x, IN[3].wwww, CONST[6].wwww
 58: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 59: LRP TEMP[3].xyz, TEMP[2].xxxx, TEMP[3].xyzz, CONST[6].xyzz
 60: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[5].xyyy
 61: MAD TEMP[4].xy, CONST[8].xzzz, TEMP[3].wwww, CONST[8].ywww
 62: ADD TEMP[5].x, TEMP[4].xxxx, IMM[1].yyyy
 63: SLT TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz
 64: F2I TEMP[5].x, -TEMP[5]
 65: UIF TEMP[5].xxxx :3
 66:   KILL
 67: ENDIF
 68: MUL TEMP[5].xy, TEMP[2].xyyy, CONST[7].yzzz
 69: MOV TEMP[5].xy, TEMP[5].xyyy
 70: TEX TEMP[5].xy, TEMP[5], SAMP[3], 2D
 71: ADD TEMP[5].xy, TEMP[5].xyyy, IMM[1].zzzz
 72: MUL TEMP[6].x, CONST[7].xxxx, TEMP[3].wwww
 73: MAD TEMP[2].xy, TEMP[5].xyyy, TEMP[6].xxxx, TEMP[2].xyyy
 74: MOV TEMP[2].xy, TEMP[2].xyyy
 75: TEX TEMP[2], TEMP[2], SAMP[4], 2D
 76: MUL TEMP[3], TEMP[3], TEMP[4].xxxx
 77: MAD TEMP[2], TEMP[2], TEMP[4].yyyy, TEMP[3]
 78: MOV OUT[0], TEMP[2]
 79: END
; ModuleID = 'tgsi'

; LLVM IR generated for the FRAG TGSI program listed directly above (TGSI
; instructions 0-79). The comments below map each group of IR values back to
; the TGSI instruction it corresponds to; "C<n>" abbreviates CONST[n].
;
; Arguments as used in this body:
;   %0      - base pointer for the <16 x i8> value fed to llvm.SI.load.const
;             (presumably the constant-buffer descriptor; confirm).
;   %1, %2  - base pointers for the <16 x i8> / <32 x i8> values passed as the
;             3rd / 2nd operands of the sample intrinsics, indexed 0..4 to match
;             SAMP[0..4] (presumably sampler state / texture resource; confirm).
;   %3, %5  - forwarded unchanged to every llvm.SI.fs.interp call.
;   %12,%13 - used where TGSI reads IN[0].x / IN[0].y (POSITION).
;   %16     - used where TGSI reads IN[1] (FACE).
; The remaining arguments are not referenced in this function.
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  ; Load every constant component the shader reads. The i32 operand is a byte
  ; offset: offset / 16 gives the CONST[] index, (offset % 16) / 4 the component.
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  ; C5.xy
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  ; C6.xyzw
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  ; C7.xyz
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  ; C8.xyzw
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  ; C9.xyz
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  ; C10.xyz
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  ; C11.xyz
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  ; C12.xyz
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  ; C13.xyzw
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  ; C14.xyz
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  ; C15.xyzw
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  ; C16.x
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  ; C17.xy
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 272)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 276)
  ; C18.xyzw
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 288)
  %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 292)
  %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 296)
  %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300)
  ; C19.xyzw
  %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 304)
  %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 308)
  %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 312)
  %68 = call float @llvm.SI.load.const(<16 x i8> %21, i32 316)
  ; C21.xy
  %69 = call float @llvm.SI.load.const(<16 x i8> %21, i32 336)
  %70 = call float @llvm.SI.load.const(<16 x i8> %21, i32 340)
  ; Load the <32 x i8> / <16 x i8> pair for each of SAMP[0..4]:
  ; (%72,%74)=0, (%76,%78)=1, (%80,%82)=2, (%84,%86)=3, (%88,%90)=4.
  %71 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %72 = load <32 x i8> addrspace(2)* %71, !tbaa !0
  %73 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %76 = load <32 x i8> addrspace(2)* %75, !tbaa !0
  %77 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %78 = load <16 x i8> addrspace(2)* %77, !tbaa !0
  %79 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %80 = load <32 x i8> addrspace(2)* %79, !tbaa !0
  %81 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %82 = load <16 x i8> addrspace(2)* %81, !tbaa !0
  %83 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %84 = load <32 x i8> addrspace(2)* %83, !tbaa !0
  %85 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %86 = load <16 x i8> addrspace(2)* %85, !tbaa !0
  %87 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %88 = load <32 x i8> addrspace(2)* %87, !tbaa !0
  %89 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %90 = load <16 x i8> addrspace(2)* %89, !tbaa !0
  ; IN[1] (FACE) turned into 1.0 / 0.0.
  %91 = fcmp ugt float %16, 0.000000e+00
  %92 = select i1 %91, float 1.000000e+00, float 0.000000e+00
  ; Interpolated inputs; operands are (component, attribute index, %3, %5).
  ; Attribute 0 = IN[2] (GENERIC[19]), all four components.
  %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %95 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %96 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  ; Attribute 1 = IN[3] (GENERIC[20]), all four components.
  %97 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %98 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %99 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %100 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  ; Attribute 2 = IN[4] (GENERIC[21]), all four components.
  %101 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %102 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %103 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %104 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  ; Attribute 3 = IN[5] (GENERIC[22]), xyz only.
  %105 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %106 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %107 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  ; TGSI 1: TEMP[0].y = IN[0].y * C21.x + C21.y
  %108 = fmul float %13, %69
  %109 = fadd float %108, %70
  ; TGSI 2: MOV_SAT TEMP[1], IN[1]. Only %110 (the .x lane) is used afterwards;
  ; %111-%113 are not referenced again in this function.
  %110 = call float @llvm.AMDIL.clamp.(float %92, float 0.000000e+00, float 1.000000e+00)
  %111 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %112 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %113 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 3-7: UIF on the raw bits of TEMP[1].x; "%." is TEMP[2].x, -1.0 when the
  ; bits are non-zero and +1.0 otherwise.
  %114 = bitcast float %110 to i32
  %115 = icmp ne i32 %114, 0
  %. = select i1 %115, float -1.000000e+00, float 1.000000e+00
  ; TGSI 8-9: TEX TEMP[3] <- SAMP[0], 2D, coordinates IN[4].xy
  %116 = bitcast float %101 to i32
  %117 = bitcast float %102 to i32
  %118 = insertelement <2 x i32> undef, i32 %116, i32 0
  %119 = insertelement <2 x i32> %118, i32 %117, i32 1
  %120 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %72, <16 x i8> %74, i32 2)
  %121 = extractelement <4 x float> %120, i32 0
  %122 = extractelement <4 x float> %120, i32 1
  %123 = extractelement <4 x float> %120, i32 2
  %124 = extractelement <4 x float> %120, i32 3
  ; TGSI 10-11: TEMP[4].x = saturate(dot4(TEMP[3], C18) + C17.x) -> %133
  %125 = fmul float %121, %61
  %126 = fmul float %122, %62
  %127 = fadd float %125, %126
  %128 = fmul float %123, %63
  %129 = fadd float %127, %128
  %130 = fmul float %124, %64
  %131 = fadd float %129, %130
  %132 = fadd float %131, %59
  %133 = call float @llvm.AMDIL.clamp.(float %132, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 12-14: scale IN[5].xyz by rsq(dot3(IN[5], IN[5]))
  %134 = fmul float %105, %105
  %135 = fmul float %106, %106
  %136 = fadd float %135, %134
  %137 = fmul float %107, %107
  %138 = fadd float %136, %137
  %139 = call float @llvm.AMDGPU.rsq(float %138)
  %140 = fmul float %105, %139
  %141 = fmul float %106, %139
  %142 = fmul float %107, %139
  ; TGSI 15: TEMP[2].xyz = that vector times the face sign "%."
  %143 = fmul float %140, %.
  %144 = fmul float %141, %.
  %145 = fmul float %142, %.
  ; TGSI 16-17: TEMP[5].x = saturate(dot4(TEMP[3], C19) + C17.y) -> %154
  %146 = fmul float %121, %65
  %147 = fmul float %122, %66
  %148 = fadd float %146, %147
  %149 = fmul float %123, %67
  %150 = fadd float %148, %149
  %151 = fmul float %124, %68
  %152 = fadd float %150, %151
  %153 = fadd float %152, %60
  %154 = call float @llvm.AMDIL.clamp.(float %153, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 18: LRP TEMP[5] = lrp(%154, IN[2], 1.0)
  %155 = call float @llvm.AMDGPU.lrp(float %154, float %93, float 1.000000e+00)
  %156 = call float @llvm.AMDGPU.lrp(float %154, float %94, float 1.000000e+00)
  %157 = call float @llvm.AMDGPU.lrp(float %154, float %95, float 1.000000e+00)
  %158 = call float @llvm.AMDGPU.lrp(float %154, float %96, float 1.000000e+00)
  ; TGSI 19: TEMP[3] *= TEMP[5]   (%162 is TEMP[3].w from here on)
  %159 = fmul float %121, %155
  %160 = fmul float %122, %156
  %161 = fmul float %123, %157
  %162 = fmul float %124, %158
  ; TGSI 20-21: TEMP[5].x = dot3(TEMP[2], C9) + C13.w -> %168
  %163 = fmul float %143, %35
  %164 = fmul float %144, %36
  %165 = fadd float %164, %163
  %166 = fmul float %145, %37
  %167 = fadd float %165, %166
  %168 = fadd float %167, %50
  ; TGSI 22-23: TEMP[6].xyz = lrp(saturate(%168), C11.xyz, C12.xyz)
  %169 = call float @llvm.AMDIL.clamp.(float %168, float 0.000000e+00, float 1.000000e+00)
  %170 = call float @llvm.AMDGPU.lrp(float %169, float %41, float %44)
  %171 = call float @llvm.AMDGPU.lrp(float %169, float %42, float %45)
  %172 = call float @llvm.AMDGPU.lrp(float %169, float %43, float %46)
  ; TGSI 24-25: TEMP[7].xyz = lrp(saturate(-%168), C13.xyz, C12.xyz)
  %173 = fsub float -0.000000e+00, %168
  %174 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00)
  %175 = call float @llvm.AMDGPU.lrp(float %174, float %47, float %44)
  %176 = call float @llvm.AMDGPU.lrp(float %174, float %48, float %45)
  %177 = call float @llvm.AMDGPU.lrp(float %174, float %49, float %46)
  ; TGSI 26-27: SLT (%168 < 0.0) as 1.0/0.0, negated and converted to integer.
  ; The bitcast pair %182/%183 round-trips the same bits.
  %178 = fcmp ult float %168, 0.000000e+00
  %179 = select i1 %178, float 1.000000e+00, float 0.000000e+00
  %180 = fsub float -0.000000e+00, %179
  %181 = fptosi float %180 to i32
  %182 = bitcast i32 %181 to float
  %183 = bitcast float %182 to i32
  %184 = icmp ne i32 %183, 0
  ; TGSI 28-32: the UIF/ELSE became selects; TEMP[7].xyz keeps the %175-%177
  ; values when the test is true and takes TEMP[6].xyz (%170-%172) otherwise.
  %temp28.0 = select i1 %184, float %175, float %170
  %temp29.0 = select i1 %184, float %176, float %171
  %temp30.0 = select i1 %184, float %177, float %172
  ; TGSI 33-35: scale IN[3].xyz by rsq(dot3(IN[3], IN[3]))
  %185 = fmul float %97, %97
  %186 = fmul float %98, %98
  %187 = fadd float %186, %185
  %188 = fmul float %99, %99
  %189 = fadd float %187, %188
  %190 = call float @llvm.AMDGPU.rsq(float %189)
  %191 = fmul float %97, %190
  %192 = fmul float %98, %190
  %193 = fmul float %99, %190
  ; TGSI 36: TEMP[6].xyz = C10.xyz - TEMP[6].xyz (emitted as negate + add)
  %194 = fsub float -0.000000e+00, %191
  %195 = fadd float %38, %194
  %196 = fsub float -0.000000e+00, %192
  %197 = fadd float %39, %196
  %198 = fsub float -0.000000e+00, %193
  %199 = fadd float %40, %198
  ; TGSI 37-39: scale that difference by rsq of its own dot3
  %200 = fmul float %195, %195
  %201 = fmul float %197, %197
  %202 = fadd float %201, %200
  %203 = fmul float %199, %199
  %204 = fadd float %202, %203
  %205 = call float @llvm.AMDGPU.rsq(float %204)
  %206 = fmul float %195, %205
  %207 = fmul float %197, %205
  %208 = fmul float %199, %205
  ; TGSI 40: DP3_SAT TEMP[6].x = saturate(dot3(TEMP[6], TEMP[2]))
  %209 = fmul float %206, %143
  %210 = fmul float %207, %144
  %211 = fadd float %210, %209
  %212 = fmul float %208, %145
  %213 = fadd float %211, %212
  %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 41: POW TEMP[6].x = pow(%214, C16.x)
  %215 = call float @llvm.pow.f32(float %214, float %58)
  ; TGSI 42-44: SGE against IMM[0].w (printed as 0.0100 in the TGSI listing),
  ; negate + F2I, then AND with 1065353216 = 0x3F800000, the bit pattern of
  ; 1.0f (IMM[0].y); the result is reinterpreted as a float.
  %216 = fcmp uge float %168, 0x3F847AE140000000
  %217 = select i1 %216, float 1.000000e+00, float 0.000000e+00
  %218 = fsub float -0.000000e+00, %217
  %219 = fptosi float %218 to i32
  %220 = bitcast i32 %219 to float
  %221 = bitcast float %220 to i32
  %222 = and i32 %221, 1065353216
  %223 = bitcast i32 %222 to float
  ; TGSI 45-46: TEMP[5].xyz = C14.xyz * (pow result * mask)
  %224 = fmul float %215, %223
  %225 = fmul float %51, %224
  %226 = fmul float %52, %224
  %227 = fmul float %53, %224
  ; TGSI 47-48: TEX TEMP[2].xyz <- SAMP[2], CUBE, coordinates TEMP[2].xyz.
  ; The direction (w lane set to 0.0) goes through llvm.AMDGPU.cube; lanes 0 and
  ; 1 of its result are divided by |lane 2| and offset by 1.5, and lane 3 is
  ; passed along as the third coordinate.
  %228 = insertelement <4 x float> undef, float %143, i32 0
  %229 = insertelement <4 x float> %228, float %144, i32 1
  %230 = insertelement <4 x float> %229, float %145, i32 2
  %231 = insertelement <4 x float> %230, float 0.000000e+00, i32 3
  %232 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %231)
  %233 = extractelement <4 x float> %232, i32 0
  %234 = extractelement <4 x float> %232, i32 1
  %235 = extractelement <4 x float> %232, i32 2
  %236 = extractelement <4 x float> %232, i32 3
  %237 = call float @fabs(float %235)
  %238 = fdiv float 1.000000e+00, %237
  %239 = fmul float %233, %238
  %240 = fadd float %239, 1.500000e+00
  %241 = fmul float %234, %238
  %242 = fadd float %241, 1.500000e+00
  ; Note the order: the lane-1-derived value (%242) is placed first, then %240.
  %243 = bitcast float %242 to i32
  %244 = bitcast float %240 to i32
  %245 = bitcast float %236 to i32
  %246 = insertelement <4 x i32> undef, i32 %243, i32 0
  %247 = insertelement <4 x i32> %246, i32 %244, i32 1
  %248 = insertelement <4 x i32> %247, i32 %245, i32 2
  %249 = insertelement <4 x i32> %248, i32 undef, i32 3
  ; Last operand is 4 here versus 2 on the 2D samples; presumably the texture
  ; target -- confirm against the backend.
  %250 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %249, <32 x i8> %80, <16 x i8> %82, i32 4)
  %251 = extractelement <4 x float> %250, i32 0
  %252 = extractelement <4 x float> %250, i32 1
  %253 = extractelement <4 x float> %250, i32 2
  ; TGSI 49: TEMP[2].xyz = cube sample * IMM[1].x (4.0) + TEMP[7].xyz
  %254 = fmul float %251, 4.000000e+00
  %255 = fadd float %254, %temp28.0
  %256 = fmul float %252, 4.000000e+00
  %257 = fadd float %256, %temp29.0
  %258 = fmul float %253, 4.000000e+00
  %259 = fadd float %258, %temp30.0
  ; TGSI 50-51: TEX TEMP[6] <- SAMP[1], 2D, coordinates IN[4].zw
  %260 = bitcast float %103 to i32
  %261 = bitcast float %104 to i32
  %262 = insertelement <2 x i32> undef, i32 %260, i32 0
  %263 = insertelement <2 x i32> %262, i32 %261, i32 1
  %264 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %263, <32 x i8> %76, <16 x i8> %78, i32 2)
  %265 = extractelement <4 x float> %264, i32 0
  %266 = extractelement <4 x float> %264, i32 1
  %267 = extractelement <4 x float> %264, i32 2
  %268 = extractelement <4 x float> %264, i32 3
  ; TGSI 52: TEMP[6] *= C15
  %269 = fmul float %265, %54
  %270 = fmul float %266, %55
  %271 = fmul float %267, %56
  %272 = fmul float %268, %57
  ; TGSI 53: TEMP[2].xyz = lrp(TEMP[6].w, TEMP[6].xyz, TEMP[2].xyz)
  %273 = call float @llvm.AMDGPU.lrp(float %272, float %269, float %255)
  %274 = call float @llvm.AMDGPU.lrp(float %272, float %270, float %257)
  %275 = call float @llvm.AMDGPU.lrp(float %272, float %271, float %259)
  ; TGSI 54: TEMP[5].xyz = lrp(TEMP[6].w, 0.0, TEMP[5].xyz)
  %276 = call float @llvm.AMDGPU.lrp(float %272, float 0.000000e+00, float %225)
  %277 = call float @llvm.AMDGPU.lrp(float %272, float 0.000000e+00, float %226)
  %278 = call float @llvm.AMDGPU.lrp(float %272, float 0.000000e+00, float %227)
  ; TGSI 55: TEMP[2].xyz = TEMP[3].xyz * TEMP[2].xyz
  %279 = fmul float %159, %273
  %280 = fmul float %160, %274
  %281 = fmul float %161, %275
  ; TGSI 56: TEMP[3].xyz = TEMP[5].xyz * TEMP[4].x + TEMP[2].xyz
  %282 = fmul float %276, %133
  %283 = fadd float %282, %279
  %284 = fmul float %277, %133
  %285 = fadd float %284, %280
  %286 = fmul float %278, %133
  %287 = fadd float %286, %281
  ; TGSI 57-58: TEMP[2].x = saturate(max(IN[3].w, C6.w))
  %288 = fcmp uge float %100, %27
  %289 = select i1 %288, float %100, float %27
  %290 = call float @llvm.AMDIL.clamp.(float %289, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 59: TEMP[3].xyz = lrp(TEMP[2].x, TEMP[3].xyz, C6.xyz)
  %291 = call float @llvm.AMDGPU.lrp(float %290, float %283, float %24)
  %292 = call float @llvm.AMDGPU.lrp(float %290, float %285, float %25)
  %293 = call float @llvm.AMDGPU.lrp(float %290, float %287, float %26)
  ; TGSI 60: TEMP[2].xy = TEMP[0].xy * C5.xy (TEMP[0].y is %109 from TGSI 1)
  %294 = fmul float %12, %22
  %295 = fmul float %109, %23
  ; TGSI 61: TEMP[4].xy = C8.xz * TEMP[3].w + C8.yw -> %297 (.x), %299 (.y)
  %296 = fmul float %31, %162
  %297 = fadd float %296, %32
  %298 = fmul float %33, %162
  %299 = fadd float %298, %34
  ; TGSI 62-65: add IMM[1].y (printed as -0.0100), test < 0.0, negate + F2I,
  ; and branch on the integer being non-zero.
  %300 = fadd float %297, 0xBF847AE140000000
  %301 = fcmp ult float %300, 0.000000e+00
  %302 = select i1 %301, float 1.000000e+00, float 0.000000e+00
  %303 = fsub float -0.000000e+00, %302
  %304 = fptosi float %303 to i32
  %305 = bitcast i32 %304 to float
  %306 = bitcast float %305 to i32
  %307 = icmp ne i32 %306, 0
  br i1 %307, label %IF40, label %ENDIF39

; TGSI 66: KILL. Control still falls through to ENDIF39 in the IR.
IF40:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF39

ENDIF39:                                          ; preds = %main_body, %IF40
  ; TGSI 68: TEMP[5].xy = TEMP[2].xy * C7.yz
  %308 = fmul float %294, %29
  %309 = fmul float %295, %30
  ; TGSI 69-70: TEX TEMP[5].xy <- SAMP[3], 2D
  %310 = bitcast float %308 to i32
  %311 = bitcast float %309 to i32
  %312 = insertelement <2 x i32> undef, i32 %310, i32 0
  %313 = insertelement <2 x i32> %312, i32 %311, i32 1
  %314 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %313, <32 x i8> %84, <16 x i8> %86, i32 2)
  %315 = extractelement <4 x float> %314, i32 0
  %316 = extractelement <4 x float> %314, i32 1
  ; TGSI 71: TEMP[5].xy += IMM[1].z (-0.5)
  %317 = fadd float %315, -5.000000e-01
  %318 = fadd float %316, -5.000000e-01
  ; TGSI 72: TEMP[6].x = C7.x * TEMP[3].w
  %319 = fmul float %28, %162
  ; TGSI 73: TEMP[2].xy = TEMP[5].xy * TEMP[6].x + TEMP[2].xy
  %320 = fmul float %317, %319
  %321 = fadd float %320, %294
  %322 = fmul float %318, %319
  %323 = fadd float %322, %295
  ; TGSI 74-75: TEX TEMP[2] <- SAMP[4], 2D
  %324 = bitcast float %321 to i32
  %325 = bitcast float %323 to i32
  %326 = insertelement <2 x i32> undef, i32 %324, i32 0
  %327 = insertelement <2 x i32> %326, i32 %325, i32 1
  %328 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %327, <32 x i8> %88, <16 x i8> %90, i32 2)
  %329 = extractelement <4 x float> %328, i32 0
  %330 = extractelement <4 x float> %328, i32 1
  %331 = extractelement <4 x float> %328, i32 2
  %332 = extractelement <4 x float> %328, i32 3
  ; TGSI 76: TEMP[3] *= TEMP[4].x   (xyz come from TGSI 59, w is %162)
  %333 = fmul float %291, %297
  %334 = fmul float %292, %297
  %335 = fmul float %293, %297
  %336 = fmul float %162, %297
  ; TGSI 77: TEMP[2] = TEMP[2] * TEMP[4].y + TEMP[3]
  %337 = fmul float %329, %299
  %338 = fadd float %337, %333
  %339 = fmul float %330, %299
  %340 = fadd float %339, %334
  %341 = fmul float %331, %299
  %342 = fadd float %341, %335
  %343 = fmul float %332, %299
  %344 = fadd float %343, %336
  ; TGSI 78: MOV OUT[0], TEMP[2]. The four floats are packed pairwise with
  ; llvm.SI.packf16 into two i32 values, and each packed value is passed twice.
  ; The fifth i32 operand is 1 here, while the exports of unpacked floats at the
  ; start of this log pass 0; presumably a "compressed" flag -- confirm.
  %345 = call i32 @llvm.SI.packf16(float %338, float %340)
  %346 = bitcast i32 %345 to float
  %347 = call i32 @llvm.SI.packf16(float %342, float %344)
  %348 = bitcast i32 %347 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %346, float %348, float %346, float %348)
  ret void
}

; External declarations, attribute groups and metadata for the fragment-shader
; module whose @main is defined directly above.
;
; Reads one float; @main passes a loaded <16 x i8> value and a byte offset
; that is always a multiple of 4.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Called in @main as (component 0..3, attribute index 0..3, %3, %5).
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Always called in @main with bounds 0.0 and 1.0, where the TGSI source has a
; *_SAT instruction.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Used for the TGSI "TEX ..., 2D" instructions; operands in @main are
; (coordinates bit-cast to i32, <32 x i8> value, <16 x i8> value, i32 2).
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Used where the TGSI source has RSQ.
; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Used where the TGSI source has LRP; operands keep the TGSI source order.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Used where the TGSI source has POW.
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Applied to the cube-map direction before the "TEX ..., CUBE" sample.
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Declared under a plain (non-"llvm.") name; called once in @main on lane 2 of
; the cube result.
; Function Attrs: readnone
declare float @fabs(float) #2

; Used for the "TEX ..., CUBE" instruction; the last operand is 4 at that call.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Emitted for the TGSI KILL instruction. No attribute group is attached.
declare void @llvm.AMDGPU.kilp()

; Packs two floats into one i32; used on the final colour before the export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

; Final output; called once in @main with five i32 immediates and four floats.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

; #0 is attached to @main; every FRAG dump in this log carries "ShaderType"="0".
attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

; !0 is the !tbaa tag placed on the loads through addrspace(2) pointers.
!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
100e0b05
d2820008
041e0d06
c81c0e00
c81d0e01
d2820008
04220f07
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000004
0021e6f2
101c0906
100a1105
101a0905
100a1107
101e0905
7e200280
d28a0005
043e1d0d
d28c0004
043e1d0d
d28e0006
043e1d0d
d2880007
043e1d0d
d206010c
02010106
7e18550c
7e2202ff
3fc00000
d2820006
04461904
d2820005
04461905
c0840308
c0c60510
bf8c007f
f0800700
00430405
c0840100
bf8c0070
c2000924
bf8c007f
100e1a00
c2000925
bf8c007f
d2820007
041c010e
c2000926
bf8c007f
d2820007
041c010f
c2000937
bf8c007f
06360e00
d2060807
0201011b
08100ef2
c2000932
bf8c007f
10121000
c200892e
bf8c007f
d282000b
04240307
d2060009
2201011b
d2060809
02010109
081412f2
10181400
c2000936
bf8c007f
d282000c
04300109
d0020000
0201011b
d2000011
0001e480
d2060011
22010111
7e221111
d10a0000
02010111
d200000b
0002190b
d2820015
042ded06
c8300b00
c8310b01
c82c0a00
c82d0a01
c0860304
c0c80508
bf8c007f
f0800f00
0064110b
c203893f
bf8c0070
10162807
081816f2
102a2b0c
c203893e
bf8c007f
102c2607
d282001c
04562d0b
c8580900
c8590901
c8540800
c8550801
c0860300
c0c80500
bf8c007f
f0800f00
00641515
c203894d
bf8c0070
10322c07
c203894c
bf8c007f
d2820019
04640f15
c203894e
bf8c007f
d2820019
04640f17
c203894f
bf8c007f
d2820019
04640f18
c2038945
bf8c007f
06323207
d2060819
02010119
083432f2
c8740200
c8750201
d282001d
046a3b19
103a3b17
1038391d
c8740500
c8750501
c8780400
c8790401
103e3d1e
d2820020
047e3b1d
c87c0600
c87d0601
d2820020
04823f1f
7e405b20
103a411d
c2038929
bf8c007f
083a3a07
103c411e
c2038928
bf8c007f
083c3c07
10423d1e
d2820021
04863b1d
103e411f
c203892a
bf8c007f
083e3e07
d2820020
04863f1f
7e405b20
103a411d
103c411e
103c1b1e
d282001d
047a1d1d
103c411f
d282000d
04761f1e
d206080d
0201010d
7e1a4f0d
c2038940
bf8c007f
0e1a1a07
7e1a4b0d
7e1c02ff
3c23d70a
d00c000c
02021d1b
d200000e
0031e480
d206000e
2201010e
7e1c110e
361c1cf2
101e1d0d
c203893a
bf8c007f
101a1e07
101a1b0c
d282000d
0435010b
c2038949
bf8c007f
101c2c07
c2038948
bf8c007f
d282000e
04380f15
c203894a
bf8c007f
d282000e
04380f17
c203894b
bf8c007f
d282000e
04380f18
c2038944
bf8c007f
061c1c07
d2060810
0201010e
d282000d
0472210d
c8380700
c8390701
c203891b
bf8c007f
d00c000c
02000f0e
7e360207
d200000e
00321d1b
d206081b
0201010e
083836f2
c203891a
bf8c007f
101c3807
d282000d
043a1b1b
c2038931
bf8c007f
101c1007
c206092d
bf8c007f
d282000e
04381907
103a1407
c2038935
bf8c007f
d282001d
04740f09
d200000e
00023b0e
d282000e
0439ed05
101c1d0c
c203893d
bf8c007f
103a2407
d282000e
043a3b0b
c8740100
c8750101
d282001d
046a3b19
103a3b16
101c1d1d
c2038939
bf8c007f
103a1e07
103a3b0c
d282001d
0475010b
d282000e
043a211d
c2038919
bf8c007f
103a3807
d282000e
04761d1b
c2038930
bf8c007f
10101007
c206092c
bf8c007f
d2820007
04201907
10101407
c2038934
bf8c007f
d2820008
04200f09
d2000007
00021107
d2820004
041ded04
1008090c
c200093c
bf8c007f
100a2200
d2820004
04120b0b
c8140000
c8150001
d2820005
046a0b19
100a0b15
10080905
c2000938
bf8c007f
100a1e00
100a0b0c
d2820005
0415010b
d2820004
04122105
c2000918
bf8c007f
100a3800
d2820004
0416091b
c8140300
c8150301
d2820000
046a0b19
100a0118
c2000922
c2008923
bf8c007f
7e000201
d2820000
04020a00
c2000920
c2008921
bf8c007f
7e020201
d2820001
04060a00
060c02ff
bc23d70a
d0020000
02010106
d2000006
0001e480
d2060006
22010106
7e0c1106
d10a0006
02010106
c2000954
c2008955
bf8c007f
7e0c0201
d2820003
04180103
c2000915
bf8c007f
10060600
c2000914
bf8c007f
10040400
c0860310
c0c80520
c080030c
c0cc0518
c202091e
c202891d
c204091c
bf8c007f
7e100204
7e0e0205
7e0c0208
be842406
8984047e
7e1202f3
7c261280
88fe047e
10121103
10100f02
f0800300
00060808
bf8c0770
060e12f1
10140b06
d2820007
040e1507
060610f1
d2820006
040a1503
f0800f00
00640606
10040305
bf8c0770
d2820002
040a0109
1006030d
d2820003
040e0108
5e040503
1006030e
d2820003
040e0107
10020304
d2820000
04060106
5e000700
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..208]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+17], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+17], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+17], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+17], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+17], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+17], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+17], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+17], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+17], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+17], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+17], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+17], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MUL TEMP[2].xyz, TEMP[3].xyzz, CONST[14].wwww
 57: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 58: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 59: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 60: MUL TEMP[3], CONST[0], TEMP[5].xxxx
 61: MAD TEMP[3], CONST[1], TEMP[6].xxxx, TEMP[3]
 62: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[3]
 63: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 64: MOV TEMP[3].w, IMM[0].xxxx
 65: MOV TEMP[3].xyz, TEMP[1].xyzx
 66: MOV TEMP[4].w, IMM[0].xxxx
 67: MOV TEMP[4].xyz, TEMP[1].xyzx
 68: DP4 TEMP[3].x, CONST[15], TEMP[3]
 69: DP4 TEMP[4].x, CONST[16], TEMP[4]
 70: MOV TEMP[3].y, TEMP[4].xxxx
 71: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[12].xyzz
 72: MAD TEMP[4].x, TEMP[0].zzzz, CONST[13].xxxx, CONST[13].yyyy
 73: MOV TEMP[1].w, TEMP[4].xxxx
 74: MAD TEMP[4].xy, IN[4].xyyy, CONST[9].xyyy, CONST[9].zwww
 75: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 76: RSQ TEMP[5].x, TEMP[5].xxxx
 77: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 78: MOV TEMP[4].zw, TEMP[3].yyxy
 79: MOV OUT[4], TEMP[2]
 80: MOV OUT[3], TEMP[4]
 81: MOV OUT[1], CONST[8]
 82: MOV OUT[2], TEMP[1]
 83: MOV OUT[0], TEMP[0]
 84: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %66 = call float @llvm.SI.load.const(<16 x i8> %10, i32 268)
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = extractelement <4 x float> %75, i32 2
  %79 = extractelement <4 x float> %75, i32 3
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = extractelement <4 x float> %82, i32 3
  %87 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
  %89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %5)
  %90 = extractelement <4 x float> %89, i32 0
  %91 = extractelement <4 x float> %89, i32 1
  %92 = extractelement <4 x float> %89, i32 2
  %93 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %94 = load <16 x i8> addrspace(2)* %93, !tbaa !0
  %95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %94, i32 0, i32 %5)
  %96 = extractelement <4 x float> %95, i32 0
  %97 = extractelement <4 x float> %95, i32 1
  %98 = fptosi float %83 to i32
  %99 = fptosi float %84 to i32
  %100 = fptosi float %85 to i32
  %101 = fptosi float %86 to i32
  %102 = bitcast i32 %98 to float
  %103 = bitcast i32 %99 to float
  %104 = bitcast i32 %100 to float
  %105 = bitcast i32 %101 to float
  %106 = fmul float %70, %50
  %107 = fadd float %106, %47
  %108 = fmul float %71, %51
  %109 = fadd float %108, %48
  %110 = fmul float %72, %52
  %111 = fadd float %110, %49
  %112 = bitcast float %105 to i32
  %113 = mul i32 3, %112
  %114 = bitcast i32 %113 to float
  %115 = bitcast float %104 to i32
  %116 = mul i32 3, %115
  %117 = bitcast i32 %116 to float
  %118 = bitcast float %103 to i32
  %119 = mul i32 3, %118
  %120 = bitcast i32 %119 to float
  %121 = bitcast float %102 to i32
  %122 = mul i32 3, %121
  %123 = bitcast i32 %122 to float
  %124 = bitcast float %123 to i32
  %125 = shl i32 %124, 4
  %126 = add i32 %125, 272
  %127 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %126)
  %128 = fmul float %127, %76
  %129 = shl i32 %124, 4
  %130 = add i32 %129, 276
  %131 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %130)
  %132 = fmul float %131, %76
  %133 = shl i32 %124, 4
  %134 = add i32 %133, 280
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %76
  %137 = shl i32 %124, 4
  %138 = add i32 %137, 284
  %139 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %138)
  %140 = fmul float %139, %76
  %141 = bitcast float %120 to i32
  %142 = shl i32 %141, 4
  %143 = add i32 %142, 272
  %144 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %143)
  %145 = fmul float %144, %77
  %146 = fadd float %145, %128
  %147 = shl i32 %141, 4
  %148 = add i32 %147, 276
  %149 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %148)
  %150 = fmul float %149, %77
  %151 = fadd float %150, %132
  %152 = shl i32 %141, 4
  %153 = add i32 %152, 280
  %154 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %153)
  %155 = fmul float %154, %77
  %156 = fadd float %155, %136
  %157 = shl i32 %141, 4
  %158 = add i32 %157, 284
  %159 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %158)
  %160 = fmul float %159, %77
  %161 = fadd float %160, %140
  %162 = bitcast float %117 to i32
  %163 = shl i32 %162, 4
  %164 = add i32 %163, 272
  %165 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %164)
  %166 = fmul float %165, %78
  %167 = fadd float %166, %146
  %168 = shl i32 %162, 4
  %169 = add i32 %168, 276
  %170 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %169)
  %171 = fmul float %170, %78
  %172 = fadd float %171, %151
  %173 = shl i32 %162, 4
  %174 = add i32 %173, 280
  %175 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %174)
  %176 = fmul float %175, %78
  %177 = fadd float %176, %156
  %178 = shl i32 %162, 4
  %179 = add i32 %178, 284
  %180 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %179)
  %181 = fmul float %180, %78
  %182 = fadd float %181, %161
  %183 = bitcast float %114 to i32
  %184 = shl i32 %183, 4
  %185 = add i32 %184, 272
  %186 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %185)
  %187 = fmul float %186, %79
  %188 = fadd float %187, %167
  %189 = shl i32 %183, 4
  %190 = add i32 %189, 276
  %191 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %190)
  %192 = fmul float %191, %79
  %193 = fadd float %192, %172
  %194 = shl i32 %183, 4
  %195 = add i32 %194, 280
  %196 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %195)
  %197 = fmul float %196, %79
  %198 = fadd float %197, %177
  %199 = shl i32 %183, 4
  %200 = add i32 %199, 284
  %201 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %200)
  %202 = fmul float %201, %79
  %203 = fadd float %202, %182
  %204 = bitcast float %105 to i32
  %205 = mul i32 3, %204
  %206 = add i32 %205, 1
  %207 = bitcast i32 %206 to float
  %208 = bitcast float %104 to i32
  %209 = mul i32 3, %208
  %210 = add i32 %209, 1
  %211 = bitcast i32 %210 to float
  %212 = bitcast float %103 to i32
  %213 = mul i32 3, %212
  %214 = add i32 %213, 1
  %215 = bitcast i32 %214 to float
  %216 = bitcast float %102 to i32
  %217 = mul i32 3, %216
  %218 = add i32 %217, 1
  %219 = bitcast i32 %218 to float
  %220 = bitcast float %219 to i32
  %221 = shl i32 %220, 4
  %222 = add i32 %221, 272
  %223 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %222)
  %224 = fmul float %223, %76
  %225 = shl i32 %220, 4
  %226 = add i32 %225, 276
  %227 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %226)
  %228 = fmul float %227, %76
  %229 = shl i32 %220, 4
  %230 = add i32 %229, 280
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %76
  %233 = shl i32 %220, 4
  %234 = add i32 %233, 284
  %235 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %234)
  %236 = fmul float %235, %76
  %237 = bitcast float %215 to i32
  %238 = shl i32 %237, 4
  %239 = add i32 %238, 272
  %240 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %239)
  %241 = fmul float %240, %77
  %242 = fadd float %241, %224
  %243 = shl i32 %237, 4
  %244 = add i32 %243, 276
  %245 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %244)
  %246 = fmul float %245, %77
  %247 = fadd float %246, %228
  %248 = shl i32 %237, 4
  %249 = add i32 %248, 280
  %250 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %249)
  %251 = fmul float %250, %77
  %252 = fadd float %251, %232
  %253 = shl i32 %237, 4
  %254 = add i32 %253, 284
  %255 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %254)
  %256 = fmul float %255, %77
  %257 = fadd float %256, %236
  %258 = bitcast float %211 to i32
  %259 = shl i32 %258, 4
  %260 = add i32 %259, 272
  %261 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %260)
  %262 = fmul float %261, %78
  %263 = fadd float %262, %242
  %264 = shl i32 %258, 4
  %265 = add i32 %264, 276
  %266 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %265)
  %267 = fmul float %266, %78
  %268 = fadd float %267, %247
  %269 = shl i32 %258, 4
  %270 = add i32 %269, 280
  %271 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %270)
  %272 = fmul float %271, %78
  %273 = fadd float %272, %252
  %274 = shl i32 %258, 4
  %275 = add i32 %274, 284
  %276 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %275)
  %277 = fmul float %276, %78
  %278 = fadd float %277, %257
  %279 = bitcast float %207 to i32
  %280 = shl i32 %279, 4
  %281 = add i32 %280, 272
  %282 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %281)
  %283 = fmul float %282, %79
  %284 = fadd float %283, %263
  %285 = shl i32 %279, 4
  %286 = add i32 %285, 276
  %287 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %286)
  %288 = fmul float %287, %79
  %289 = fadd float %288, %268
  %290 = shl i32 %279, 4
  %291 = add i32 %290, 280
  %292 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %291)
  %293 = fmul float %292, %79
  %294 = fadd float %293, %273
  %295 = shl i32 %279, 4
  %296 = add i32 %295, 284
  %297 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %296)
  %298 = fmul float %297, %79
  %299 = fadd float %298, %278
  %300 = bitcast float %105 to i32
  %301 = mul i32 3, %300
  %302 = add i32 %301, 2
  %303 = bitcast i32 %302 to float
  %304 = bitcast float %104 to i32
  %305 = mul i32 3, %304
  %306 = add i32 %305, 2
  %307 = bitcast i32 %306 to float
  %308 = bitcast float %103 to i32
  %309 = mul i32 3, %308
  %310 = add i32 %309, 2
  %311 = bitcast i32 %310 to float
  %312 = bitcast float %102 to i32
  %313 = mul i32 3, %312
  %314 = add i32 %313, 2
  %315 = bitcast i32 %314 to float
  %316 = bitcast float %315 to i32
  %317 = shl i32 %316, 4
  %318 = add i32 %317, 272
  %319 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %318)
  %320 = fmul float %319, %76
  %321 = shl i32 %316, 4
  %322 = add i32 %321, 276
  %323 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %322)
  %324 = fmul float %323, %76
  %325 = shl i32 %316, 4
  %326 = add i32 %325, 280
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %76
  %329 = shl i32 %316, 4
  %330 = add i32 %329, 284
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %76
  %333 = bitcast float %311 to i32
  %334 = shl i32 %333, 4
  %335 = add i32 %334, 272
  %336 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %335)
  %337 = fmul float %336, %77
  %338 = fadd float %337, %320
  %339 = shl i32 %333, 4
  %340 = add i32 %339, 276
  %341 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %340)
  %342 = fmul float %341, %77
  %343 = fadd float %342, %324
  %344 = shl i32 %333, 4
  %345 = add i32 %344, 280
  %346 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %345)
  %347 = fmul float %346, %77
  %348 = fadd float %347, %328
  %349 = shl i32 %333, 4
  %350 = add i32 %349, 284
  %351 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %350)
  %352 = fmul float %351, %77
  %353 = fadd float %352, %332
  %354 = bitcast float %307 to i32
  %355 = shl i32 %354, 4
  %356 = add i32 %355, 272
  %357 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %356)
  %358 = fmul float %357, %78
  %359 = fadd float %358, %338
  %360 = shl i32 %354, 4
  %361 = add i32 %360, 276
  %362 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %361)
  %363 = fmul float %362, %78
  %364 = fadd float %363, %343
  %365 = shl i32 %354, 4
  %366 = add i32 %365, 280
  %367 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %366)
  %368 = fmul float %367, %78
  %369 = fadd float %368, %348
  %370 = shl i32 %354, 4
  %371 = add i32 %370, 284
  %372 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %371)
  %373 = fmul float %372, %78
  %374 = fadd float %373, %353
  %375 = bitcast float %303 to i32
  %376 = shl i32 %375, 4
  %377 = add i32 %376, 272
  %378 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %377)
  %379 = fmul float %378, %79
  %380 = fadd float %379, %359
  %381 = shl i32 %375, 4
  %382 = add i32 %381, 276
  %383 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %382)
  %384 = fmul float %383, %79
  %385 = fadd float %384, %364
  %386 = shl i32 %375, 4
  %387 = add i32 %386, 280
  %388 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %387)
  %389 = fmul float %388, %79
  %390 = fadd float %389, %369
  %391 = shl i32 %375, 4
  %392 = add i32 %391, 284
  %393 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %392)
  %394 = fmul float %393, %79
  %395 = fadd float %394, %374
  %396 = fmul float %107, %188
  %397 = fmul float %109, %193
  %398 = fadd float %396, %397
  %399 = fmul float %111, %198
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %203
  %402 = fadd float %400, %401
  %403 = fmul float %107, %284
  %404 = fmul float %109, %289
  %405 = fadd float %403, %404
  %406 = fmul float %111, %294
  %407 = fadd float %405, %406
  %408 = fmul float 1.000000e+00, %299
  %409 = fadd float %407, %408
  %410 = fmul float %107, %380
  %411 = fmul float %109, %385
  %412 = fadd float %410, %411
  %413 = fmul float %111, %390
  %414 = fadd float %412, %413
  %415 = fmul float 1.000000e+00, %395
  %416 = fadd float %414, %415
  %417 = fmul float %90, %188
  %418 = fmul float %91, %193
  %419 = fadd float %417, %418
  %420 = fmul float %92, %198
  %421 = fadd float %419, %420
  %422 = fmul float 1.000000e+00, %203
  %423 = fadd float %421, %422
  %424 = fmul float %90, %284
  %425 = fmul float %91, %289
  %426 = fadd float %424, %425
  %427 = fmul float %92, %294
  %428 = fadd float %426, %427
  %429 = fmul float 1.000000e+00, %299
  %430 = fadd float %428, %429
  %431 = fmul float %90, %380
  %432 = fmul float %91, %385
  %433 = fadd float %431, %432
  %434 = fmul float %92, %390
  %435 = fadd float %433, %434
  %436 = fmul float 1.000000e+00, %395
  %437 = fadd float %435, %436
  %438 = fmul float %23, %402
  %439 = fmul float %24, %402
  %440 = fmul float %25, %402
  %441 = fmul float %26, %402
  %442 = fmul float %27, %409
  %443 = fadd float %442, %438
  %444 = fmul float %28, %409
  %445 = fadd float %444, %439
  %446 = fmul float %29, %409
  %447 = fadd float %446, %440
  %448 = fmul float %30, %409
  %449 = fadd float %448, %441
  %450 = fmul float %31, %416
  %451 = fadd float %450, %443
  %452 = fmul float %32, %416
  %453 = fadd float %452, %445
  %454 = fmul float %33, %416
  %455 = fadd float %454, %447
  %456 = fmul float %34, %416
  %457 = fadd float %456, %449
  %458 = fadd float %451, %35
  %459 = fadd float %453, %36
  %460 = fadd float %455, %37
  %461 = fadd float %457, %38
  %462 = fmul float %423, %58
  %463 = fmul float %430, %58
  %464 = fmul float %437, %58
  %465 = fmul float %11, %462
  %466 = fmul float %12, %462
  %467 = fmul float %13, %462
  %468 = fmul float %14, %463
  %469 = fadd float %468, %465
  %470 = fmul float %15, %463
  %471 = fadd float %470, %466
  %472 = fmul float %16, %463
  %473 = fadd float %472, %467
  %474 = fmul float %17, %464
  %475 = fadd float %474, %469
  %476 = fmul float %18, %464
  %477 = fadd float %476, %471
  %478 = fmul float %19, %464
  %479 = fadd float %478, %473
  %480 = fmul float %11, %402
  %481 = fmul float %12, %402
  %482 = fmul float %13, %402
  %483 = fmul float %14, %409
  %484 = fadd float %483, %480
  %485 = fmul float %15, %409
  %486 = fadd float %485, %481
  %487 = fmul float %16, %409
  %488 = fadd float %487, %482
  %489 = fmul float %17, %416
  %490 = fadd float %489, %484
  %491 = fmul float %18, %416
  %492 = fadd float %491, %486
  %493 = fmul float %19, %416
  %494 = fadd float %493, %488
  %495 = fadd float %490, %20
  %496 = fadd float %492, %21
  %497 = fadd float %494, %22
  %498 = fmul float %59, %495
  %499 = fmul float %60, %496
  %500 = fadd float %498, %499
  %501 = fmul float %61, %497
  %502 = fadd float %500, %501
  %503 = fmul float %62, 1.000000e+00
  %504 = fadd float %502, %503
  %505 = fmul float %63, %495
  %506 = fmul float %64, %496
  %507 = fadd float %505, %506
  %508 = fmul float %65, %497
  %509 = fadd float %507, %508
  %510 = fmul float %66, 1.000000e+00
  %511 = fadd float %509, %510
  %512 = fsub float -0.000000e+00, %53
  %513 = fadd float %495, %512
  %514 = fsub float -0.000000e+00, %54
  %515 = fadd float %496, %514
  %516 = fsub float -0.000000e+00, %55
  %517 = fadd float %497, %516
  %518 = fmul float %460, %56
  %519 = fadd float %518, %57
  %520 = fmul float %96, %43
  %521 = fadd float %520, %45
  %522 = fmul float %97, %44
  %523 = fadd float %522, %46
  %524 = fmul float %475, %475
  %525 = fmul float %477, %477
  %526 = fadd float %525, %524
  %527 = fmul float %479, %479
  %528 = fadd float %526, %527
  %529 = call float @llvm.AMDGPU.rsq(float %528)
  %530 = fmul float %475, %529
  %531 = fmul float %477, %529
  %532 = fmul float %479, %529
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %513, float %515, float %517, float %519)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %521, float %523, float %504, float %511)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %530, float %531, float %532, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %458, float %459, float %460, float %461)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840708
bf8c000f
e00c2000
80021200
bf8c0770
7e021112
d2d6000e
02010701
34041c84
4a0204ff
00000110
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020500
bf8c0770
10020b01
7e061113
d2d6000f
02010703
34121e84
4a0612ff
00000110
e0301000
80000303
bf8c0770
d2820001
04060d03
7e061114
d2d60011
02010703
34142284
4a0614ff
00000110
e0301000
80000303
bf8c0770
d2820001
04060f03
7e061115
d2d60012
02010703
34162484
4a0616ff
00000110
e0301000
80000303
bf8c0770
d2820001
04061103
4a0604ff
00000114
e0301000
80000303
bf8c0770
10060b03
4a0812ff
00000114
e0301000
80000404
bf8c0770
d2820003
040e0d04
4a0814ff
00000114
e0301000
80000404
bf8c0770
d2820003
040e0f04
4a0816ff
00000114
e0301000
80000404
bf8c0770
d2820003
040e1104
c0840700
bf8c007f
e00c2000
80021500
c202012d
c2028129
bf8c0070
7e080205
d2820013
04100916
10080713
c202012c
c2028128
bf8c007f
7e180205
d2820014
04300915
d282000c
04120314
4a0804ff
00000118
e0301000
80000404
bf8c0770
10080b04
4a1a12ff
00000118
e0301000
80000d0d
bf8c0770
d2820004
04120d0d
4a1a14ff
00000118
e0301000
80000d0d
bf8c0770
d2820004
04120f0d
4a1a16ff
00000118
e0301000
80000d0d
bf8c0770
d2820004
0412110d
c202012e
c202812a
bf8c007f
7e1a0205
d2820015
04340917
d282000c
04320915
4a0404ff
0000011c
e0301000
80000202
bf8c0770
10040b02
4a1212ff
0000011c
e0301000
80000909
bf8c0770
d2820002
040a0d09
4a1214ff
0000011c
e0301000
80000909
bf8c0770
d2820002
040a0f09
4a1216ff
0000011c
e0301000
80000909
bf8c0770
d2820009
040a1109
0604130c
c2020112
bf8c007f
102c0404
4a141c81
34141484
4a1614ff
00000110
e0301000
80000b0b
bf8c0770
10160b0b
4a181e81
34201884
4a1820ff
00000110
e0301000
80000c0c
bf8c0770
d282000b
042e0d0c
4a182281
342e1884
4a182eff
00000110
e0301000
80000c0c
bf8c0770
d282000b
042e0f0c
4a182481
34301884
4a1830ff
00000110
e0301000
80000c0c
bf8c0770
d282000b
042e110c
4a1814ff
00000114
e0301000
80000c0c
bf8c0770
10180b0c
4a1a20ff
00000114
e0301000
80000d0d
bf8c0770
d282000c
04320d0d
4a1a2eff
00000114
e0301000
80000d0d
bf8c0770
d282000c
04320f0d
4a1a30ff
00000114
e0301000
80000d0d
bf8c0770
d282000c
0432110d
101a1913
d2820019
04361714
4a1a14ff
00000118
e0301000
80000d0d
bf8c0770
101a0b0d
4a3420ff
00000118
e0301000
80001a1a
bf8c0770
d282000d
04360d1a
4a342eff
00000118
e0301000
80001a1a
bf8c0770
d282000d
04360f1a
4a3430ff
00000118
e0301000
80001a1a
bf8c0770
d282000d
0436111a
d2820019
04661b15
4a1414ff
0000011c
e0301000
80000a0a
bf8c0770
10140b0a
4a2020ff
0000011c
e0301000
80001010
bf8c0770
d282000a
042a0d10
4a202eff
0000011c
e0301000
80001010
bf8c0770
d282000a
042a0f10
4a2030ff
0000011c
e0301000
80001010
bf8c0770
d2820010
042a1110
06142119
c2020116
bf8c007f
d2820016
045a1404
4a1c1c82
342e1c84
4a1c2eff
00000110
e0301000
80000e0e
bf8c0770
101c0b0e
4a1e1e82
34301e84
4a1e30ff
00000110
e0301000
80000f0f
bf8c0770
d282000e
043a0d0f
4a1e2282
34321e84
4a1e32ff
00000110
e0301000
80000f0f
bf8c0770
d282000e
043a0f0f
4a1e2482
34241e84
4a1e24ff
00000110
e0301000
80000f0f
bf8c0770
d282000e
043a110f
4a1e2eff
00000114
e0301000
80000f0f
bf8c0770
101e0b0f
4a2230ff
00000114
e0301000
80001111
bf8c0770
d282000f
043e0d11
4a2232ff
00000114
e0301000
80001111
bf8c0770
d282000f
043e0f11
4a2224ff
00000114
e0301000
80001111
bf8c0770
d282000f
043e1111
10221f13
d2820013
04461d14
4a222eff
00000118
e0301000
80001111
bf8c0770
10220b11
4a2830ff
00000118
e0301000
80001414
bf8c0770
d2820011
04460d14
4a2832ff
00000118
e0301000
80001414
bf8c0770
d2820011
04460f14
4a2824ff
00000118
e0301000
80001414
bf8c0770
d2820011
04461114
d2820013
044e2315
4a282eff
0000011c
e0301000
80001414
bf8c0770
10280b14
4a2a30ff
0000011c
e0301000
80001515
bf8c0770
d2820014
04520d15
4a2a32ff
0000011c
e0301000
80001515
bf8c0770
d2820014
04520f15
4a2424ff
0000011c
e0301000
80001212
bf8c0770
d2820007
04521112
060a0f13
c202011a
bf8c007f
d2820006
045a0a04
c202011e
bf8c007f
060c0c04
c2020134
c2028135
bf8c007f
7e100205
d2820012
04200906
c2020102
bf8c007f
10100404
c2028106
bf8c007f
d2820008
04221405
c204010a
bf8c007f
d2820008
04220a08
c204810e
bf8c007f
06101009
c2048132
bf8c007f
0a281009
c2070101
bf8c007f
1026040e
c2050105
bf8c007f
d2820013
044e140a
c2048109
bf8c007f
d2820013
044e0a09
c205810d
bf8c007f
0626260b
c2058131
bf8c007f
0a2c260b
c2058100
bf8c007f
102a040b
c2060104
bf8c007f
d2820015
0456140c
c2068108
bf8c007f
d2820015
04560a0d
c207810c
bf8c007f
062a2a0f
c2078130
bf8c007f
0a2e2a0f
f800021f
12141617
c2078141
bf8c000f
1024260f
c2078140
bf8c007f
d2820012
044a2a0f
c2078142
bf8c007f
d2820012
044a100f
c2078143
bf8c007f
0624240f
c207813d
bf8c007f
1026260f
c207813c
bf8c007f
d2820013
044e2a0f
c207813e
bf8c007f
d2820008
044e100f
c207813f
bf8c007f
0610100f
c0880710
bf8c007f
e00c2000
80041300
c2078125
c2080127
bf8c0070
7e2e0210
d2820017
045c1f14
c2078124
c2080126
bf8c007f
7e300210
d2820013
04601f13
f800022f
12081713
c088070c
bf8c000f
e00c2000
80041200
bf8c0770
10000713
d2820000
04020312
d2820000
04020914
06001300
c203013b
bf8c007f
10000006
1006000e
10021913
d2820001
04061712
d2820001
04061b14
06022101
10020206
d2820003
040e020a
10081f13
d2820004
04121d12
d2820004
04122314
06080f04
100e0806
d2820003
040e0e09
1008000b
d2820004
0412020c
d2820004
04120e0d
10100904
d2820008
04220703
10000004
d2820000
04020205
d2820000
04020e08
d2820001
04220100
7e025b01
10000300
10060303
10020304
7e0802f2
f800023f
04000301
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020a04
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10020404
c2020115
bf8c007f
d2820001
04061404
c2020119
bf8c007f
d2820001
04060a04
c202011d
bf8c007f
06020204
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820002
040a1404
c2020118
bf8c007f
d2820002
040a0a04
c200011c
bf8c007f
06040400
f80008cf
00060102
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL CONST[11..12]
DCL CONST[4..10]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].xy, IN[4].xyyy
 13: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 14: MAD TEMP[5].yw, IMM[0].zzzz, TEMP[5], IMM[0].xxxx
 15: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 16: RSQ TEMP[6].x, TEMP[6].xxxx
 17: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 18: DP2 TEMP[6].x, TEMP[5].ywww, TEMP[5].ywww
 19: ADD TEMP[6].x, IMM[0].yyyy, -TEMP[6].xxxx
 20: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
 21: RSQ TEMP[7].x, TEMP[6].xxxx
 22: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx
 23: CMP TEMP[7].x, -TEMP[6].xxxx, TEMP[7].xxxx, IMM[0].wwww
 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 25: DP3 TEMP[6].x, IN[5].yzww, IN[5].yzww
 26: RSQ TEMP[6].x, TEMP[6].xxxx
 27: MUL TEMP[6].xyz, IN[5].yzww, TEMP[6].xxxx
 28: DP3 TEMP[7].x, IN[6].xyzz, IN[6].xyzz
 29: RSQ TEMP[7].x, TEMP[7].xxxx
 30: MUL TEMP[7].xyz, IN[6].xyzz, TEMP[7].xxxx
 31: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].wwww
 32: MAD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].yyyy, TEMP[7].xyzz
 33: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
 34: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 35: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 36: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 37: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 38: MOV TEMP[2].xyz, TEMP[2].xyzz
 39: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 40: DP4 TEMP[3].x, TEMP[4], CONST[9]
 41: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[7].yyyy
 42: LRP TEMP[3], TEMP[3].xxxx, IN[2], TEMP[4]
 43: MOV TEMP[5].w, TEMP[3].wwww
 44: MUL TEMP[6].xy, TEMP[0].xyyy, CONST[4].xyyy
 45: MOV TEMP[6].xy, TEMP[6].xyyy
 46: TEX TEMP[6], TEMP[6], SAMP[3], 2D
 47: DP4 TEMP[7].x, TEMP[4], CONST[8]
 48: ADD_SAT TEMP[7].x, TEMP[7].xxxx, CONST[7].xxxx
 49: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[6].wwww
 50: DP3 TEMP[8].x, TEMP[6].xyzz, IMM[1].xyzz
 51: MAX TEMP[8].x, TEMP[8].xxxx, IMM[1].wwww
 52: RCP TEMP[8].x, TEMP[8].xxxx
 53: MUL TEMP[8].xyz, TEMP[6].xyzz, TEMP[8].xxxx
 54: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xyzz
 55: MAD TEMP[3].xyz, TEMP[7].xxxx, TEMP[8].xyzz, TEMP[3].xyzz
 56: MUL TEMP[5].xyz, TEMP[3].xyzz, IMM[2].xxxx
 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 58: DP4 TEMP[3].x, TEMP[4], CONST[10]
 59: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[7].zzzz
 60: MAD TEMP[5].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
 61: MAX TEMP[2].x, IN[3].wwww, CONST[5].wwww
 62: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 63: LRP TEMP[5].xyz, TEMP[2].xxxx, TEMP[5].xyzz, CONST[5].xyzz
 64: MOV OUT[0], TEMP[5]
 65: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = fcmp ugt float %16, 0.000000e+00
  %62 = select i1 %61, float 1.000000e+00, float 0.000000e+00
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %66 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %82 = fmul float %13, %43
  %83 = fadd float %82, %44
  %84 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %88 = bitcast float %84 to i32
  %89 = icmp ne i32 %88, 0
  %. = select i1 %89, float -1.000000e+00, float 1.000000e+00
  %90 = bitcast float %71 to i32
  %91 = bitcast float %72 to i32
  %92 = insertelement <2 x i32> undef, i32 %90, i32 0
  %93 = insertelement <2 x i32> %92, i32 %91, i32 1
  %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %46, <16 x i8> %48, i32 2)
  %95 = extractelement <4 x float> %94, i32 0
  %96 = extractelement <4 x float> %94, i32 1
  %97 = extractelement <4 x float> %94, i32 2
  %98 = extractelement <4 x float> %94, i32 3
  %99 = bitcast float %71 to i32
  %100 = bitcast float %72 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %50, <16 x i8> %52, i32 2)
  %104 = extractelement <4 x float> %103, i32 1
  %105 = extractelement <4 x float> %103, i32 3
  %106 = fmul float 2.000000e+00, %104
  %107 = fadd float %106, -1.000000e+00
  %108 = fmul float 2.000000e+00, %105
  %109 = fadd float %108, -1.000000e+00
  %110 = fmul float %73, %73
  %111 = fmul float %74, %74
  %112 = fadd float %111, %110
  %113 = fmul float %75, %75
  %114 = fadd float %112, %113
  %115 = call float @llvm.AMDGPU.rsq(float %114)
  %116 = fmul float %73, %115
  %117 = fmul float %74, %115
  %118 = fmul float %75, %115
  %119 = fmul float %107, %107
  %120 = fmul float %109, %109
  %121 = fadd float %119, %120
  %122 = fsub float -0.000000e+00, %121
  %123 = fadd float 1.000000e+00, %122
  %124 = fcmp uge float 0.000000e+00, %123
  %125 = select i1 %124, float 0.000000e+00, float %123
  %126 = call float @llvm.AMDGPU.rsq(float %125)
  %127 = fmul float %126, %125
  %128 = fsub float -0.000000e+00, %125
  %129 = call float @llvm.AMDGPU.cndlt(float %128, float %127, float 0.000000e+00)
  %130 = fmul float %116, %129
  %131 = fmul float %117, %129
  %132 = fmul float %118, %129
  %133 = fmul float %76, %76
  %134 = fmul float %77, %77
  %135 = fadd float %134, %133
  %136 = fmul float %78, %78
  %137 = fadd float %135, %136
  %138 = call float @llvm.AMDGPU.rsq(float %137)
  %139 = fmul float %76, %138
  %140 = fmul float %77, %138
  %141 = fmul float %78, %138
  %142 = fmul float %79, %79
  %143 = fmul float %80, %80
  %144 = fadd float %143, %142
  %145 = fmul float %81, %81
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %79, %147
  %149 = fmul float %80, %147
  %150 = fmul float %81, %147
  %151 = fmul float %148, %109
  %152 = fmul float %149, %109
  %153 = fmul float %150, %109
  %154 = fmul float %139, %107
  %155 = fadd float %154, %151
  %156 = fmul float %140, %107
  %157 = fadd float %156, %152
  %158 = fmul float %141, %107
  %159 = fadd float %158, %153
  %160 = fmul float %130, %.
  %161 = fadd float %160, %155
  %162 = fmul float %131, %.
  %163 = fadd float %162, %157
  %164 = fmul float %132, %.
  %165 = fadd float %164, %159
  %166 = fmul float %161, %67
  %167 = fmul float %163, %68
  %168 = fadd float %167, %166
  %169 = fmul float %165, %69
  %170 = fadd float %168, %169
  %171 = fmul float %170, %161
  %172 = fmul float %170, %163
  %173 = fmul float %170, %165
  %174 = fmul float 2.000000e+00, %171
  %175 = fmul float 2.000000e+00, %172
  %176 = fmul float 2.000000e+00, %173
  %177 = fsub float -0.000000e+00, %174
  %178 = fadd float %67, %177
  %179 = fsub float -0.000000e+00, %175
  %180 = fadd float %68, %179
  %181 = fsub float -0.000000e+00, %176
  %182 = fadd float %69, %181
  %183 = insertelement <4 x float> undef, float %178, i32 0
  %184 = insertelement <4 x float> %183, float %180, i32 1
  %185 = insertelement <4 x float> %184, float %182, i32 2
  %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3
  %187 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %186)
  %188 = extractelement <4 x float> %187, i32 0
  %189 = extractelement <4 x float> %187, i32 1
  %190 = extractelement <4 x float> %187, i32 2
  %191 = extractelement <4 x float> %187, i32 3
  %192 = call float @fabs(float %190)
  %193 = fdiv float 1.000000e+00, %192
  %194 = fmul float %188, %193
  %195 = fadd float %194, 1.500000e+00
  %196 = fmul float %189, %193
  %197 = fadd float %196, 1.500000e+00
  %198 = bitcast float %197 to i32
  %199 = bitcast float %195 to i32
  %200 = bitcast float %191 to i32
  %201 = insertelement <4 x i32> undef, i32 %198, i32 0
  %202 = insertelement <4 x i32> %201, i32 %199, i32 1
  %203 = insertelement <4 x i32> %202, i32 %200, i32 2
  %204 = insertelement <4 x i32> %203, i32 undef, i32 3
  %205 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %204, <32 x i8> %54, <16 x i8> %56, i32 4)
  %206 = extractelement <4 x float> %205, i32 0
  %207 = extractelement <4 x float> %205, i32 1
  %208 = extractelement <4 x float> %205, i32 2
  %209 = extractelement <4 x float> %205, i32 3
  %210 = fmul float %95, %35
  %211 = fmul float %96, %36
  %212 = fadd float %210, %211
  %213 = fmul float %97, %37
  %214 = fadd float %212, %213
  %215 = fmul float %98, %38
  %216 = fadd float %214, %215
  %217 = fadd float %216, %29
  %218 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00)
  %219 = call float @llvm.AMDGPU.lrp(float %218, float %63, float %95)
  %220 = call float @llvm.AMDGPU.lrp(float %218, float %64, float %96)
  %221 = call float @llvm.AMDGPU.lrp(float %218, float %65, float %97)
  %222 = call float @llvm.AMDGPU.lrp(float %218, float %66, float %98)
  %223 = fmul float %12, %22
  %224 = fmul float %83, %23
  %225 = bitcast float %223 to i32
  %226 = bitcast float %224 to i32
  %227 = insertelement <2 x i32> undef, i32 %225, i32 0
  %228 = insertelement <2 x i32> %227, i32 %226, i32 1
  %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %58, <16 x i8> %60, i32 2)
  %230 = extractelement <4 x float> %229, i32 0
  %231 = extractelement <4 x float> %229, i32 1
  %232 = extractelement <4 x float> %229, i32 2
  %233 = extractelement <4 x float> %229, i32 3
  %234 = fmul float %95, %31
  %235 = fmul float %96, %32
  %236 = fadd float %234, %235
  %237 = fmul float %97, %33
  %238 = fadd float %236, %237
  %239 = fmul float %98, %34
  %240 = fadd float %238, %239
  %241 = fadd float %240, %28
  %242 = call float @llvm.AMDIL.clamp.(float %241, float 0.000000e+00, float 1.000000e+00)
  %243 = fmul float %242, %233
  %244 = fmul float %230, 0x3FCB367A00000000
  %245 = fmul float %231, 0x3FE6E2EB20000000
  %246 = fadd float %245, %244
  %247 = fmul float %232, 0x3FB27BB300000000
  %248 = fadd float %246, %247
  %249 = fcmp uge float %248, 0x3F50624DE0000000
  %250 = select i1 %249, float %248, float 0x3F50624DE0000000
  %251 = fdiv float 1.000000e+00, %250
  %252 = fmul float %230, %251
  %253 = fmul float %231, %251
  %254 = fmul float %232, %251
  %255 = fmul float %219, %230
  %256 = fmul float %220, %231
  %257 = fmul float %221, %232
  %258 = fmul float %243, %252
  %259 = fadd float %258, %255
  %260 = fmul float %243, %253
  %261 = fadd float %260, %256
  %262 = fmul float %243, %254
  %263 = fadd float %262, %257
  %264 = fmul float %259, 4.000000e+00
  %265 = fmul float %261, 4.000000e+00
  %266 = fmul float %263, 4.000000e+00
  %267 = fmul float %206, %209
  %268 = fmul float %207, %209
  %269 = fmul float %208, %209
  %270 = fmul float %95, %39
  %271 = fmul float %96, %40
  %272 = fadd float %270, %271
  %273 = fmul float %97, %41
  %274 = fadd float %272, %273
  %275 = fmul float %98, %42
  %276 = fadd float %274, %275
  %277 = fadd float %276, %30
  %278 = call float @llvm.AMDIL.clamp.(float %277, float 0.000000e+00, float 1.000000e+00)
  %279 = fmul float %267, %278
  %280 = fadd float %279, %264
  %281 = fmul float %268, %278
  %282 = fadd float %281, %265
  %283 = fmul float %269, %278
  %284 = fadd float %283, %266
  %285 = fcmp uge float %70, %27
  %286 = select i1 %285, float %70, float %27
  %287 = call float @llvm.AMDIL.clamp.(float %286, float 0.000000e+00, float 1.000000e+00)
  %288 = call float @llvm.AMDGPU.lrp(float %287, float %280, float %24)
  %289 = call float @llvm.AMDGPU.lrp(float %287, float %282, float %25)
  %290 = call float @llvm.AMDGPU.lrp(float %287, float %284, float %26)
  %291 = call i32 @llvm.SI.packf16(float %288, float %289)
  %292 = bitcast i32 %291 to float
  %293 = call i32 @llvm.SI.packf16(float %290, float %222)
  %294 = bitcast i32 %293 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %292, float %294, float %292, float %294)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340b00
c8350b01
c8300a00
c8310a01
1014190c
d282000b
042a1b0d
c8280c00
c8290c01
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441100
c8451101
c8401000
c8411001
101a2110
d282000e
04362311
c8341200
c8351201
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8540e00
c8550e01
c8500d00
c8510d01
10222914
d2820012
04462b15
c8440f00
c8450f01
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0400
c83d0401
10281f0c
c8400500
c8410501
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200600
c8210601
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
bf8c0770
10241709
c0840300
c0c60500
bf8c007f
f0800f00
00430405
c0840100
bf8c0070
c2000925
bf8c007f
10180a00
c2000924
bf8c007f
d282000c
04300104
c2000926
bf8c007f
d282000c
04300106
c2000927
bf8c007f
d282000c
04300107
c200091d
bf8c007f
06181800
d206080c
0201010c
081a18f2
101c0b0d
c83c0100
c83d0101
d2820013
043a1f0c
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c2000911
bf8c007f
101e0600
c2000910
bf8c007f
101c0400
c080030c
c0c60518
bf8c007f
f0800f00
00030e0e
bf8c0770
10261f13
10041cff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070f
7e0602ff
3d93dd98
d2820002
040a0710
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
1028050f
c2000921
bf8c007f
10060a00
c2000920
bf8c007f
d2820003
040c0104
c2000922
bf8c007f
d2820003
040c0106
c2000923
bf8c007f
d2820003
040c0107
c200091c
bf8c007f
06060600
d2060803
02010103
10062303
d2820013
044e2903
102826f6
c2000929
bf8c007f
10260a00
c2000928
bf8c007f
d2820013
044c0104
c200092a
bf8c007f
d2820013
044c0106
c200092b
bf8c007f
d2820013
044c0107
c200091e
bf8c007f
06262600
d2060813
02010113
d2820015
04522712
c8480700
c8490701
c2000917
bf8c007f
d00c0002
02000112
7e280200
d2000012
000a2514
d2060812
02010112
082824f2
c2000915
bf8c007f
102c2800
d2820015
045a2b12
102c1708
102e090d
c8600000
c8610001
d2820017
045e310c
102e1d17
1030050e
d2820017
045e3103
102e2ef6
d2820016
045e2716
c2000914
bf8c007f
102e2800
d2820016
045e2d12
5e2a2b16
1010170a
10120d0d
c8280200
c8290201
d2820009
0426150c
10122109
10040510
d2820002
04260503
100404f6
d2820002
040a2708
c2000916
bf8c007f
10062800
d2820002
040e0512
10060f0d
c8100300
c8110301
d2820000
040e090c
5e000102
f8001c0f
00150015
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL IN[7]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..10]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MOV TEMP[4].w, IMM[0].yyyy
 10: MOV TEMP[4].xyz, IN[7].xyzx
 11: MUL TEMP[5], CONST[0], TEMP[1].xxxx
 12: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[5]
 13: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 14: ADD TEMP[0], TEMP[0], CONST[3]
 15: MOV TEMP[2].w, IMM[0].xxxx
 16: MOV TEMP[2].xyz, CONST[4].xyzx
 17: DP4 TEMP[5].x, TEMP[3], IN[0]
 18: DP4 TEMP[6].x, TEMP[3], IN[1]
 19: MOV TEMP[5].y, TEMP[6].xxxx
 20: DP4 TEMP[3].x, TEMP[3], IN[2]
 21: MOV TEMP[5].z, TEMP[3].xxxx
 22: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[6].wwww
 23: DP4 TEMP[5].x, TEMP[4], IN[0]
 24: DP4 TEMP[6].x, TEMP[4], IN[1]
 25: MOV TEMP[5].y, TEMP[6].xxxx
 26: DP4 TEMP[4].x, TEMP[4], IN[2]
 27: MOV TEMP[5].z, TEMP[4].xxxx
 28: MUL TEMP[4].xyz, TEMP[5].xyzz, CONST[6].wwww
 29: MUL TEMP[2], TEMP[2], IN[3]
 30: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 31: MAD TEMP[5].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 32: MOV TEMP[1].w, TEMP[5].xxxx
 33: MAD TEMP[5].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 34: MOV TEMP[5].zw, TEMP[3].yyxy
 35: MOV TEMP[6].x, TEMP[3].zzzz
 36: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 37: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 38: MOV TEMP[6].yzw, TEMP[3].yxyz
 39: MOV TEMP[3].xyz, TEMP[4].xyzx
 40: MOV OUT[1], TEMP[2]
 41: MOV OUT[3], TEMP[5]
 42: MOV OUT[5], TEMP[3]
 43: MOV OUT[4], TEMP[6]
 44: MOV OUT[2], TEMP[1]
 45: MOV OUT[0], TEMP[0]
 46: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = getelementptr <16 x i8> addrspace(2)* %3, i32 7
  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
  %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %5)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = fmul float %77, %38
  %98 = fadd float %97, %35
  %99 = fmul float %78, %39
  %100 = fadd float %99, %36
  %101 = fmul float %79, %40
  %102 = fadd float %101, %37
  %103 = fmul float %98, %49
  %104 = fmul float %100, %50
  %105 = fadd float %103, %104
  %106 = fmul float %102, %51
  %107 = fadd float %105, %106
  %108 = fmul float 1.000000e+00, %52
  %109 = fadd float %107, %108
  %110 = fmul float %98, %56
  %111 = fmul float %100, %57
  %112 = fadd float %110, %111
  %113 = fmul float %102, %58
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %59
  %116 = fadd float %114, %115
  %117 = fmul float %98, %63
  %118 = fmul float %100, %64
  %119 = fadd float %117, %118
  %120 = fmul float %102, %65
  %121 = fadd float %119, %120
  %122 = fmul float 1.000000e+00, %66
  %123 = fadd float %121, %122
  %124 = fmul float %11, %109
  %125 = fmul float %12, %109
  %126 = fmul float %13, %109
  %127 = fmul float %14, %109
  %128 = fmul float %15, %116
  %129 = fadd float %128, %124
  %130 = fmul float %16, %116
  %131 = fadd float %130, %125
  %132 = fmul float %17, %116
  %133 = fadd float %132, %126
  %134 = fmul float %18, %116
  %135 = fadd float %134, %127
  %136 = fmul float %19, %123
  %137 = fadd float %136, %129
  %138 = fmul float %20, %123
  %139 = fadd float %138, %131
  %140 = fmul float %21, %123
  %141 = fadd float %140, %133
  %142 = fmul float %22, %123
  %143 = fadd float %142, %135
  %144 = fadd float %137, %23
  %145 = fadd float %139, %24
  %146 = fadd float %141, %25
  %147 = fadd float %143, %26
  %148 = fmul float %83, %49
  %149 = fmul float %84, %50
  %150 = fadd float %148, %149
  %151 = fmul float %85, %51
  %152 = fadd float %150, %151
  %153 = fmul float 0.000000e+00, %52
  %154 = fadd float %152, %153
  %155 = fmul float %83, %56
  %156 = fmul float %84, %57
  %157 = fadd float %155, %156
  %158 = fmul float %85, %58
  %159 = fadd float %157, %158
  %160 = fmul float 0.000000e+00, %59
  %161 = fadd float %159, %160
  %162 = fmul float %83, %63
  %163 = fmul float %84, %64
  %164 = fadd float %162, %163
  %165 = fmul float %85, %65
  %166 = fadd float %164, %165
  %167 = fmul float 0.000000e+00, %66
  %168 = fadd float %166, %167
  %169 = fmul float %154, %34
  %170 = fmul float %161, %34
  %171 = fmul float %168, %34
  %172 = fmul float %94, %49
  %173 = fmul float %95, %50
  %174 = fadd float %172, %173
  %175 = fmul float %96, %51
  %176 = fadd float %174, %175
  %177 = fmul float 0.000000e+00, %52
  %178 = fadd float %176, %177
  %179 = fmul float %94, %56
  %180 = fmul float %95, %57
  %181 = fadd float %179, %180
  %182 = fmul float %96, %58
  %183 = fadd float %181, %182
  %184 = fmul float 0.000000e+00, %59
  %185 = fadd float %183, %184
  %186 = fmul float %94, %63
  %187 = fmul float %95, %64
  %188 = fadd float %186, %187
  %189 = fmul float %96, %65
  %190 = fadd float %188, %189
  %191 = fmul float 0.000000e+00, %66
  %192 = fadd float %190, %191
  %193 = fmul float %178, %34
  %194 = fmul float %185, %34
  %195 = fmul float %192, %34
  %196 = fmul float %27, %70
  %197 = fmul float %28, %71
  %198 = fmul float %29, %72
  %199 = fmul float 1.000000e+00, %73
  %200 = fsub float -0.000000e+00, %43
  %201 = fadd float %109, %200
  %202 = fsub float -0.000000e+00, %44
  %203 = fadd float %116, %202
  %204 = fsub float -0.000000e+00, %45
  %205 = fadd float %123, %204
  %206 = fmul float %146, %41
  %207 = fadd float %206, %42
  %208 = fmul float %89, %30
  %209 = fadd float %208, %32
  %210 = fmul float %90, %31
  %211 = fadd float %210, %33
  %212 = fmul float %195, %170
  %213 = fmul float %193, %171
  %214 = fmul float %194, %169
  %215 = fsub float -0.000000e+00, %212
  %216 = fmul float %194, %171
  %217 = fadd float %216, %215
  %218 = fsub float -0.000000e+00, %213
  %219 = fmul float %195, %169
  %220 = fadd float %219, %218
  %221 = fsub float -0.000000e+00, %214
  %222 = fmul float %193, %170
  %223 = fadd float %222, %221
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %196, float %197, float %198, float %199)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %201, float %203, float %205, float %207)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %209, float %211, float %169, float %170)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %217, float %220, float %223)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %193, float %194, float %195, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %144, float %145, float %146, float %147)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
10041503
d2820002
040a1304
d2820002
040a1711
06041902
c2020102
bf8c007f
101a0404
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
10061d03
d2820003
040e1b04
d2820003
040e1f11
06062103
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800021f
11121314
c0840714
bf8c000f
e00c2000
80021200
bf8c0770
10220d13
d2820011
04460b12
d2820011
04460f14
d2820011
04450108
c202011b
bf8c007f
10222204
102c1513
d2820016
045a1312
d2820016
045a1714
d2820016
0459010c
102c2c04
c0840718
bf8c007f
e00c2000
80021700
c2028115
c2040117
bf8c0070
7e360208
d282001b
046c0b18
c2028114
c2040116
bf8c007f
7e380208
d2820017
04700b17
f800022f
11161b17
c084071c
bf8c000f
e00c2000
80021700
bf8c0770
10000d18
d2820000
04020b17
d2820000
04020f19
d2820000
04010108
10000004
100c2d00
100a1518
d2820005
04161317
d2820005
04161719
d2820005
0415010c
100a0a04
100e2305
080c0d07
100e1d13
d2820007
041e1b12
d2820007
041e1f14
d2820007
041d0110
100e0e04
10120f05
10101d18
d2820008
04221b17
d2820008
04221f19
d2820008
04210110
10101004
10142d08
0812130a
10142308
10160f00
0814150b
f800023f
06090a07
bf8c070f
7e0c0280
f800024f
06080005
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: DP4 TEMP[2].x, TEMP[1], CONST[7]
  5: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy
  6: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].xxxx
  7: MUL TEMP[2], TEMP[1], TEMP[2]
  8: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[3].xy, TEMP[3].xyyy
 10: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 11: DP4 TEMP[1].x, TEMP[1], CONST[6]
 12: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[5].xxxx
 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].wwww
 14: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].yzww
 15: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 16: RCP TEMP[4].x, TEMP[4].xxxx
 17: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 18: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 19: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 21: MAX TEMP[1].x, IN[2].wwww, CONST[3].wwww
 22: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 23: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 24: MOV OUT[0], TEMP[2]
 25: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %45 = load <32 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %55 = fmul float %13, %38
  %56 = fadd float %55, %39
  %57 = bitcast float %53 to i32
  %58 = bitcast float %54 to i32
  %59 = insertelement <2 x i32> undef, i32 %57, i32 0
  %60 = insertelement <2 x i32> %59, i32 %58, i32 1
  %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %41, <16 x i8> %43, i32 2)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = fmul float %62, %34
  %67 = fmul float %63, %35
  %68 = fadd float %66, %67
  %69 = fmul float %64, %36
  %70 = fadd float %68, %69
  %71 = fmul float %65, %37
  %72 = fadd float %70, %71
  %73 = fadd float %72, %29
  %74 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00)
  %75 = call float @llvm.AMDGPU.lrp(float %74, float %48, float 1.000000e+00)
  %76 = call float @llvm.AMDGPU.lrp(float %74, float %49, float 1.000000e+00)
  %77 = call float @llvm.AMDGPU.lrp(float %74, float %50, float 1.000000e+00)
  %78 = call float @llvm.AMDGPU.lrp(float %74, float %51, float 1.000000e+00)
  %79 = fmul float %62, %75
  %80 = fmul float %63, %76
  %81 = fmul float %64, %77
  %82 = fmul float %65, %78
  %83 = fmul float %12, %22
  %84 = fmul float %56, %23
  %85 = bitcast float %83 to i32
  %86 = bitcast float %84 to i32
  %87 = insertelement <2 x i32> undef, i32 %85, i32 0
  %88 = insertelement <2 x i32> %87, i32 %86, i32 1
  %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %45, <16 x i8> %47, i32 2)
  %90 = extractelement <4 x float> %89, i32 0
  %91 = extractelement <4 x float> %89, i32 1
  %92 = extractelement <4 x float> %89, i32 2
  %93 = extractelement <4 x float> %89, i32 3
  %94 = fmul float %62, %30
  %95 = fmul float %63, %31
  %96 = fadd float %94, %95
  %97 = fmul float %64, %32
  %98 = fadd float %96, %97
  %99 = fmul float %65, %33
  %100 = fadd float %98, %99
  %101 = fadd float %100, %28
  %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00)
  %103 = fmul float %102, %93
  %104 = fmul float %90, 0x3FCB367A00000000
  %105 = fmul float %91, 0x3FE6E2EB20000000
  %106 = fadd float %105, %104
  %107 = fmul float %92, 0x3FB27BB300000000
  %108 = fadd float %106, %107
  %109 = fcmp uge float %108, 0x3F50624DE0000000
  %110 = select i1 %109, float %108, float 0x3F50624DE0000000
  %111 = fdiv float 1.000000e+00, %110
  %112 = fmul float %90, %111
  %113 = fmul float %91, %111
  %114 = fmul float %92, %111
  %115 = fmul float %79, %90
  %116 = fmul float %80, %91
  %117 = fmul float %81, %92
  %118 = fmul float %103, %112
  %119 = fadd float %118, %115
  %120 = fmul float %103, %113
  %121 = fadd float %120, %116
  %122 = fmul float %103, %114
  %123 = fadd float %122, %117
  %124 = fmul float %119, 4.000000e+00
  %125 = fmul float %121, 4.000000e+00
  %126 = fmul float %123, 4.000000e+00
  %127 = fcmp uge float %52, %27
  %128 = select i1 %127, float %52, float %27
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %124, float %24)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %125, float %25)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %126, float %26)
  %133 = call i32 @llvm.SI.packf16(float %130, float %131)
  %134 = bitcast i32 %133 to float
  %135 = call i32 @llvm.SI.packf16(float %132, float %82)
  %136 = bitcast i32 %135 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %134, float %136, float %134, float %136)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c0840100
bf8c0070
c200091d
bf8c007f
10100a00
c200091c
bf8c007f
d2820008
04200104
c200091e
bf8c007f
d2820008
04200106
c200091f
bf8c007f
d2820008
04200107
c2000915
bf8c007f
06101000
d2060808
02010108
081210f2
c8280200
c8290201
d282000a
04261508
101c1506
c2000928
c2008929
bf8c007f
7e140201
d2820003
04280103
c2000909
bf8c007f
10160600
c2000908
bf8c007f
10140400
c0800304
c0c60508
bf8c007f
f0800f00
00030a0a
bf8c0770
101c190e
100414ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070b
7e0602ff
3d93dd98
d2820002
040a070c
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
101e050c
c2000919
bf8c007f
10060a00
c2000918
bf8c007f
d2820003
040c0104
c200091a
bf8c007f
d2820003
040c0106
c200091b
bf8c007f
d2820003
040c0107
c2000914
bf8c007f
06060600
d2060803
02010103
10061b03
d282000e
043a1f03
10201cf6
c8380700
c8390701
c200090f
bf8c007f
d00c0002
0200010e
7e1e0200
d200000e
000a1d0f
d206080e
0201010e
081e1cf2
c200090e
bf8c007f
10221e00
d2820010
0446210e
c8440300
c8450301
d2820011
04262308
10222307
5e202310
c8440100
c8450101
d2820011
04262308
10222305
10221711
1024050b
d2820011
04462503
102222f6
c200090d
bf8c007f
10241e00
d2820011
044a230e
c8480000
c8490001
d2820000
04262508
10000104
10001500
1002050a
d2820000
04020303
100000f6
c200090c
bf8c007f
10021e00
d2820000
0406010e
5e002300
f8001c0f
10001000
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..205]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+14], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+14], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+14], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[4], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[5], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[1], CONST[6], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[1], TEMP[1], CONST[7]
 46: MOV TEMP[4].w, IMM[0].xxxx
 47: MOV TEMP[4].xyz, CONST[8].xyzx
 48: MUL TEMP[2], CONST[0], TEMP[2].xxxx
 49: MAD TEMP[2], CONST[1], TEMP[3].xxxx, TEMP[2]
 50: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 51: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 52: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 53: MAD TEMP[2].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 54: MOV TEMP[0].w, TEMP[2].xxxx
 55: MAD TEMP[2].xy, IN[3].xyyy, CONST[9].xyyy, CONST[9].zwww
 56: MOV OUT[3], TEMP[2]
 57: MOV OUT[1], TEMP[4]
 58: MOV OUT[2], TEMP[0]
 59: MOV OUT[0], TEMP[1]
 60: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = extractelement <4 x float> %67, i32 2
  %71 = extractelement <4 x float> %67, i32 3
  %72 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %5)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = extractelement <4 x float> %74, i32 1
  %77 = extractelement <4 x float> %74, i32 2
  %78 = extractelement <4 x float> %74, i32 3
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fptosi float %75 to i32
  %85 = fptosi float %76 to i32
  %86 = fptosi float %77 to i32
  %87 = fptosi float %78 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %62, %51
  %93 = fadd float %92, %48
  %94 = fmul float %63, %52
  %95 = fadd float %94, %49
  %96 = fmul float %64, %53
  %97 = fadd float %96, %50
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 224
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %68
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 228
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %68
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 232
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %68
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 236
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %68
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 224
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %69
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 228
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %69
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 232
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %69
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 236
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %69
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 224
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %70
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 228
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %70
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 232
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %70
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 236
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %70
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 224
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %71
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 228
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %71
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 232
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %71
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 236
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %71
  %189 = fadd float %188, %168
  %190 = fmul float %93, %174
  %191 = fmul float %95, %179
  %192 = fadd float %190, %191
  %193 = fmul float %97, %184
  %194 = fadd float %192, %193
  %195 = fmul float 1.000000e+00, %189
  %196 = fadd float %194, %195
  %197 = bitcast float %91 to i32
  %198 = mul i32 3, %197
  %199 = add i32 %198, 1
  %200 = bitcast i32 %199 to float
  %201 = bitcast float %90 to i32
  %202 = mul i32 3, %201
  %203 = add i32 %202, 1
  %204 = bitcast i32 %203 to float
  %205 = bitcast float %89 to i32
  %206 = mul i32 3, %205
  %207 = add i32 %206, 1
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %88 to i32
  %210 = mul i32 3, %209
  %211 = add i32 %210, 1
  %212 = bitcast i32 %211 to float
  %213 = bitcast float %212 to i32
  %214 = shl i32 %213, 4
  %215 = add i32 %214, 224
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %68
  %218 = shl i32 %213, 4
  %219 = add i32 %218, 228
  %220 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %219)
  %221 = fmul float %220, %68
  %222 = shl i32 %213, 4
  %223 = add i32 %222, 232
  %224 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %223)
  %225 = fmul float %224, %68
  %226 = shl i32 %213, 4
  %227 = add i32 %226, 236
  %228 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %227)
  %229 = fmul float %228, %68
  %230 = bitcast float %208 to i32
  %231 = shl i32 %230, 4
  %232 = add i32 %231, 224
  %233 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %232)
  %234 = fmul float %233, %69
  %235 = fadd float %234, %217
  %236 = shl i32 %230, 4
  %237 = add i32 %236, 228
  %238 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %237)
  %239 = fmul float %238, %69
  %240 = fadd float %239, %221
  %241 = shl i32 %230, 4
  %242 = add i32 %241, 232
  %243 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %242)
  %244 = fmul float %243, %69
  %245 = fadd float %244, %225
  %246 = shl i32 %230, 4
  %247 = add i32 %246, 236
  %248 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %247)
  %249 = fmul float %248, %69
  %250 = fadd float %249, %229
  %251 = bitcast float %204 to i32
  %252 = shl i32 %251, 4
  %253 = add i32 %252, 224
  %254 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %253)
  %255 = fmul float %254, %70
  %256 = fadd float %255, %235
  %257 = shl i32 %251, 4
  %258 = add i32 %257, 228
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %70
  %261 = fadd float %260, %240
  %262 = shl i32 %251, 4
  %263 = add i32 %262, 232
  %264 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %263)
  %265 = fmul float %264, %70
  %266 = fadd float %265, %245
  %267 = shl i32 %251, 4
  %268 = add i32 %267, 236
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = fmul float %269, %70
  %271 = fadd float %270, %250
  %272 = bitcast float %200 to i32
  %273 = shl i32 %272, 4
  %274 = add i32 %273, 224
  %275 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %274)
  %276 = fmul float %275, %71
  %277 = fadd float %276, %256
  %278 = shl i32 %272, 4
  %279 = add i32 %278, 228
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = fmul float %280, %71
  %282 = fadd float %281, %261
  %283 = shl i32 %272, 4
  %284 = add i32 %283, 232
  %285 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %284)
  %286 = fmul float %285, %71
  %287 = fadd float %286, %266
  %288 = shl i32 %272, 4
  %289 = add i32 %288, 236
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %71
  %292 = fadd float %291, %271
  %293 = fmul float %93, %277
  %294 = fmul float %95, %282
  %295 = fadd float %293, %294
  %296 = fmul float %97, %287
  %297 = fadd float %295, %296
  %298 = fmul float 1.000000e+00, %292
  %299 = fadd float %297, %298
  %300 = bitcast float %91 to i32
  %301 = mul i32 3, %300
  %302 = add i32 %301, 2
  %303 = bitcast i32 %302 to float
  %304 = bitcast float %90 to i32
  %305 = mul i32 3, %304
  %306 = add i32 %305, 2
  %307 = bitcast i32 %306 to float
  %308 = bitcast float %89 to i32
  %309 = mul i32 3, %308
  %310 = add i32 %309, 2
  %311 = bitcast i32 %310 to float
  %312 = bitcast float %88 to i32
  %313 = mul i32 3, %312
  %314 = add i32 %313, 2
  %315 = bitcast i32 %314 to float
  %316 = bitcast float %315 to i32
  %317 = shl i32 %316, 4
  %318 = add i32 %317, 224
  %319 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %318)
  %320 = fmul float %319, %68
  %321 = shl i32 %316, 4
  %322 = add i32 %321, 228
  %323 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %322)
  %324 = fmul float %323, %68
  %325 = shl i32 %316, 4
  %326 = add i32 %325, 232
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %68
  %329 = shl i32 %316, 4
  %330 = add i32 %329, 236
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %68
  %333 = bitcast float %311 to i32
  %334 = shl i32 %333, 4
  %335 = add i32 %334, 224
  %336 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %335)
  %337 = fmul float %336, %69
  %338 = fadd float %337, %320
  %339 = shl i32 %333, 4
  %340 = add i32 %339, 228
  %341 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %340)
  %342 = fmul float %341, %69
  %343 = fadd float %342, %324
  %344 = shl i32 %333, 4
  %345 = add i32 %344, 232
  %346 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %345)
  %347 = fmul float %346, %69
  %348 = fadd float %347, %328
  %349 = shl i32 %333, 4
  %350 = add i32 %349, 236
  %351 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %350)
  %352 = fmul float %351, %69
  %353 = fadd float %352, %332
  %354 = bitcast float %307 to i32
  %355 = shl i32 %354, 4
  %356 = add i32 %355, 224
  %357 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %356)
  %358 = fmul float %357, %70
  %359 = fadd float %358, %338
  %360 = shl i32 %354, 4
  %361 = add i32 %360, 228
  %362 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %361)
  %363 = fmul float %362, %70
  %364 = fadd float %363, %343
  %365 = shl i32 %354, 4
  %366 = add i32 %365, 232
  %367 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %366)
  %368 = fmul float %367, %70
  %369 = fadd float %368, %348
  %370 = shl i32 %354, 4
  %371 = add i32 %370, 236
  %372 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %371)
  %373 = fmul float %372, %70
  %374 = fadd float %373, %353
  %375 = bitcast float %303 to i32
  %376 = shl i32 %375, 4
  %377 = add i32 %376, 224
  %378 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %377)
  %379 = fmul float %378, %71
  %380 = fadd float %379, %359
  %381 = shl i32 %375, 4
  %382 = add i32 %381, 228
  %383 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %382)
  %384 = fmul float %383, %71
  %385 = fadd float %384, %364
  %386 = shl i32 %375, 4
  %387 = add i32 %386, 232
  %388 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %387)
  %389 = fmul float %388, %71
  %390 = fadd float %389, %369
  %391 = shl i32 %375, 4
  %392 = add i32 %391, 236
  %393 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %392)
  %394 = fmul float %393, %71
  %395 = fadd float %394, %374
  %396 = fmul float %93, %380
  %397 = fmul float %95, %385
  %398 = fadd float %396, %397
  %399 = fmul float %97, %390
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %395
  %402 = fadd float %400, %401
  %403 = fmul float %25, %196
  %404 = fmul float %26, %196
  %405 = fmul float %27, %196
  %406 = fmul float %28, %196
  %407 = fmul float %29, %299
  %408 = fadd float %407, %403
  %409 = fmul float %30, %299
  %410 = fadd float %409, %404
  %411 = fmul float %31, %299
  %412 = fadd float %411, %405
  %413 = fmul float %32, %299
  %414 = fadd float %413, %406
  %415 = fmul float %33, %402
  %416 = fadd float %415, %408
  %417 = fmul float %34, %402
  %418 = fadd float %417, %410
  %419 = fmul float %35, %402
  %420 = fadd float %419, %412
  %421 = fmul float %36, %402
  %422 = fadd float %421, %414
  %423 = fadd float %416, %37
  %424 = fadd float %418, %38
  %425 = fadd float %420, %39
  %426 = fadd float %422, %40
  %427 = fmul float %11, %196
  %428 = fmul float %12, %196
  %429 = fmul float %13, %196
  %430 = fmul float %14, %196
  %431 = fmul float %15, %299
  %432 = fadd float %431, %427
  %433 = fmul float %16, %299
  %434 = fadd float %433, %428
  %435 = fmul float %17, %299
  %436 = fadd float %435, %429
  %437 = fmul float %18, %299
  %438 = fadd float %437, %430
  %439 = fmul float %19, %402
  %440 = fadd float %439, %432
  %441 = fmul float %20, %402
  %442 = fadd float %441, %434
  %443 = fmul float %21, %402
  %444 = fadd float %443, %436
  %445 = fadd float %440, %22
  %446 = fadd float %442, %23
  %447 = fadd float %444, %24
  %448 = fsub float -0.000000e+00, %56
  %449 = fadd float %445, %448
  %450 = fsub float -0.000000e+00, %57
  %451 = fadd float %446, %450
  %452 = fsub float -0.000000e+00, %58
  %453 = fadd float %447, %452
  %454 = fmul float %425, %54
  %455 = fadd float %454, %55
  %456 = fmul float %82, %44
  %457 = fadd float %456, %46
  %458 = fmul float %83, %45
  %459 = fadd float %458, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %449, float %451, float %453, float %455)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %457, float %459, float %436, float %438)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %423, float %424, float %425, float %426)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020f00
bf8c0770
7e02110f
d2d60006
02010701
340a0c84
4a020aff
000000e0
e0301000
80000701
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
10100307
7e0e1110
d2d60007
02010707
34140e84
4a1214ff
000000e0
e0301000
80000909
bf8c0770
d2820009
04220509
7e101111
d2d60008
02010708
341c1084
4a161cff
000000e0
e0301000
80000b0b
bf8c0770
d282000b
0426070b
7e121112
d2d60009
02010709
341e1284
4a181eff
000000e0
e0301000
80000c0c
bf8c0770
d282000d
042e090c
4a160aff
000000e4
e0301000
80000b0b
bf8c0770
1016030b
4a1814ff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e050c
4a181cff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e070c
4a181eff
000000e4
e0301000
80000c0c
bf8c0770
d282000c
042e090c
c0840700
bf8c007f
e00c2000
80021000
c202012d
c2028129
bf8c0070
7e160205
d282000b
042c0911
1028190b
c202012c
c2028128
bf8c007f
7e180205
d282000c
04300910
d2820014
04521b0c
4a1a0aff
000000e8
e0301000
80000d0d
bf8c0770
101a030d
4a2a14ff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360515
4a2a1cff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360715
4a2a1eff
000000e8
e0301000
80001515
bf8c0770
d2820015
04360915
c202012e
c202812a
bf8c007f
7e1a0205
d282000d
04340912
d2820010
04522b0d
4a0a0aff
000000ec
e0301000
80000505
bf8c0770
100a0305
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416050a
4a141cff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416070a
4a141eff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416090a
060a0b10
c2020112
bf8c007f
101c0a04
4a140c81
34141484
4a1e14ff
000000e0
e0301000
80000f0f
bf8c0770
1020030f
4a1e0e81
341e1e84
4a221eff
000000e0
e0301000
80001111
bf8c0770
d2820011
04420511
4a201081
34202084
4a2420ff
000000e0
e0301000
80001212
bf8c0770
d2820012
04460712
4a221281
34222284
4a2622ff
000000e0
e0301000
80001313
bf8c0770
d2820012
044a0913
4a2614ff
000000e4
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0914
1026270b
d2820012
044e250c
4a2614ff
000000e8
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0914
d2820012
044a270d
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
1014030a
4a1e1eff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a050f
4a1e20ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a070f
4a1e22ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a090f
06141512
c2020116
bf8c007f
d282000e
043a1404
4a0c0c82
340c0c84
4a1e0cff
000000e0
e0301000
80000f0f
bf8c0770
101e030f
4a0e0e82
340e0e84
4a200eff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0510
4a101082
34101084
4a2010ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0710
4a121282
34121284
4a2012ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0910
4a200cff
000000e4
e0301000
80001010
bf8c0770
10200310
4a220eff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420511
4a2210ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420711
4a2212ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420911
1016210b
d282000b
042e1f0c
4a180cff
000000e8
e0301000
80000c0c
bf8c0770
1018030c
4a1e0eff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432050f
4a1e10ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432070f
4a1e12ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432090f
d282000b
042e190d
4a0c0cff
000000ec
e0301000
80000606
bf8c0770
100c0306
4a0e0eff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0507
4a0e10ff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0707
4a0e12ff
000000ec
e0301000
80000707
bf8c0770
d2820001
041a0907
0602030b
c202011a
bf8c007f
d2820002
043a0204
c202011e
bf8c007f
06040404
c2020130
c2028131
bf8c007f
7e060205
d2820004
040c0902
c2020102
bf8c007f
10060a04
c2020106
bf8c007f
d2820003
040e1404
c202010a
bf8c007f
d2820006
040e0204
c202010e
bf8c007f
060c0c04
c2020136
bf8c007f
0a0c0c04
c2020101
bf8c007f
100e0a04
c2020105
bf8c007f
d2820007
041e1404
c2020109
bf8c007f
d2820007
041e0204
c202010d
bf8c007f
060e0e04
c2020135
bf8c007f
0a0e0e04
c2020100
bf8c007f
10100a04
c2020104
bf8c007f
d2820008
04221404
c2020108
bf8c007f
d2820008
04220204
c202010c
bf8c007f
06101004
c2020134
bf8c007f
0a101004
f800021f
04060708
c2020103
bf8c000f
10080a04
c2020107
bf8c007f
d2820004
04121404
c082070c
bf8c007f
e00c2000
80010600
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000907
c2020124
c2028126
bf8c007f
7e160205
d2820006
042c0906
f800022f
04030006
c2020113
bf8c000f
10000a04
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020204
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10060a04
c2020115
bf8c007f
d2820003
040e1404
c2020119
bf8c007f
d2820003
040e0204
c202011d
bf8c007f
06060604
c2020110
bf8c007f
10080a04
c2020114
bf8c007f
d2820004
04121404
c2020118
bf8c007f
d2820001
04120204
c200011c
bf8c007f
06020200
f80008cf
00020301
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0..1]
DCL TEMP[2..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0010}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: UIF TEMP[1].xxxx :3
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :3
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].w, IMM[0].yyyy
  9: DP3 TEMP[4].x, IN[4].xyzz, IN[4].xyzz
 10: RSQ TEMP[4].x, TEMP[4].xxxx
 11: MUL TEMP[4].xyz, IN[4].xyzz, TEMP[4].xxxx
 12: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx
 13: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz
 14: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz
 15: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 16: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 17: MOV TEMP[2].xyz, TEMP[2].xyzz
 18: TEX TEMP[2], TEMP[2], SAMP[0], CUBE
 19: DP4 TEMP[4].x, IMM[0].yyyy, CONST[8]
 20: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[5].zzzz
 21: MUL TEMP[4].x, TEMP[2].wwww, TEMP[4].xxxx
 22: LRP TEMP[3].xyz, TEMP[4].xxxx, TEMP[2].xyzz, IMM[0].yyyy
 23: DP4 TEMP[2].x, IMM[0].yyyy, CONST[7]
 24: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy
 25: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 26: MUL TEMP[2], TEMP[3], TEMP[2]
 27: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 28: MOV TEMP[3].xy, TEMP[3].xyyy
 29: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 30: DP4 TEMP[4].x, IMM[0].yyyy, CONST[6]
 31: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[5].xxxx
 32: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 33: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 34: MAX TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 35: RCP TEMP[5].x, TEMP[5].xxxx
 36: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 37: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 38: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 39: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[1].wwww
 40: MAX TEMP[3].x, IN[3].wwww, CONST[3].wwww
 41: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 42: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 43: MOV OUT[0], TEMP[2]
 44: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = fcmp ugt float %16, 0.000000e+00
  %54 = select i1 %53, float 1.000000e+00, float 0.000000e+00
  %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %66 = fmul float %13, %43
  %67 = fadd float %66, %44
  %68 = call float @llvm.AMDIL.clamp.(float %54, float 0.000000e+00, float 1.000000e+00)
  %69 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %70 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %71 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %72 = bitcast float %68 to i32
  %73 = icmp ne i32 %72, 0
  %. = select i1 %73, float -1.000000e+00, float 1.000000e+00
  %74 = fmul float %63, %63
  %75 = fmul float %64, %64
  %76 = fadd float %75, %74
  %77 = fmul float %65, %65
  %78 = fadd float %76, %77
  %79 = call float @llvm.AMDGPU.rsq(float %78)
  %80 = fmul float %63, %79
  %81 = fmul float %64, %79
  %82 = fmul float %65, %79
  %83 = fmul float %80, %.
  %84 = fmul float %81, %.
  %85 = fmul float %82, %.
  %86 = fmul float %83, %59
  %87 = fmul float %84, %60
  %88 = fadd float %87, %86
  %89 = fmul float %85, %61
  %90 = fadd float %88, %89
  %91 = fmul float %90, %83
  %92 = fmul float %90, %84
  %93 = fmul float %90, %85
  %94 = fmul float 2.000000e+00, %91
  %95 = fmul float 2.000000e+00, %92
  %96 = fmul float 2.000000e+00, %93
  %97 = fsub float -0.000000e+00, %94
  %98 = fadd float %59, %97
  %99 = fsub float -0.000000e+00, %95
  %100 = fadd float %60, %99
  %101 = fsub float -0.000000e+00, %96
  %102 = fadd float %61, %101
  %103 = insertelement <4 x float> undef, float %98, i32 0
  %104 = insertelement <4 x float> %103, float %100, i32 1
  %105 = insertelement <4 x float> %104, float %102, i32 2
  %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3
  %107 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %106)
  %108 = extractelement <4 x float> %107, i32 0
  %109 = extractelement <4 x float> %107, i32 1
  %110 = extractelement <4 x float> %107, i32 2
  %111 = extractelement <4 x float> %107, i32 3
  %112 = call float @fabs(float %110)
  %113 = fdiv float 1.000000e+00, %112
  %114 = fmul float %108, %113
  %115 = fadd float %114, 1.500000e+00
  %116 = fmul float %109, %113
  %117 = fadd float %116, 1.500000e+00
  %118 = bitcast float %117 to i32
  %119 = bitcast float %115 to i32
  %120 = bitcast float %111 to i32
  %121 = insertelement <4 x i32> undef, i32 %118, i32 0
  %122 = insertelement <4 x i32> %121, i32 %119, i32 1
  %123 = insertelement <4 x i32> %122, i32 %120, i32 2
  %124 = insertelement <4 x i32> %123, i32 undef, i32 3
  %125 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %124, <32 x i8> %46, <16 x i8> %48, i32 4)
  %126 = extractelement <4 x float> %125, i32 0
  %127 = extractelement <4 x float> %125, i32 1
  %128 = extractelement <4 x float> %125, i32 2
  %129 = extractelement <4 x float> %125, i32 3
  %130 = fmul float 1.000000e+00, %39
  %131 = fmul float 1.000000e+00, %40
  %132 = fadd float %130, %131
  %133 = fmul float 1.000000e+00, %41
  %134 = fadd float %132, %133
  %135 = fmul float 1.000000e+00, %42
  %136 = fadd float %134, %135
  %137 = fadd float %136, %30
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = fmul float %129, %138
  %140 = call float @llvm.AMDGPU.lrp(float %139, float %126, float 1.000000e+00)
  %141 = call float @llvm.AMDGPU.lrp(float %139, float %127, float 1.000000e+00)
  %142 = call float @llvm.AMDGPU.lrp(float %139, float %128, float 1.000000e+00)
  %143 = fmul float 1.000000e+00, %35
  %144 = fmul float 1.000000e+00, %36
  %145 = fadd float %143, %144
  %146 = fmul float 1.000000e+00, %37
  %147 = fadd float %145, %146
  %148 = fmul float 1.000000e+00, %38
  %149 = fadd float %147, %148
  %150 = fadd float %149, %29
  %151 = call float @llvm.AMDIL.clamp.(float %150, float 0.000000e+00, float 1.000000e+00)
  %152 = call float @llvm.AMDGPU.lrp(float %151, float %55, float 1.000000e+00)
  %153 = call float @llvm.AMDGPU.lrp(float %151, float %56, float 1.000000e+00)
  %154 = call float @llvm.AMDGPU.lrp(float %151, float %57, float 1.000000e+00)
  %155 = call float @llvm.AMDGPU.lrp(float %151, float %58, float 1.000000e+00)
  %156 = fmul float %140, %152
  %157 = fmul float %141, %153
  %158 = fmul float %142, %154
  %159 = fmul float 1.000000e+00, %155
  %160 = fmul float %12, %22
  %161 = fmul float %67, %23
  %162 = bitcast float %160 to i32
  %163 = bitcast float %161 to i32
  %164 = insertelement <2 x i32> undef, i32 %162, i32 0
  %165 = insertelement <2 x i32> %164, i32 %163, i32 1
  %166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %50, <16 x i8> %52, i32 2)
  %167 = extractelement <4 x float> %166, i32 0
  %168 = extractelement <4 x float> %166, i32 1
  %169 = extractelement <4 x float> %166, i32 2
  %170 = extractelement <4 x float> %166, i32 3
  %171 = fmul float 1.000000e+00, %31
  %172 = fmul float 1.000000e+00, %32
  %173 = fadd float %171, %172
  %174 = fmul float 1.000000e+00, %33
  %175 = fadd float %173, %174
  %176 = fmul float 1.000000e+00, %34
  %177 = fadd float %175, %176
  %178 = fadd float %177, %28
  %179 = call float @llvm.AMDIL.clamp.(float %178, float 0.000000e+00, float 1.000000e+00)
  %180 = fmul float %179, %170
  %181 = fmul float %167, 0x3FCB367A00000000
  %182 = fmul float %168, 0x3FE6E2EB20000000
  %183 = fadd float %182, %181
  %184 = fmul float %169, 0x3FB27BB300000000
  %185 = fadd float %183, %184
  %186 = fcmp uge float %185, 0x3F50624DE0000000
  %187 = select i1 %186, float %185, float 0x3F50624DE0000000
  %188 = fdiv float 1.000000e+00, %187
  %189 = fmul float %167, %188
  %190 = fmul float %168, %188
  %191 = fmul float %169, %188
  %192 = fmul float %156, %167
  %193 = fmul float %157, %168
  %194 = fmul float %158, %169
  %195 = fmul float %180, %189
  %196 = fadd float %195, %192
  %197 = fmul float %180, %190
  %198 = fadd float %197, %193
  %199 = fmul float %180, %191
  %200 = fadd float %199, %194
  %201 = fmul float %196, 4.000000e+00
  %202 = fmul float %198, 4.000000e+00
  %203 = fmul float %200, 4.000000e+00
  %204 = fcmp uge float %62, %27
  %205 = select i1 %204, float %62, float %27
  %206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00)
  %207 = call float @llvm.AMDGPU.lrp(float %206, float %201, float %24)
  %208 = call float @llvm.AMDGPU.lrp(float %206, float %202, float %25)
  %209 = call float @llvm.AMDGPU.lrp(float %206, float %203, float %26)
  %210 = call i32 @llvm.SI.packf16(float %207, float %208)
  %211 = bitcast i32 %210 to float
  %212 = call i32 @llvm.SI.packf16(float %209, float %159)
  %213 = bitcast i32 %212 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %211, float %213, float %211, float %213)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c81c0800
c81d0801
100a0f07
d2820008
04160d06
c8140a00
c8150a01
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0400
c81d0401
10160f04
c8280500
c8290501
d282000b
042e1506
100a1105
100a1305
c8200600
c8210601
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840300
c0c60500
bf8c007f
f0800f00
00430405
c0840100
bf8c0070
c2000920
c2008921
bf8c007f
7e100201
d2060008
02021000
c2000922
bf8c007f
06101000
c2000923
bf8c007f
06101000
c2000916
bf8c007f
06101000
d2060808
02010108
10141107
081614f2
d282000c
042e0b0a
c8340100
c8350101
c200091c
c200891d
bf8c007f
7e100201
d2060008
02021000
c200091e
bf8c007f
06101000
c200091f
bf8c007f
06101000
c2000915
bf8c007f
06101000
d2060808
02010108
081210f2
d282000d
04261b08
10201b0c
c2000928
c2008929
bf8c007f
7e180201
d2820003
04300103
c2000909
bf8c007f
101a0600
c2000908
bf8c007f
10180400
c0800304
c0c60508
bf8c007f
f0800f00
00030c0c
bf8c0770
10201b10
100418ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070d
7e0602ff
3d93dd98
d2820002
040a070e
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
1022050d
c2000918
c2008919
bf8c007f
7e060201
d2060003
02020600
c200091a
bf8c007f
06060600
c200091b
bf8c007f
06060600
c2000914
bf8c007f
06060600
d2060803
02010103
10061f03
d2820010
04422303
102420f6
c8400700
c8410701
c200090f
bf8c007f
d00c0002
02000110
7e220200
d2000010
000a2111
d2060810
02010110
082220f2
c200090d
bf8c007f
10262200
d2820012
044e2510
d2820013
042e090a
c8500000
c8510001
d2820014
04262908
10262913
10261913
1028050c
d2820013
044e2903
102626f6
c200090c
bf8c007f
10282200
d2820013
04522710
5e242513
d2820004
042e0d0a
c8140200
c8150201
d2820005
04260b08
10080b04
10081d04
1004050e
d2820002
04120503
100404f6
c200090e
bf8c007f
10062200
d2820002
040e0510
c80c0300
c80d0301
d2820000
04260708
5e000102
f8001c0f
00120012
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..9]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[7].xyzz, CONST[6].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MUL TEMP[4], CONST[0], TEMP[1].xxxx
 10: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[4]
 11: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 12: ADD TEMP[0], TEMP[0], CONST[3]
 13: MOV TEMP[2].w, IMM[0].xxxx
 14: MOV TEMP[2].xyz, CONST[4].xyzx
 15: DP4 TEMP[4].x, TEMP[3], IN[0]
 16: DP4 TEMP[5].x, TEMP[3], IN[1]
 17: MOV TEMP[4].y, TEMP[5].xxxx
 18: DP4 TEMP[3].x, TEMP[3], IN[2]
 19: MOV TEMP[4].z, TEMP[3].xxxx
 20: MUL TEMP[2], TEMP[2], IN[3]
 21: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[9].xyzz
 22: MAD TEMP[3].x, TEMP[0].zzzz, CONST[8].xxxx, CONST[8].yyyy
 23: MOV TEMP[1].w, TEMP[3].xxxx
 24: MUL TEMP[3].xyz, TEMP[4].xyzz, CONST[5].wwww
 25: MOV OUT[3], TEMP[3]
 26: MOV OUT[1], TEMP[2]
 27: MOV OUT[2], TEMP[1]
 28: MOV OUT[0], TEMP[0]
 29: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %42 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %5)
  %45 = extractelement <4 x float> %44, i32 0
  %46 = extractelement <4 x float> %44, i32 1
  %47 = extractelement <4 x float> %44, i32 2
  %48 = extractelement <4 x float> %44, i32 3
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = extractelement <4 x float> %51, i32 3
  %56 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %5)
  %59 = extractelement <4 x float> %58, i32 0
  %60 = extractelement <4 x float> %58, i32 1
  %61 = extractelement <4 x float> %58, i32 2
  %62 = extractelement <4 x float> %58, i32 3
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = extractelement <4 x float> %65, i32 3
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = extractelement <4 x float> %72, i32 2
  %76 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %77 = load <16 x i8> addrspace(2)* %76, !tbaa !0
  %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %5)
  %79 = extractelement <4 x float> %78, i32 0
  %80 = extractelement <4 x float> %78, i32 1
  %81 = extractelement <4 x float> %78, i32 2
  %82 = fmul float %73, %34
  %83 = fadd float %82, %31
  %84 = fmul float %74, %35
  %85 = fadd float %84, %32
  %86 = fmul float %75, %36
  %87 = fadd float %86, %33
  %88 = fmul float %83, %45
  %89 = fmul float %85, %46
  %90 = fadd float %88, %89
  %91 = fmul float %87, %47
  %92 = fadd float %90, %91
  %93 = fmul float 1.000000e+00, %48
  %94 = fadd float %92, %93
  %95 = fmul float %83, %52
  %96 = fmul float %85, %53
  %97 = fadd float %95, %96
  %98 = fmul float %87, %54
  %99 = fadd float %97, %98
  %100 = fmul float 1.000000e+00, %55
  %101 = fadd float %99, %100
  %102 = fmul float %83, %59
  %103 = fmul float %85, %60
  %104 = fadd float %102, %103
  %105 = fmul float %87, %61
  %106 = fadd float %104, %105
  %107 = fmul float 1.000000e+00, %62
  %108 = fadd float %106, %107
  %109 = fmul float %11, %94
  %110 = fmul float %12, %94
  %111 = fmul float %13, %94
  %112 = fmul float %14, %94
  %113 = fmul float %15, %101
  %114 = fadd float %113, %109
  %115 = fmul float %16, %101
  %116 = fadd float %115, %110
  %117 = fmul float %17, %101
  %118 = fadd float %117, %111
  %119 = fmul float %18, %101
  %120 = fadd float %119, %112
  %121 = fmul float %19, %108
  %122 = fadd float %121, %114
  %123 = fmul float %20, %108
  %124 = fadd float %123, %116
  %125 = fmul float %21, %108
  %126 = fadd float %125, %118
  %127 = fmul float %22, %108
  %128 = fadd float %127, %120
  %129 = fadd float %122, %23
  %130 = fadd float %124, %24
  %131 = fadd float %126, %25
  %132 = fadd float %128, %26
  %133 = fmul float %79, %45
  %134 = fmul float %80, %46
  %135 = fadd float %133, %134
  %136 = fmul float %81, %47
  %137 = fadd float %135, %136
  %138 = fmul float 0.000000e+00, %48
  %139 = fadd float %137, %138
  %140 = fmul float %79, %52
  %141 = fmul float %80, %53
  %142 = fadd float %140, %141
  %143 = fmul float %81, %54
  %144 = fadd float %142, %143
  %145 = fmul float 0.000000e+00, %55
  %146 = fadd float %144, %145
  %147 = fmul float %79, %59
  %148 = fmul float %80, %60
  %149 = fadd float %147, %148
  %150 = fmul float %81, %61
  %151 = fadd float %149, %150
  %152 = fmul float 0.000000e+00, %62
  %153 = fadd float %151, %152
  %154 = fmul float %27, %66
  %155 = fmul float %28, %67
  %156 = fmul float %29, %68
  %157 = fmul float 1.000000e+00, %69
  %158 = fsub float -0.000000e+00, %39
  %159 = fadd float %94, %158
  %160 = fsub float -0.000000e+00, %40
  %161 = fadd float %101, %160
  %162 = fsub float -0.000000e+00, %41
  %163 = fadd float %108, %162
  %164 = fmul float %131, %37
  %165 = fadd float %164, %38
  %166 = fmul float %139, %30
  %167 = fmul float %146, %30
  %168 = fmul float %153, %30
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %154, float %155, float %156, float %157)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %159, float %161, float %163, float %165)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %166, float %167, float %168, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %129, float %130, float %131, float %132)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020900
c202011d
c2028119
bf8c0070
7e020205
d2820007
0404090a
c0840704
bf8c007f
e00c2000
80020200
bf8c0770
10020707
c202011c
c2028118
bf8c007f
7e0c0205
d2820008
04180909
d2820001
04060508
c202011e
c202811a
bf8c007f
7e0c0205
d2820011
0418090b
d2820001
04060911
06020b01
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
100c1507
d2820006
041a1308
d2820006
041a1711
060c1906
c2020102
bf8c007f
101a0c04
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
100e1d07
d2820007
041e1b08
d2820007
041e1f11
060e2107
c202010a
bf8c007f
d2820008
044a0e04
c202010e
bf8c007f
06101004
c2020120
c2028121
bf8c007f
7e220205
d2820011
04440908
c2020126
bf8c007f
0a240e04
c2020125
bf8c007f
0a260204
c2020124
bf8c007f
0a280c04
f800021f
11121314
c0820714
bf8c000f
e00c2000
80011100
bf8c0770
10001d12
d2820000
04021b11
d2820000
04021f13
d2820000
04010110
c2020117
bf8c007f
10000004
101a0712
d282000d
04360511
d282000d
04360913
d2820002
04350105
10040404
10061512
d2820003
040e1311
d2820003
040e1713
d2820003
040d010c
10060604
7e080280
f800022f
04000203
c2020103
bf8c000f
10000c04
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020e04
c202010f
bf8c007f
06000004
c2020101
bf8c007f
10040c04
c2020105
bf8c007f
d2820002
040a0204
c2020109
bf8c007f
d2820002
040a0e04
c202010d
bf8c007f
06040404
c2020100
bf8c007f
10060c04
c2020104
bf8c007f
d2820001
040e0204
c2020108
bf8c007f
d2820001
04060e04
c200010c
bf8c007f
06020200
f80008cf
00080201
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL CONST[19..20]
DCL CONST[4..18]
DCL TEMP[0..1]
DCL TEMP[2..9], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    4.0000,    -0.0100,    -0.5000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[20].xxxx, CONST[20].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: DP4 TEMP[4].x, IMM[0].yyyy, CONST[17]
 11: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[16].xxxx
 12: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 13: RSQ TEMP[5].x, TEMP[5].xxxx
 14: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 16: DP4 TEMP[3].x, IMM[0].yyyy, CONST[18]
 17: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[16].yyyy
 18: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 19: MOV TEMP[5].w, TEMP[3]
 20: DP3 TEMP[6].x, TEMP[2].xyzz, CONST[8].xyzz
 21: ADD TEMP[6].x, TEMP[6].xxxx, CONST[12].wwww
 22: MOV_SAT TEMP[7].x, TEMP[6].xxxx
 23: LRP TEMP[7].xyz, TEMP[7].xxxx, CONST[10].xyzz, CONST[11].xyzz
 24: MOV_SAT TEMP[8].x, -TEMP[6].xxxx
 25: LRP TEMP[8].xyz, TEMP[8].xxxx, CONST[12].xyzz, CONST[11].xyzz
 26: SLT TEMP[9].x, TEMP[6].xxxx, IMM[0].zzzz
 27: F2I TEMP[9].x, -TEMP[9]
 28: UIF TEMP[9].xxxx :3
 29:   MOV TEMP[8].xyz, TEMP[8].xyzx
 30: ELSE :3
 31:   MOV TEMP[8].xyz, TEMP[7].xyzx
 32: ENDIF
 33: DP3 TEMP[7].x, IN[3].xyzz, IN[3].xyzz
 34: RSQ TEMP[7].x, TEMP[7].xxxx
 35: MUL TEMP[7].xyz, IN[3].xyzz, TEMP[7].xxxx
 36: ADD TEMP[7].xyz, CONST[9].xyzz, -TEMP[7].xyzz
 37: DP3 TEMP[9].x, TEMP[7].xyzz, TEMP[7].xyzz
 38: RSQ TEMP[9].x, TEMP[9].xxxx
 39: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[9].xxxx
 40: DP3_SAT TEMP[7].x, TEMP[7].xyzz, TEMP[2].xyzz
 41: POW TEMP[7].x, TEMP[7].xxxx, CONST[15].xxxx
 42: SGE TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
 43: F2I TEMP[6].x, -TEMP[6]
 44: AND TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy
 45: MUL TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx
 46: MUL TEMP[6].xyz, CONST[13].xyzz, TEMP[6].xxxx
 47: MOV TEMP[2].xyz, TEMP[2].xyzz
 48: TEX TEMP[2].xyz, TEMP[2], SAMP[1], CUBE
 49: MAD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].xxxx, TEMP[8].xyzz
 50: MOV TEMP[7].xy, IN[4].xyyy
 51: TEX TEMP[7], TEMP[7], SAMP[0], 2D
 52: MUL TEMP[7], TEMP[7], CONST[14]
 53: LRP TEMP[2].xyz, TEMP[7].wwww, TEMP[7].xyzz, TEMP[2].xyzz
 54: LRP TEMP[6].xyz, TEMP[7].wwww, IMM[0].zzzz, TEMP[6].xyzz
 55: MUL TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz
 56: MAD TEMP[5].xyz, TEMP[6].xyzz, TEMP[4].xxxx, TEMP[2].xyzz
 57: MAX TEMP[2].x, IN[3].wwww, CONST[5].wwww
 58: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 59: MUL TEMP[5], TEMP[5], TEMP[2].xxxx
 60: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[4].xyyy
 61: MAD TEMP[3].xy, CONST[7].xzzz, TEMP[5].wwww, CONST[7].ywww
 62: ADD TEMP[4].x, TEMP[3].xxxx, IMM[1].yyyy
 63: SLT TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
 64: F2I TEMP[4].x, -TEMP[4]
 65: UIF TEMP[4].xxxx :3
 66:   KILL
 67: ENDIF
 68: MUL TEMP[4].xy, TEMP[2].xyyy, CONST[6].yzzz
 69: MOV TEMP[4].xy, TEMP[4].xyyy
 70: TEX TEMP[4].xy, TEMP[4], SAMP[2], 2D
 71: ADD TEMP[4].xy, TEMP[4].xyyy, IMM[1].zzzz
 72: MUL TEMP[6].x, CONST[6].xxxx, TEMP[5].wwww
 73: MAD TEMP[2].xy, TEMP[4].xyyy, TEMP[6].xxxx, TEMP[2].xyyy
 74: MOV TEMP[2].xy, TEMP[2].xyyy
 75: TEX TEMP[2], TEMP[2], SAMP[3], 2D
 76: MUL TEMP[4], TEMP[5], TEMP[3].xxxx
 77: MAD TEMP[2], TEMP[2], TEMP[3].yyyy, TEMP[4]
 78: MOV OUT[0], TEMP[2]
 79: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 272)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 276)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 280)
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 284)
  %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 288)
  %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 292)
  %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 296)
  %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300)
  %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 320)
  %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 324)
  %68 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %69 = load <32 x i8> addrspace(2)* %68, !tbaa !0
  %70 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %73 = load <32 x i8> addrspace(2)* %72, !tbaa !0
  %74 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %77 = load <32 x i8> addrspace(2)* %76, !tbaa !0
  %78 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %81 = load <32 x i8> addrspace(2)* %80, !tbaa !0
  %82 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %83 = load <16 x i8> addrspace(2)* %82, !tbaa !0
  %84 = fcmp ugt float %16, 0.000000e+00
  %85 = select i1 %84, float 1.000000e+00, float 0.000000e+00
  %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %99 = fmul float %13, %66
  %100 = fadd float %99, %67
  %101 = call float @llvm.AMDIL.clamp.(float %85, float 0.000000e+00, float 1.000000e+00)
  %102 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %103 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %104 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %105 = bitcast float %101 to i32
  %106 = icmp ne i32 %105, 0
  %. = select i1 %106, float -1.000000e+00, float 1.000000e+00
  %107 = fmul float 1.000000e+00, %58
  %108 = fmul float 1.000000e+00, %59
  %109 = fadd float %107, %108
  %110 = fmul float 1.000000e+00, %60
  %111 = fadd float %109, %110
  %112 = fmul float 1.000000e+00, %61
  %113 = fadd float %111, %112
  %114 = fadd float %113, %56
  %115 = call float @llvm.AMDIL.clamp.(float %114, float 0.000000e+00, float 1.000000e+00)
  %116 = fmul float %96, %96
  %117 = fmul float %97, %97
  %118 = fadd float %117, %116
  %119 = fmul float %98, %98
  %120 = fadd float %118, %119
  %121 = call float @llvm.AMDGPU.rsq(float %120)
  %122 = fmul float %96, %121
  %123 = fmul float %97, %121
  %124 = fmul float %98, %121
  %125 = fmul float %122, %.
  %126 = fmul float %123, %.
  %127 = fmul float %124, %.
  %128 = fmul float 1.000000e+00, %62
  %129 = fmul float 1.000000e+00, %63
  %130 = fadd float %128, %129
  %131 = fmul float 1.000000e+00, %64
  %132 = fadd float %130, %131
  %133 = fmul float 1.000000e+00, %65
  %134 = fadd float %132, %133
  %135 = fadd float %134, %57
  %136 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00)
  %137 = call float @llvm.AMDGPU.lrp(float %136, float %86, float 1.000000e+00)
  %138 = call float @llvm.AMDGPU.lrp(float %136, float %87, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %136, float %88, float 1.000000e+00)
  %140 = call float @llvm.AMDGPU.lrp(float %136, float %89, float 1.000000e+00)
  %141 = fmul float %125, %32
  %142 = fmul float %126, %33
  %143 = fadd float %142, %141
  %144 = fmul float %127, %34
  %145 = fadd float %143, %144
  %146 = fadd float %145, %47
  %147 = call float @llvm.AMDIL.clamp.(float %146, float 0.000000e+00, float 1.000000e+00)
  %148 = call float @llvm.AMDGPU.lrp(float %147, float %38, float %41)
  %149 = call float @llvm.AMDGPU.lrp(float %147, float %39, float %42)
  %150 = call float @llvm.AMDGPU.lrp(float %147, float %40, float %43)
  %151 = fsub float -0.000000e+00, %146
  %152 = call float @llvm.AMDIL.clamp.(float %151, float 0.000000e+00, float 1.000000e+00)
  %153 = call float @llvm.AMDGPU.lrp(float %152, float %44, float %41)
  %154 = call float @llvm.AMDGPU.lrp(float %152, float %45, float %42)
  %155 = call float @llvm.AMDGPU.lrp(float %152, float %46, float %43)
  %156 = fcmp ult float %146, 0.000000e+00
  %157 = select i1 %156, float 1.000000e+00, float 0.000000e+00
  %158 = fsub float -0.000000e+00, %157
  %159 = fptosi float %158 to i32
  %160 = bitcast i32 %159 to float
  %161 = bitcast float %160 to i32
  %162 = icmp ne i32 %161, 0
  %temp32.0 = select i1 %162, float %153, float %148
  %temp33.0 = select i1 %162, float %154, float %149
  %temp34.0 = select i1 %162, float %155, float %150
  %163 = fmul float %90, %90
  %164 = fmul float %91, %91
  %165 = fadd float %164, %163
  %166 = fmul float %92, %92
  %167 = fadd float %165, %166
  %168 = call float @llvm.AMDGPU.rsq(float %167)
  %169 = fmul float %90, %168
  %170 = fmul float %91, %168
  %171 = fmul float %92, %168
  %172 = fsub float -0.000000e+00, %169
  %173 = fadd float %35, %172
  %174 = fsub float -0.000000e+00, %170
  %175 = fadd float %36, %174
  %176 = fsub float -0.000000e+00, %171
  %177 = fadd float %37, %176
  %178 = fmul float %173, %173
  %179 = fmul float %175, %175
  %180 = fadd float %179, %178
  %181 = fmul float %177, %177
  %182 = fadd float %180, %181
  %183 = call float @llvm.AMDGPU.rsq(float %182)
  %184 = fmul float %173, %183
  %185 = fmul float %175, %183
  %186 = fmul float %177, %183
  %187 = fmul float %184, %125
  %188 = fmul float %185, %126
  %189 = fadd float %188, %187
  %190 = fmul float %186, %127
  %191 = fadd float %189, %190
  %192 = call float @llvm.AMDIL.clamp.(float %191, float 0.000000e+00, float 1.000000e+00)
  %193 = call float @llvm.pow.f32(float %192, float %55)
  %194 = fcmp uge float %146, 0x3F847AE140000000
  %195 = select i1 %194, float 1.000000e+00, float 0.000000e+00
  %196 = fsub float -0.000000e+00, %195
  %197 = fptosi float %196 to i32
  %198 = bitcast i32 %197 to float
  %199 = bitcast float %198 to i32
  %200 = and i32 %199, 1065353216
  %201 = bitcast i32 %200 to float
  %202 = fmul float %193, %201
  %203 = fmul float %48, %202
  %204 = fmul float %49, %202
  %205 = fmul float %50, %202
  %206 = insertelement <4 x float> undef, float %125, i32 0
  %207 = insertelement <4 x float> %206, float %126, i32 1
  %208 = insertelement <4 x float> %207, float %127, i32 2
  %209 = insertelement <4 x float> %208, float 0.000000e+00, i32 3
  %210 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %209)
  %211 = extractelement <4 x float> %210, i32 0
  %212 = extractelement <4 x float> %210, i32 1
  %213 = extractelement <4 x float> %210, i32 2
  %214 = extractelement <4 x float> %210, i32 3
  %215 = call float @fabs(float %213)
  %216 = fdiv float 1.000000e+00, %215
  %217 = fmul float %211, %216
  %218 = fadd float %217, 1.500000e+00
  %219 = fmul float %212, %216
  %220 = fadd float %219, 1.500000e+00
  %221 = bitcast float %220 to i32
  %222 = bitcast float %218 to i32
  %223 = bitcast float %214 to i32
  %224 = insertelement <4 x i32> undef, i32 %221, i32 0
  %225 = insertelement <4 x i32> %224, i32 %222, i32 1
  %226 = insertelement <4 x i32> %225, i32 %223, i32 2
  %227 = insertelement <4 x i32> %226, i32 undef, i32 3
  %228 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %227, <32 x i8> %73, <16 x i8> %75, i32 4)
  %229 = extractelement <4 x float> %228, i32 0
  %230 = extractelement <4 x float> %228, i32 1
  %231 = extractelement <4 x float> %228, i32 2
  %232 = fmul float %229, 4.000000e+00
  %233 = fadd float %232, %temp32.0
  %234 = fmul float %230, 4.000000e+00
  %235 = fadd float %234, %temp33.0
  %236 = fmul float %231, 4.000000e+00
  %237 = fadd float %236, %temp34.0
  %238 = bitcast float %94 to i32
  %239 = bitcast float %95 to i32
  %240 = insertelement <2 x i32> undef, i32 %238, i32 0
  %241 = insertelement <2 x i32> %240, i32 %239, i32 1
  %242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %69, <16 x i8> %71, i32 2)
  %243 = extractelement <4 x float> %242, i32 0
  %244 = extractelement <4 x float> %242, i32 1
  %245 = extractelement <4 x float> %242, i32 2
  %246 = extractelement <4 x float> %242, i32 3
  %247 = fmul float %243, %51
  %248 = fmul float %244, %52
  %249 = fmul float %245, %53
  %250 = fmul float %246, %54
  %251 = call float @llvm.AMDGPU.lrp(float %250, float %247, float %233)
  %252 = call float @llvm.AMDGPU.lrp(float %250, float %248, float %235)
  %253 = call float @llvm.AMDGPU.lrp(float %250, float %249, float %237)
  %254 = call float @llvm.AMDGPU.lrp(float %250, float 0.000000e+00, float %203)
  %255 = call float @llvm.AMDGPU.lrp(float %250, float 0.000000e+00, float %204)
  %256 = call float @llvm.AMDGPU.lrp(float %250, float 0.000000e+00, float %205)
  %257 = fmul float %137, %251
  %258 = fmul float %138, %252
  %259 = fmul float %139, %253
  %260 = fmul float %254, %115
  %261 = fadd float %260, %257
  %262 = fmul float %255, %115
  %263 = fadd float %262, %258
  %264 = fmul float %256, %115
  %265 = fadd float %264, %259
  %266 = fcmp uge float %93, %24
  %267 = select i1 %266, float %93, float %24
  %268 = call float @llvm.AMDIL.clamp.(float %267, float 0.000000e+00, float 1.000000e+00)
  %269 = fmul float %261, %268
  %270 = fmul float %263, %268
  %271 = fmul float %265, %268
  %272 = fmul float %140, %268
  %273 = fmul float %12, %22
  %274 = fmul float %100, %23
  %275 = fmul float %28, %272
  %276 = fadd float %275, %29
  %277 = fmul float %30, %272
  %278 = fadd float %277, %31
  %279 = fadd float %276, 0xBF847AE140000000
  %280 = fcmp ult float %279, 0.000000e+00
  %281 = select i1 %280, float 1.000000e+00, float 0.000000e+00
  %282 = fsub float -0.000000e+00, %281
  %283 = fptosi float %282 to i32
  %284 = bitcast i32 %283 to float
  %285 = bitcast float %284 to i32
  %286 = icmp ne i32 %285, 0
  br i1 %286, label %IF44, label %ENDIF43

IF44:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF43

ENDIF43:                                          ; preds = %main_body, %IF44
  %287 = fmul float %273, %26
  %288 = fmul float %274, %27
  %289 = bitcast float %287 to i32
  %290 = bitcast float %288 to i32
  %291 = insertelement <2 x i32> undef, i32 %289, i32 0
  %292 = insertelement <2 x i32> %291, i32 %290, i32 1
  %293 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %292, <32 x i8> %77, <16 x i8> %79, i32 2)
  %294 = extractelement <4 x float> %293, i32 0
  %295 = extractelement <4 x float> %293, i32 1
  %296 = fadd float %294, -5.000000e-01
  %297 = fadd float %295, -5.000000e-01
  %298 = fmul float %25, %272
  %299 = fmul float %296, %298
  %300 = fadd float %299, %273
  %301 = fmul float %297, %298
  %302 = fadd float %301, %274
  %303 = bitcast float %300 to i32
  %304 = bitcast float %302 to i32
  %305 = insertelement <2 x i32> undef, i32 %303, i32 0
  %306 = insertelement <2 x i32> %305, i32 %304, i32 1
  %307 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %306, <32 x i8> %81, <16 x i8> %83, i32 2)
  %308 = extractelement <4 x float> %307, i32 0
  %309 = extractelement <4 x float> %307, i32 1
  %310 = extractelement <4 x float> %307, i32 2
  %311 = extractelement <4 x float> %307, i32 3
  %312 = fmul float %269, %276
  %313 = fmul float %270, %276
  %314 = fmul float %271, %276
  %315 = fmul float %272, %276
  %316 = fmul float %308, %278
  %317 = fadd float %316, %312
  %318 = fmul float %309, %278
  %319 = fadd float %318, %313
  %320 = fmul float %310, %278
  %321 = fadd float %320, %314
  %322 = fmul float %311, %278
  %323 = fadd float %322, %315
  %324 = call i32 @llvm.SI.packf16(float %317, float %319)
  %325 = bitcast i32 %324 to float
  %326 = call i32 @llvm.SI.packf16(float %321, float %323)
  %327 = bitcast i32 %326 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %325, float %327, float %325, float %327)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180b00
c8190b01
c8140a00
c8150a01
100e0b05
d2820008
041e0d06
c81c0c00
c81d0c01
d2820008
04220f07
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000004
0021e6f2
10180906
100a1105
10160905
100a1107
101a0905
7e1c0280
d28a0010
0436190b
d28c000f
0436190b
d28e0011
0436190b
d2880012
0436190b
d2060104
02010111
7e085504
7e0a02ff
3fc00000
d2820011
0416090f
d2820010
04160910
c0840304
c0c60508
bf8c007f
f0800700
00430410
c0840100
bf8c0070
c2000920
bf8c007f
100e1600
c2000921
bf8c007f
d2820007
041c010c
c2000922
bf8c007f
d2820007
041c010d
c2000933
bf8c007f
062e0e00
d2060807
02010117
08100ef2
c200092e
bf8c007f
10121000
c200892a
bf8c007f
d282000f
04240307
d2060009
22010117
d2060809
02010109
081412f2
10201400
c2000932
bf8c007f
d2820010
04400109
d0020000
02010117
d2000011
0001e480
d2060011
22010111
7e221111
d10a0000
02010111
d200000f
0002210f
d2820015
043ded06
c8400900
c8410901
c83c0800
c83d0801
c0860300
c0c80500
bf8c007f
f0800f00
0064110f
c203893b
bf8c0070
101e2807
08201ef2
102a2b10
c203893a
bf8c007f
102c2607
d2820018
04562d0f
c8640200
c8650201
c2038948
c2060949
bf8c007f
7e2a020c
d2060015
02022a07
c203894a
bf8c007f
062a2a07
c203894b
bf8c007f
062a2a07
c2038941
bf8c007f
062a2a07
d2060815
02010115
082c2af2
d2820019
045a3315
10303119
c8640500
c8650501
c8680400
c8690401
1036351a
d282001c
046e3319
c86c0600
c86d0601
d282001c
0472371b
7e385b1c
10323919
c2038925
bf8c007f
08323207
1034391a
c2038924
bf8c007f
08343407
103a351a
d282001d
04763319
1036391b
c2038926
bf8c007f
08363607
d282001c
0476371b
7e385b1c
10323919
1034391a
1034171a
d2820019
046a1919
1034391b
d282000b
04661b1a
d206080b
0201010b
7e164f0b
c203893c
bf8c007f
0e161607
7e164b0b
7e1802ff
3c23d70a
d00c000c
02021917
d200000c
0031e480
d206000c
2201010c
7e18110c
361818f2
101a190b
c2038936
bf8c007f
10161a07
10161710
d282000b
042d010f
c2038944
c2060945
bf8c007f
7e18020c
d206000c
02021807
c2038946
bf8c007f
06181807
c2038947
bf8c007f
06181807
c2038940
bf8c007f
06181807
d206080e
0201010c
d282000b
04621d0b
c8300700
c8310701
c2038917
bf8c007f
d00c000c
02000f0c
7e2e0207
d200000c
00321917
d2060817
0201010c
10162f0b
c203892d
bf8c007f
10181007
c2060929
bf8c007f
d282000c
04301907
10301407
c2038931
bf8c007f
d2820018
04600f09
d200000c
0002310c
d282000c
0431ed05
10181910
c2038939
bf8c007f
10302407
d282000c
0432310f
c8600100
c8610101
d2820018
045a3115
10181918
c2038935
bf8c007f
10301a07
10303110
d2820018
0461010f
d282000c
04321d18
10182f0c
c203892c
bf8c007f
10101007
c2060928
bf8c007f
d2820007
04201907
10101407
c2038930
bf8c007f
d2820008
04200f09
d2000007
00021107
d2820004
041ded04
10080910
c2000938
bf8c007f
100a2200
d2820004
04120b0f
c8140000
c8150001
d2820005
045a0b15
10080905
c2000934
bf8c007f
100a1a00
100a0b10
d2820005
0415010f
d2820004
04121d05
10082f04
c8140300
c8150301
d2820000
045a0b15
100a2f00
c200091e
c200891f
bf8c007f
7e000201
d2820000
04020a00
c200091c
c200891d
bf8c007f
7e020201
d2820001
04060a00
060c02ff
bc23d70a
d0020000
02010106
d2000006
0001e480
d2060006
22010106
7e0c1106
d10a0006
02010106
c2000950
c2008951
bf8c007f
7e0c0201
d2820003
04180103
c2000911
bf8c007f
10060600
c2000910
bf8c007f
10040400
c086030c
c0c80518
c0800308
c0cc0510
c202091a
c2028919
c2040918
bf8c007f
7e100204
7e0e0205
7e0c0208
be842406
8984047e
7e1202f3
7c261280
88fe047e
10121103
10100f02
f0800300
00060808
bf8c0770
060e12f1
10140b06
d2820007
040e1507
060610f1
d2820006
040a1503
f0800f00
00640606
10040305
bf8c0770
d2820002
040a0109
1006030b
d2820003
040e0108
5e040503
1006030c
d2820003
040e0107
10020304
d2820000
04060106
5e000700
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..207]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+16], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+16], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+16], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MUL TEMP[2].xyz, TEMP[3].xyzz, CONST[13].wwww
 57: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 58: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 59: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 60: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 61: RSQ TEMP[3].x, TEMP[3].xxxx
 62: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 63: MUL TEMP[3], CONST[0], TEMP[5].xxxx
 64: MAD TEMP[3], CONST[1], TEMP[6].xxxx, TEMP[3]
 65: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[3]
 66: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 67: MOV TEMP[3].w, IMM[0].xxxx
 68: MOV TEMP[3].xyz, TEMP[1].xyzx
 69: MOV TEMP[4].w, IMM[0].xxxx
 70: MOV TEMP[4].xyz, TEMP[1].xyzx
 71: DP4 TEMP[3].x, CONST[14], TEMP[3]
 72: DP4 TEMP[4].x, CONST[15], TEMP[4]
 73: MOV TEMP[3].y, TEMP[4].xxxx
 74: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[11].xyzz
 75: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].xxxx, CONST[12].yyyy
 76: MOV TEMP[1].w, TEMP[4].xxxx
 77: MOV TEMP[4].zw, TEMP[2].yyxy
 78: MOV TEMP[2].x, TEMP[2].zzzz
 79: MOV TEMP[4].xy, TEMP[3].xyxx
 80: MOV OUT[1], CONST[8]
 81: MOV OUT[4], TEMP[2]
 82: MOV OUT[2], TEMP[1]
 83: MOV OUT[0], TEMP[0]
 84: MOV OUT[3], TEMP[4]
 85: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = extractelement <4 x float> %71, i32 3
  %76 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %77 = load <16 x i8> addrspace(2)* %76, !tbaa !0
  %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %5)
  %79 = extractelement <4 x float> %78, i32 0
  %80 = extractelement <4 x float> %78, i32 1
  %81 = extractelement <4 x float> %78, i32 2
  %82 = extractelement <4 x float> %78, i32 3
  %83 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
  %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %5)
  %86 = extractelement <4 x float> %85, i32 0
  %87 = extractelement <4 x float> %85, i32 1
  %88 = extractelement <4 x float> %85, i32 2
  %89 = fptosi float %79 to i32
  %90 = fptosi float %80 to i32
  %91 = fptosi float %81 to i32
  %92 = fptosi float %82 to i32
  %93 = bitcast i32 %89 to float
  %94 = bitcast i32 %90 to float
  %95 = bitcast i32 %91 to float
  %96 = bitcast i32 %92 to float
  %97 = fmul float %66, %46
  %98 = fadd float %97, %43
  %99 = fmul float %67, %47
  %100 = fadd float %99, %44
  %101 = fmul float %68, %48
  %102 = fadd float %101, %45
  %103 = bitcast float %96 to i32
  %104 = mul i32 3, %103
  %105 = bitcast i32 %104 to float
  %106 = bitcast float %95 to i32
  %107 = mul i32 3, %106
  %108 = bitcast i32 %107 to float
  %109 = bitcast float %94 to i32
  %110 = mul i32 3, %109
  %111 = bitcast i32 %110 to float
  %112 = bitcast float %93 to i32
  %113 = mul i32 3, %112
  %114 = bitcast i32 %113 to float
  %115 = bitcast float %114 to i32
  %116 = shl i32 %115, 4
  %117 = add i32 %116, 256
  %118 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %117)
  %119 = fmul float %118, %72
  %120 = shl i32 %115, 4
  %121 = add i32 %120, 260
  %122 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %121)
  %123 = fmul float %122, %72
  %124 = shl i32 %115, 4
  %125 = add i32 %124, 264
  %126 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %125)
  %127 = fmul float %126, %72
  %128 = shl i32 %115, 4
  %129 = add i32 %128, 268
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %72
  %132 = bitcast float %111 to i32
  %133 = shl i32 %132, 4
  %134 = add i32 %133, 256
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %73
  %137 = fadd float %136, %119
  %138 = shl i32 %132, 4
  %139 = add i32 %138, 260
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %73
  %142 = fadd float %141, %123
  %143 = shl i32 %132, 4
  %144 = add i32 %143, 264
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %73
  %147 = fadd float %146, %127
  %148 = shl i32 %132, 4
  %149 = add i32 %148, 268
  %150 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %149)
  %151 = fmul float %150, %73
  %152 = fadd float %151, %131
  %153 = bitcast float %108 to i32
  %154 = shl i32 %153, 4
  %155 = add i32 %154, 256
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %74
  %158 = fadd float %157, %137
  %159 = shl i32 %153, 4
  %160 = add i32 %159, 260
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %74
  %163 = fadd float %162, %142
  %164 = shl i32 %153, 4
  %165 = add i32 %164, 264
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %74
  %168 = fadd float %167, %147
  %169 = shl i32 %153, 4
  %170 = add i32 %169, 268
  %171 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %170)
  %172 = fmul float %171, %74
  %173 = fadd float %172, %152
  %174 = bitcast float %105 to i32
  %175 = shl i32 %174, 4
  %176 = add i32 %175, 256
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %75
  %179 = fadd float %178, %158
  %180 = shl i32 %174, 4
  %181 = add i32 %180, 260
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %75
  %184 = fadd float %183, %163
  %185 = shl i32 %174, 4
  %186 = add i32 %185, 264
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %75
  %189 = fadd float %188, %168
  %190 = shl i32 %174, 4
  %191 = add i32 %190, 268
  %192 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %191)
  %193 = fmul float %192, %75
  %194 = fadd float %193, %173
  %195 = bitcast float %96 to i32
  %196 = mul i32 3, %195
  %197 = add i32 %196, 1
  %198 = bitcast i32 %197 to float
  %199 = bitcast float %95 to i32
  %200 = mul i32 3, %199
  %201 = add i32 %200, 1
  %202 = bitcast i32 %201 to float
  %203 = bitcast float %94 to i32
  %204 = mul i32 3, %203
  %205 = add i32 %204, 1
  %206 = bitcast i32 %205 to float
  %207 = bitcast float %93 to i32
  %208 = mul i32 3, %207
  %209 = add i32 %208, 1
  %210 = bitcast i32 %209 to float
  %211 = bitcast float %210 to i32
  %212 = shl i32 %211, 4
  %213 = add i32 %212, 256
  %214 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %213)
  %215 = fmul float %214, %72
  %216 = shl i32 %211, 4
  %217 = add i32 %216, 260
  %218 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %217)
  %219 = fmul float %218, %72
  %220 = shl i32 %211, 4
  %221 = add i32 %220, 264
  %222 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %221)
  %223 = fmul float %222, %72
  %224 = shl i32 %211, 4
  %225 = add i32 %224, 268
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %72
  %228 = bitcast float %206 to i32
  %229 = shl i32 %228, 4
  %230 = add i32 %229, 256
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %73
  %233 = fadd float %232, %215
  %234 = shl i32 %228, 4
  %235 = add i32 %234, 260
  %236 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %235)
  %237 = fmul float %236, %73
  %238 = fadd float %237, %219
  %239 = shl i32 %228, 4
  %240 = add i32 %239, 264
  %241 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %240)
  %242 = fmul float %241, %73
  %243 = fadd float %242, %223
  %244 = shl i32 %228, 4
  %245 = add i32 %244, 268
  %246 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %245)
  %247 = fmul float %246, %73
  %248 = fadd float %247, %227
  %249 = bitcast float %202 to i32
  %250 = shl i32 %249, 4
  %251 = add i32 %250, 256
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %74
  %254 = fadd float %253, %233
  %255 = shl i32 %249, 4
  %256 = add i32 %255, 260
  %257 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %256)
  %258 = fmul float %257, %74
  %259 = fadd float %258, %238
  %260 = shl i32 %249, 4
  %261 = add i32 %260, 264
  %262 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %261)
  %263 = fmul float %262, %74
  %264 = fadd float %263, %243
  %265 = shl i32 %249, 4
  %266 = add i32 %265, 268
  %267 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %266)
  %268 = fmul float %267, %74
  %269 = fadd float %268, %248
  %270 = bitcast float %198 to i32
  %271 = shl i32 %270, 4
  %272 = add i32 %271, 256
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %75
  %275 = fadd float %274, %254
  %276 = shl i32 %270, 4
  %277 = add i32 %276, 260
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = fmul float %278, %75
  %280 = fadd float %279, %259
  %281 = shl i32 %270, 4
  %282 = add i32 %281, 264
  %283 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %282)
  %284 = fmul float %283, %75
  %285 = fadd float %284, %264
  %286 = shl i32 %270, 4
  %287 = add i32 %286, 268
  %288 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %287)
  %289 = fmul float %288, %75
  %290 = fadd float %289, %269
  %291 = bitcast float %96 to i32
  %292 = mul i32 3, %291
  %293 = add i32 %292, 2
  %294 = bitcast i32 %293 to float
  %295 = bitcast float %95 to i32
  %296 = mul i32 3, %295
  %297 = add i32 %296, 2
  %298 = bitcast i32 %297 to float
  %299 = bitcast float %94 to i32
  %300 = mul i32 3, %299
  %301 = add i32 %300, 2
  %302 = bitcast i32 %301 to float
  %303 = bitcast float %93 to i32
  %304 = mul i32 3, %303
  %305 = add i32 %304, 2
  %306 = bitcast i32 %305 to float
  %307 = bitcast float %306 to i32
  %308 = shl i32 %307, 4
  %309 = add i32 %308, 256
  %310 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %309)
  %311 = fmul float %310, %72
  %312 = shl i32 %307, 4
  %313 = add i32 %312, 260
  %314 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %313)
  %315 = fmul float %314, %72
  %316 = shl i32 %307, 4
  %317 = add i32 %316, 264
  %318 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %317)
  %319 = fmul float %318, %72
  %320 = shl i32 %307, 4
  %321 = add i32 %320, 268
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %72
  %324 = bitcast float %302 to i32
  %325 = shl i32 %324, 4
  %326 = add i32 %325, 256
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %73
  %329 = fadd float %328, %311
  %330 = shl i32 %324, 4
  %331 = add i32 %330, 260
  %332 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %331)
  %333 = fmul float %332, %73
  %334 = fadd float %333, %315
  %335 = shl i32 %324, 4
  %336 = add i32 %335, 264
  %337 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %336)
  %338 = fmul float %337, %73
  %339 = fadd float %338, %319
  %340 = shl i32 %324, 4
  %341 = add i32 %340, 268
  %342 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %341)
  %343 = fmul float %342, %73
  %344 = fadd float %343, %323
  %345 = bitcast float %298 to i32
  %346 = shl i32 %345, 4
  %347 = add i32 %346, 256
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %74
  %350 = fadd float %349, %329
  %351 = shl i32 %345, 4
  %352 = add i32 %351, 260
  %353 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %352)
  %354 = fmul float %353, %74
  %355 = fadd float %354, %334
  %356 = shl i32 %345, 4
  %357 = add i32 %356, 264
  %358 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %357)
  %359 = fmul float %358, %74
  %360 = fadd float %359, %339
  %361 = shl i32 %345, 4
  %362 = add i32 %361, 268
  %363 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %362)
  %364 = fmul float %363, %74
  %365 = fadd float %364, %344
  %366 = bitcast float %294 to i32
  %367 = shl i32 %366, 4
  %368 = add i32 %367, 256
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %75
  %371 = fadd float %370, %350
  %372 = shl i32 %366, 4
  %373 = add i32 %372, 260
  %374 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %373)
  %375 = fmul float %374, %75
  %376 = fadd float %375, %355
  %377 = shl i32 %366, 4
  %378 = add i32 %377, 264
  %379 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %378)
  %380 = fmul float %379, %75
  %381 = fadd float %380, %360
  %382 = shl i32 %366, 4
  %383 = add i32 %382, 268
  %384 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %383)
  %385 = fmul float %384, %75
  %386 = fadd float %385, %365
  %387 = fmul float %98, %179
  %388 = fmul float %100, %184
  %389 = fadd float %387, %388
  %390 = fmul float %102, %189
  %391 = fadd float %389, %390
  %392 = fmul float 1.000000e+00, %194
  %393 = fadd float %391, %392
  %394 = fmul float %98, %275
  %395 = fmul float %100, %280
  %396 = fadd float %394, %395
  %397 = fmul float %102, %285
  %398 = fadd float %396, %397
  %399 = fmul float 1.000000e+00, %290
  %400 = fadd float %398, %399
  %401 = fmul float %98, %371
  %402 = fmul float %100, %376
  %403 = fadd float %401, %402
  %404 = fmul float %102, %381
  %405 = fadd float %403, %404
  %406 = fmul float 1.000000e+00, %386
  %407 = fadd float %405, %406
  %408 = fmul float %86, %179
  %409 = fmul float %87, %184
  %410 = fadd float %408, %409
  %411 = fmul float %88, %189
  %412 = fadd float %410, %411
  %413 = fmul float 1.000000e+00, %194
  %414 = fadd float %412, %413
  %415 = fmul float %86, %275
  %416 = fmul float %87, %280
  %417 = fadd float %415, %416
  %418 = fmul float %88, %285
  %419 = fadd float %417, %418
  %420 = fmul float 1.000000e+00, %290
  %421 = fadd float %419, %420
  %422 = fmul float %86, %371
  %423 = fmul float %87, %376
  %424 = fadd float %422, %423
  %425 = fmul float %88, %381
  %426 = fadd float %424, %425
  %427 = fmul float 1.000000e+00, %386
  %428 = fadd float %426, %427
  %429 = fmul float %23, %393
  %430 = fmul float %24, %393
  %431 = fmul float %25, %393
  %432 = fmul float %26, %393
  %433 = fmul float %27, %400
  %434 = fadd float %433, %429
  %435 = fmul float %28, %400
  %436 = fadd float %435, %430
  %437 = fmul float %29, %400
  %438 = fadd float %437, %431
  %439 = fmul float %30, %400
  %440 = fadd float %439, %432
  %441 = fmul float %31, %407
  %442 = fadd float %441, %434
  %443 = fmul float %32, %407
  %444 = fadd float %443, %436
  %445 = fmul float %33, %407
  %446 = fadd float %445, %438
  %447 = fmul float %34, %407
  %448 = fadd float %447, %440
  %449 = fadd float %442, %35
  %450 = fadd float %444, %36
  %451 = fadd float %446, %37
  %452 = fadd float %448, %38
  %453 = fmul float %414, %54
  %454 = fmul float %421, %54
  %455 = fmul float %428, %54
  %456 = fmul float %11, %453
  %457 = fmul float %12, %453
  %458 = fmul float %13, %453
  %459 = fmul float %14, %454
  %460 = fadd float %459, %456
  %461 = fmul float %15, %454
  %462 = fadd float %461, %457
  %463 = fmul float %16, %454
  %464 = fadd float %463, %458
  %465 = fmul float %17, %455
  %466 = fadd float %465, %460
  %467 = fmul float %18, %455
  %468 = fadd float %467, %462
  %469 = fmul float %19, %455
  %470 = fadd float %469, %464
  %471 = fmul float %466, %466
  %472 = fmul float %468, %468
  %473 = fadd float %472, %471
  %474 = fmul float %470, %470
  %475 = fadd float %473, %474
  %476 = call float @llvm.AMDGPU.rsq(float %475)
  %477 = fmul float %466, %476
  %478 = fmul float %468, %476
  %479 = fmul float %470, %476
  %480 = fmul float %11, %393
  %481 = fmul float %12, %393
  %482 = fmul float %13, %393
  %483 = fmul float %14, %400
  %484 = fadd float %483, %480
  %485 = fmul float %15, %400
  %486 = fadd float %485, %481
  %487 = fmul float %16, %400
  %488 = fadd float %487, %482
  %489 = fmul float %17, %407
  %490 = fadd float %489, %484
  %491 = fmul float %18, %407
  %492 = fadd float %491, %486
  %493 = fmul float %19, %407
  %494 = fadd float %493, %488
  %495 = fadd float %490, %20
  %496 = fadd float %492, %21
  %497 = fadd float %494, %22
  %498 = fmul float %55, %495
  %499 = fmul float %56, %496
  %500 = fadd float %498, %499
  %501 = fmul float %57, %497
  %502 = fadd float %500, %501
  %503 = fmul float %58, 1.000000e+00
  %504 = fadd float %502, %503
  %505 = fmul float %59, %495
  %506 = fmul float %60, %496
  %507 = fadd float %505, %506
  %508 = fmul float %61, %497
  %509 = fadd float %507, %508
  %510 = fmul float %62, 1.000000e+00
  %511 = fadd float %509, %510
  %512 = fsub float -0.000000e+00, %49
  %513 = fadd float %495, %512
  %514 = fsub float -0.000000e+00, %50
  %515 = fadd float %496, %514
  %516 = fsub float -0.000000e+00, %51
  %517 = fadd float %497, %516
  %518 = fmul float %451, %52
  %519 = fadd float %518, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %513, float %515, float %517, float %519)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %504, float %511, float %477, float %478)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %479, float %478, float %479, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %449, float %450, float %451, float %452)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840708
bf8c000f
e00c2000
80021100
bf8c0770
7e021111
d2d6000d
02010701
34041a84
4a0204ff
00000100
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020400
bf8c0770
10020901
7e061112
d2d6000e
02010703
34121c84
4a0612ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060b03
7e061113
d2d60010
02010703
34142084
4a0614ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060d03
7e061114
d2d60011
02010703
34162284
4a0616ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060f03
4a0604ff
00000104
e0301000
80000303
bf8c0770
10060903
4a1012ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0b08
4a1014ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0d08
4a1016ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0f08
c0840700
bf8c007f
e00c2000
80021500
c2020129
c2028125
bf8c0070
7e100205
d2820012
04200916
10100712
c2020128
c2028124
bf8c007f
7e180205
d2820014
04300915
d282000c
04220314
4a1004ff
00000108
e0301000
80000808
bf8c0770
10100908
4a1e12ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220b0f
4a1e14ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220d0f
4a1e16ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220f0f
c202012a
c2028126
bf8c007f
7e1e0205
d2820015
043c0917
d282000c
04321115
4a0404ff
0000010c
e0301000
80000202
bf8c0770
10040902
4a1212ff
0000010c
e0301000
80000909
bf8c0770
d2820002
040a0b09
4a1214ff
0000010c
e0301000
80000909
bf8c0770
d2820002
040a0d09
4a1216ff
0000010c
e0301000
80000909
bf8c0770
d2820009
040a0f09
0604130c
c2020112
bf8c007f
102c0404
4a141a81
34141484
4a1614ff
00000100
e0301000
80000b0b
bf8c0770
1016090b
4a181c81
34261884
4a1826ff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0b0c
4a182081
342e1884
4a182eff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0d0c
4a182281
34301884
4a1830ff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0f0c
4a1814ff
00000104
e0301000
80000c0c
bf8c0770
1018090c
4a1e26ff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320b0f
4a1e2eff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320d0f
4a1e30ff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320f0f
101e1912
d2820019
043e1714
4a1e14ff
00000108
e0301000
80000f0f
bf8c0770
101e090f
4a3426ff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0b1a
4a342eff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0d1a
4a3430ff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0f1a
d2820019
04661f15
4a1414ff
0000010c
e0301000
80000a0a
bf8c0770
1014090a
4a2626ff
0000010c
e0301000
80001313
bf8c0770
d282000a
042a0b13
4a262eff
0000010c
e0301000
80001313
bf8c0770
d282000a
042a0d13
4a2630ff
0000010c
e0301000
80001313
bf8c0770
d2820013
042a0f13
06142719
c2020116
bf8c007f
d2820016
045a1404
4a1a1a82
342e1a84
4a1a2eff
00000100
e0301000
80000d0d
bf8c0770
101a090d
4a1c1c82
34301c84
4a1c30ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360b0e
4a1c2082
34321c84
4a1c32ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360d0e
4a1c2282
34221c84
4a1c22ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360f0e
4a1c2eff
00000104
e0301000
80000e0e
bf8c0770
101c090e
4a2030ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0b10
4a2032ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0d10
4a2022ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0f10
10201d12
d2820012
04421b14
4a202eff
00000108
e0301000
80001010
bf8c0770
10200910
4a2830ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420b14
4a2832ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420d14
4a2822ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420f14
d2820012
044a2115
4a282eff
0000010c
e0301000
80001414
bf8c0770
10280914
4a2a30ff
0000010c
e0301000
80001515
bf8c0770
d2820014
04520b15
4a2a32ff
0000010c
e0301000
80001515
bf8c0770
d2820014
04520d15
4a2222ff
0000010c
e0301000
80001111
bf8c0770
d2820006
04520f11
06080d12
c202011a
bf8c007f
d2820005
045a0804
c202011e
bf8c007f
060a0a04
c2020130
c2028131
bf8c007f
7e0e0205
d2820007
041c0905
c2020102
bf8c007f
10220404
c2028106
bf8c007f
d2820011
04461405
c204010a
bf8c007f
d2820011
04460808
c204810e
bf8c007f
06222209
c204812e
bf8c007f
0a282209
c2070101
bf8c007f
1024040e
c2050105
bf8c007f
d2820012
044a140a
c2048109
bf8c007f
d2820012
044a0809
c205810d
bf8c007f
0624240b
c205812d
bf8c007f
0a2c240b
c2058100
bf8c007f
102a040b
c2060104
bf8c007f
d2820015
0456140c
c2068108
bf8c007f
d2820015
0456080d
c207810c
bf8c007f
062a2a0f
c207812c
bf8c007f
0a2e2a0f
f800021f
07141617
c207813d
bf8c000f
100e240f
c207813c
bf8c007f
d2820007
041e2a0f
c207813e
bf8c007f
d2820007
041e220f
c207813f
bf8c007f
060e0e0f
c2078139
bf8c007f
1024240f
c2078138
bf8c007f
d2820012
044a2a0f
c207813a
bf8c007f
d2820011
044a220f
c207813b
bf8c007f
0622220f
c088070c
bf8c007f
e00c2000
80041400
bf8c0770
10000715
d2820000
04020314
d2820000
04021116
06001300
c2030137
bf8c007f
10000006
1006000e
10021915
d2820001
04061714
d2820001
04061f16
06022701
10020206
d2820003
040e020a
10101d15
d2820008
04221b14
d2820008
04222116
060c0d08
100c0c06
d2820003
040e0c09
1010000b
d2820008
0422020c
d2820008
04220c0d
10121108
d2820009
04260703
10000004
d2820000
04020205
d2820000
04020c08
d2820001
04260100
7e0c5b01
10020d03
10060d08
f800022f
01030711
10000d00
bf8c070f
7e0602f2
f800023f
03000100
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020804
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10020404
c2020115
bf8c007f
d2820001
04061404
c2020119
bf8c007f
d2820001
04060804
c202011d
bf8c007f
06020204
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820002
040a1404
c2020118
bf8c007f
d2820002
040a0804
c200011c
bf8c007f
06040400
f80008cf
00050102
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[3..15]
DCL TEMP[0]
DCL TEMP[1..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.0100,     4.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[4].xxxx
  2: MOV TEMP[1].xy, IN[3].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].w, IMM[0].yyyy
  9: DP4 TEMP[4].x, IMM[0].yyyy, CONST[13]
 10: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[12].xxxx
 11: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[5].x, TEMP[5].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 14: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 15: DP3 TEMP[2].x, TEMP[1].xyzz, IN[2].xyzz
 16: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[1].xyzz
 17: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 18: ADD TEMP[2].xyz, IN[2].xyzz, -TEMP[2].xyzz
 19: MOV TEMP[2].xyz, TEMP[2].xyzz
 20: TEX TEMP[2], TEMP[2], SAMP[0], CUBE
 21: DP4 TEMP[5].x, IMM[0].yyyy, CONST[15]
 22: ADD_SAT TEMP[5].x, TEMP[5].xxxx, CONST[12].zzzz
 23: MUL TEMP[5].x, TEMP[2].wwww, TEMP[5].xxxx
 24: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[2].xyzz, IMM[0].yyyy
 25: DP4 TEMP[2].x, IMM[0].yyyy, CONST[14]
 26: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[12].yyyy
 27: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].yyyy
 28: MUL TEMP[2], TEMP[3], TEMP[2]
 29: DP3 TEMP[3].x, TEMP[1].xyzz, CONST[4].xyzz
 30: ADD TEMP[3].x, TEMP[3].xxxx, CONST[8].wwww
 31: MOV_SAT TEMP[5].x, TEMP[3].xxxx
 32: LRP TEMP[5].xyz, TEMP[5].xxxx, CONST[6].xyzz, CONST[7].xyzz
 33: MOV_SAT TEMP[6].x, -TEMP[3].xxxx
 34: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[8].xyzz, CONST[7].xyzz
 35: SLT TEMP[7].x, TEMP[3].xxxx, IMM[0].wwww
 36: F2I TEMP[7].x, -TEMP[7]
 37: UIF TEMP[7].xxxx :1
 38:   MOV TEMP[6].xyz, TEMP[6].xyzx
 39: ELSE :1
 40:   MOV TEMP[6].xyz, TEMP[5].xyzx
 41: ENDIF
 42: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz
 43: RSQ TEMP[5].x, TEMP[5].xxxx
 44: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx
 45: ADD TEMP[5].xyz, CONST[5].xyzz, -TEMP[5].xyzz
 46: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz
 47: RSQ TEMP[7].x, TEMP[7].xxxx
 48: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 49: DP3_SAT TEMP[5].x, TEMP[5].xyzz, TEMP[1].xyzz
 50: POW TEMP[5].x, TEMP[5].xxxx, CONST[11].xxxx
 51: SGE TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
 52: F2I TEMP[3].x, -TEMP[3]
 53: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy
 54: MUL TEMP[3].x, TEMP[5].xxxx, TEMP[3].xxxx
 55: MUL TEMP[3].xyz, CONST[9].xyzz, TEMP[3].xxxx
 56: MOV TEMP[1].xyz, TEMP[1].xyzz
 57: TEX TEMP[1].xyz, TEMP[1], SAMP[2], CUBE
 58: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].yyyy, TEMP[6].xyzz
 59: MOV TEMP[5].xy, IN[3].xyyy
 60: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 61: MUL TEMP[5], TEMP[5], CONST[10]
 62: LRP TEMP[1].xyz, TEMP[5].wwww, TEMP[5].xyzz, TEMP[1].xyzz
 63: LRP TEMP[3].xyz, TEMP[5].wwww, IMM[0].wwww, TEMP[3].xyzz
 64: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz
 65: MAD TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xxxx, TEMP[1].xyzz
 66: MAX TEMP[1].x, IN[2].wwww, CONST[3].wwww
 67: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 68: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 69: MOV OUT[0], TEMP[2]
 70: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
  %71 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
  %75 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = fcmp ugt float %16, 0.000000e+00
  %78 = select i1 %77, float 1.000000e+00, float 0.000000e+00
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.AMDIL.clamp.(float %78, float 0.000000e+00, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = bitcast float %92 to i32
  %97 = icmp ne i32 %96, 0
  %. = select i1 %97, float -1.000000e+00, float 1.000000e+00
  %98 = fmul float 1.000000e+00, %53
  %99 = fmul float 1.000000e+00, %54
  %100 = fadd float %98, %99
  %101 = fmul float 1.000000e+00, %55
  %102 = fadd float %100, %101
  %103 = fmul float 1.000000e+00, %56
  %104 = fadd float %102, %103
  %105 = fadd float %104, %50
  %106 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
  %107 = fmul float %89, %89
  %108 = fmul float %90, %90
  %109 = fadd float %108, %107
  %110 = fmul float %91, %91
  %111 = fadd float %109, %110
  %112 = call float @llvm.AMDGPU.rsq(float %111)
  %113 = fmul float %89, %112
  %114 = fmul float %90, %112
  %115 = fmul float %91, %112
  %116 = fmul float %113, %.
  %117 = fmul float %114, %.
  %118 = fmul float %115, %.
  %119 = fmul float %116, %83
  %120 = fmul float %117, %84
  %121 = fadd float %120, %119
  %122 = fmul float %118, %85
  %123 = fadd float %121, %122
  %124 = fmul float %123, %116
  %125 = fmul float %123, %117
  %126 = fmul float %123, %118
  %127 = fmul float 2.000000e+00, %124
  %128 = fmul float 2.000000e+00, %125
  %129 = fmul float 2.000000e+00, %126
  %130 = fsub float -0.000000e+00, %127
  %131 = fadd float %83, %130
  %132 = fsub float -0.000000e+00, %128
  %133 = fadd float %84, %132
  %134 = fsub float -0.000000e+00, %129
  %135 = fadd float %85, %134
  %136 = insertelement <4 x float> undef, float %131, i32 0
  %137 = insertelement <4 x float> %136, float %133, i32 1
  %138 = insertelement <4 x float> %137, float %135, i32 2
  %139 = insertelement <4 x float> %138, float 0.000000e+00, i32 3
  %140 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %139)
  %141 = extractelement <4 x float> %140, i32 0
  %142 = extractelement <4 x float> %140, i32 1
  %143 = extractelement <4 x float> %140, i32 2
  %144 = extractelement <4 x float> %140, i32 3
  %145 = call float @fabs(float %143)
  %146 = fdiv float 1.000000e+00, %145
  %147 = fmul float %141, %146
  %148 = fadd float %147, 1.500000e+00
  %149 = fmul float %142, %146
  %150 = fadd float %149, 1.500000e+00
  %151 = bitcast float %150 to i32
  %152 = bitcast float %148 to i32
  %153 = bitcast float %144 to i32
  %154 = insertelement <4 x i32> undef, i32 %151, i32 0
  %155 = insertelement <4 x i32> %154, i32 %152, i32 1
  %156 = insertelement <4 x i32> %155, i32 %153, i32 2
  %157 = insertelement <4 x i32> %156, i32 undef, i32 3
  %158 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %157, <32 x i8> %66, <16 x i8> %68, i32 4)
  %159 = extractelement <4 x float> %158, i32 0
  %160 = extractelement <4 x float> %158, i32 1
  %161 = extractelement <4 x float> %158, i32 2
  %162 = extractelement <4 x float> %158, i32 3
  %163 = fmul float 1.000000e+00, %61
  %164 = fmul float 1.000000e+00, %62
  %165 = fadd float %163, %164
  %166 = fmul float 1.000000e+00, %63
  %167 = fadd float %165, %166
  %168 = fmul float 1.000000e+00, %64
  %169 = fadd float %167, %168
  %170 = fadd float %169, %52
  %171 = call float @llvm.AMDIL.clamp.(float %170, float 0.000000e+00, float 1.000000e+00)
  %172 = fmul float %162, %171
  %173 = call float @llvm.AMDGPU.lrp(float %172, float %159, float 1.000000e+00)
  %174 = call float @llvm.AMDGPU.lrp(float %172, float %160, float 1.000000e+00)
  %175 = call float @llvm.AMDGPU.lrp(float %172, float %161, float 1.000000e+00)
  %176 = fmul float 1.000000e+00, %57
  %177 = fmul float 1.000000e+00, %58
  %178 = fadd float %176, %177
  %179 = fmul float 1.000000e+00, %59
  %180 = fadd float %178, %179
  %181 = fmul float 1.000000e+00, %60
  %182 = fadd float %180, %181
  %183 = fadd float %182, %51
  %184 = call float @llvm.AMDIL.clamp.(float %183, float 0.000000e+00, float 1.000000e+00)
  %185 = call float @llvm.AMDGPU.lrp(float %184, float %79, float 1.000000e+00)
  %186 = call float @llvm.AMDGPU.lrp(float %184, float %80, float 1.000000e+00)
  %187 = call float @llvm.AMDGPU.lrp(float %184, float %81, float 1.000000e+00)
  %188 = call float @llvm.AMDGPU.lrp(float %184, float %82, float 1.000000e+00)
  %189 = fmul float %173, %185
  %190 = fmul float %174, %186
  %191 = fmul float %175, %187
  %192 = fmul float 1.000000e+00, %188
  %193 = fmul float %116, %26
  %194 = fmul float %117, %27
  %195 = fadd float %194, %193
  %196 = fmul float %118, %28
  %197 = fadd float %195, %196
  %198 = fadd float %197, %41
  %199 = call float @llvm.AMDIL.clamp.(float %198, float 0.000000e+00, float 1.000000e+00)
  %200 = call float @llvm.AMDGPU.lrp(float %199, float %32, float %35)
  %201 = call float @llvm.AMDGPU.lrp(float %199, float %33, float %36)
  %202 = call float @llvm.AMDGPU.lrp(float %199, float %34, float %37)
  %203 = fsub float -0.000000e+00, %198
  %204 = call float @llvm.AMDIL.clamp.(float %203, float 0.000000e+00, float 1.000000e+00)
  %205 = call float @llvm.AMDGPU.lrp(float %204, float %38, float %35)
  %206 = call float @llvm.AMDGPU.lrp(float %204, float %39, float %36)
  %207 = call float @llvm.AMDGPU.lrp(float %204, float %40, float %37)
  %208 = fcmp ult float %198, 0.000000e+00
  %209 = select i1 %208, float 1.000000e+00, float 0.000000e+00
  %210 = fsub float -0.000000e+00, %209
  %211 = fptosi float %210 to i32
  %212 = bitcast i32 %211 to float
  %213 = bitcast float %212 to i32
  %214 = icmp ne i32 %213, 0
  %temp24.0 = select i1 %214, float %205, float %200
  %temp25.0 = select i1 %214, float %206, float %201
  %temp26.0 = select i1 %214, float %207, float %202
  %215 = fmul float %83, %83
  %216 = fmul float %84, %84
  %217 = fadd float %216, %215
  %218 = fmul float %85, %85
  %219 = fadd float %217, %218
  %220 = call float @llvm.AMDGPU.rsq(float %219)
  %221 = fmul float %83, %220
  %222 = fmul float %84, %220
  %223 = fmul float %85, %220
  %224 = fsub float -0.000000e+00, %221
  %225 = fadd float %29, %224
  %226 = fsub float -0.000000e+00, %222
  %227 = fadd float %30, %226
  %228 = fsub float -0.000000e+00, %223
  %229 = fadd float %31, %228
  %230 = fmul float %225, %225
  %231 = fmul float %227, %227
  %232 = fadd float %231, %230
  %233 = fmul float %229, %229
  %234 = fadd float %232, %233
  %235 = call float @llvm.AMDGPU.rsq(float %234)
  %236 = fmul float %225, %235
  %237 = fmul float %227, %235
  %238 = fmul float %229, %235
  %239 = fmul float %236, %116
  %240 = fmul float %237, %117
  %241 = fadd float %240, %239
  %242 = fmul float %238, %118
  %243 = fadd float %241, %242
  %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00)
  %245 = call float @llvm.pow.f32(float %244, float %49)
  %246 = fcmp uge float %198, 0x3F847AE140000000
  %247 = select i1 %246, float 1.000000e+00, float 0.000000e+00
  %248 = fsub float -0.000000e+00, %247
  %249 = fptosi float %248 to i32
  %250 = bitcast i32 %249 to float
  %251 = bitcast float %250 to i32
  %252 = and i32 %251, 1065353216
  %253 = bitcast i32 %252 to float
  %254 = fmul float %245, %253
  %255 = fmul float %42, %254
  %256 = fmul float %43, %254
  %257 = fmul float %44, %254
  %258 = insertelement <4 x float> undef, float %116, i32 0
  %259 = insertelement <4 x float> %258, float %117, i32 1
  %260 = insertelement <4 x float> %259, float %118, i32 2
  %261 = insertelement <4 x float> %260, float 0.000000e+00, i32 3
  %262 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %261)
  %263 = extractelement <4 x float> %262, i32 0
  %264 = extractelement <4 x float> %262, i32 1
  %265 = extractelement <4 x float> %262, i32 2
  %266 = extractelement <4 x float> %262, i32 3
  %267 = call float @fabs(float %265)
  %268 = fdiv float 1.000000e+00, %267
  %269 = fmul float %263, %268
  %270 = fadd float %269, 1.500000e+00
  %271 = fmul float %264, %268
  %272 = fadd float %271, 1.500000e+00
  %273 = bitcast float %272 to i32
  %274 = bitcast float %270 to i32
  %275 = bitcast float %266 to i32
  %276 = insertelement <4 x i32> undef, i32 %273, i32 0
  %277 = insertelement <4 x i32> %276, i32 %274, i32 1
  %278 = insertelement <4 x i32> %277, i32 %275, i32 2
  %279 = insertelement <4 x i32> %278, i32 undef, i32 3
  %280 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %279, <32 x i8> %74, <16 x i8> %76, i32 4)
  %281 = extractelement <4 x float> %280, i32 0
  %282 = extractelement <4 x float> %280, i32 1
  %283 = extractelement <4 x float> %280, i32 2
  %284 = fmul float %281, 4.000000e+00
  %285 = fadd float %284, %temp24.0
  %286 = fmul float %282, 4.000000e+00
  %287 = fadd float %286, %temp25.0
  %288 = fmul float %283, 4.000000e+00
  %289 = fadd float %288, %temp26.0
  %290 = bitcast float %87 to i32
  %291 = bitcast float %88 to i32
  %292 = insertelement <2 x i32> undef, i32 %290, i32 0
  %293 = insertelement <2 x i32> %292, i32 %291, i32 1
  %294 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %293, <32 x i8> %70, <16 x i8> %72, i32 2)
  %295 = extractelement <4 x float> %294, i32 0
  %296 = extractelement <4 x float> %294, i32 1
  %297 = extractelement <4 x float> %294, i32 2
  %298 = extractelement <4 x float> %294, i32 3
  %299 = fmul float %295, %45
  %300 = fmul float %296, %46
  %301 = fmul float %297, %47
  %302 = fmul float %298, %48
  %303 = call float @llvm.AMDGPU.lrp(float %302, float %299, float %285)
  %304 = call float @llvm.AMDGPU.lrp(float %302, float %300, float %287)
  %305 = call float @llvm.AMDGPU.lrp(float %302, float %301, float %289)
  %306 = call float @llvm.AMDGPU.lrp(float %302, float 0.000000e+00, float %255)
  %307 = call float @llvm.AMDGPU.lrp(float %302, float 0.000000e+00, float %256)
  %308 = call float @llvm.AMDGPU.lrp(float %302, float 0.000000e+00, float %257)
  %309 = fmul float %189, %303
  %310 = fmul float %190, %304
  %311 = fmul float %191, %305
  %312 = fmul float %306, %106
  %313 = fadd float %312, %309
  %314 = fmul float %307, %106
  %315 = fadd float %314, %310
  %316 = fmul float %308, %106
  %317 = fadd float %316, %311
  %318 = fcmp uge float %86, %25
  %319 = select i1 %318, float %86, float %25
  %320 = call float @llvm.AMDIL.clamp.(float %319, float 0.000000e+00, float 1.000000e+00)
  %321 = call float @llvm.AMDGPU.lrp(float %320, float %313, float %22)
  %322 = call float @llvm.AMDGPU.lrp(float %320, float %315, float %23)
  %323 = call float @llvm.AMDGPU.lrp(float %320, float %317, float %24)
  %324 = call i32 @llvm.SI.packf16(float %321, float %322)
  %325 = bitcast i32 %324 to float
  %326 = call i32 @llvm.SI.packf16(float %323, float %192)
  %327 = bitcast i32 %326 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %325, float %327, float %325, float %327)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180b00
c8190b01
c8140a00
c8150a01
10060b05
d2820004
040e0d06
c80c0c00
c80d0c01
d2820004
04120703
7e085b04
100c0906
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000002
0021e6f2
10120506
100a0905
10100505
c8380400
c8390401
100a1d08
c83c0500
c83d0501
d2820005
04161f09
10060903
10140503
c8400600
c8410601
d2820002
0416210a
10061302
d2820003
040e1302
0824070f
10061102
d2820003
040e1102
0822070e
10061502
d2820002
040e1502
08260510
7e280280
d28a0017
044e2511
d28c0016
044e2511
d28e0018
044e2511
d2880019
044e2511
d2060102
02010118
7e045502
7e2a02ff
3fc00000
d2820018
04560516
d2820017
04560517
c0840300
c0c60500
bf8c007f
f0800f00
00430417
c0840100
bf8c0070
c200093c
c200893d
bf8c007f
7e040201
d2060002
02020400
c200093e
bf8c007f
06040400
c200093f
bf8c007f
06040400
c2000932
bf8c007f
06040400
d2060802
02010102
10180507
081a18f2
d2820016
04360b0c
c85c0100
c85d0101
c2000938
c2008939
bf8c007f
7e040201
d2060002
02020400
c200093a
bf8c007f
06040400
c200093b
bf8c007f
06040400
c2000931
bf8c007f
06040400
d2060802
02010102
080604f2
d2820017
040e2f02
103c2f16
7e160314
d28a0017
042a1308
d28c0016
042a1308
d28e0018
042a1308
d2880019
042a1308
d2060111
02010118
7e225511
d2820018
04562316
d2820017
04562317
c0860308
c0c80510
bf8c007f
f0800700
00641117
c2000910
bf8c0070
10281000
c2000911
bf8c007f
d2820014
04500109
c2000912
bf8c007f
d2820014
0450010a
c2000923
bf8c007f
063e2800
d2060814
0201011f
082a28f2
c200091d
bf8c007f
102c2a00
c2008919
bf8c007f
d2820018
04580314
d2060016
2201011f
d2060816
02010116
082e2cf2
10322e00
c2000921
bf8c007f
d2820019
04640116
d0020000
0201011f
d200001a
0001e480
d206001a
2201011a
7e34111a
d10a0000
0201011a
d2000018
00023318
d2820020
0461ed12
c8640900
c8650901
c8600800
c8610801
c0860304
c0c80508
bf8c007f
f0800f00
00641a18
c201092b
bf8c0070
10303a02
083230f2
10404119
c2010929
bf8c007f
10423602
d2820020
04824318
103c411e
10401d0e
d2820020
04821f0f
d2820020
04822110
7e405b20
101e410f
c2010915
bf8c007f
081e1e02
101c410e
c2010914
bf8c007f
081c1c02
10421d0e
d2820021
04861f0f
10204110
c2010916
bf8c007f
08202002
d2820020
04862110
7e405b20
101e410f
101c410e
101c110e
d282000e
043a130f
101e4110
d2820008
043a150f
d2060808
02010108
7e104f08
c201092c
bf8c007f
0e101002
7e104b08
7e1202ff
3c23d70a
d00c0002
0202131f
d2000009
0009e480
d2060009
22010109
7e121109
361212f2
10101308
c2010925
bf8c007f
10121002
10121319
d282000a
04250118
c2010934
c2018935
bf8c007f
7e120203
d2060009
02021202
c2010936
bf8c007f
06121202
c2010937
bf8c007f
06121202
c2010930
bf8c007f
06121202
d2060809
02010109
d282000e
047a130a
c8280700
c8290701
c201090f
bf8c007f
d00c0004
0200050a
7e160202
d200000a
0012150b
d206080a
0201010a
081614f2
c201090d
bf8c007f
101e1602
d282000e
043e1d0a
d282000f
0436090c
c8400000
c8410001
d2820010
040e2102
101e210f
c201091c
bf8c007f
10202a02
c2018918
bf8c007f
d2820010
04400714
103c2e02
c2010920
bf8c007f
d282001e
04780516
d2000010
00023d10
d2820010
0441ed11
10202119
c2010928
bf8c007f
103c3402
d2820010
04423d18
101e210f
c2010924
bf8c007f
10201002
10202119
d2820010
04410118
d282000f
043e1310
c201090c
bf8c007f
10201602
d282000f
04421f0a
5e1c1d0f
d2820004
04360d0c
c8140200
c8150201
d2820005
040e0b02
10080b04
c201091e
bf8c007f
100a2a02
c201891a
bf8c007f
d2820005
04140714
100c2e02
c2010922
bf8c007f
d2820006
04180516
d2000005
00020d05
d2820005
0415ed13
100a0b19
c200092a
bf8c007f
100c3800
d2820005
04160d18
10080b04
c2000926
bf8c007f
100a1000
100a0b19
d2820005
04150118
d2820004
04121305
c200090e
bf8c007f
100a1600
d2820004
0416090a
c8140300
c8150301
d2820000
040e0b02
5e000104
f8001c0f
000e000e
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..15]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[13].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 10: RSQ TEMP[3].x, TEMP[3].xxxx
 11: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 12: MUL TEMP[3], CONST[0], TEMP[0].xxxx
 13: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
 14: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 15: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 16: MOV TEMP[3].w, IMM[0].yyyy
 17: MOV TEMP[3].xyz, TEMP[0].xyzx
 18: MOV TEMP[4].w, IMM[0].yyyy
 19: MOV TEMP[4].xyz, TEMP[0].xyzx
 20: DP4 TEMP[3].x, CONST[14], TEMP[3]
 21: DP4 TEMP[4].x, CONST[15], TEMP[4]
 22: MOV TEMP[3].y, TEMP[4].xxxx
 23: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[11].xyzz
 24: MAD TEMP[4].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 25: MOV TEMP[0].w, TEMP[4].xxxx
 26: MOV TEMP[4].zw, TEMP[2].yyxy
 27: MOV TEMP[2].x, TEMP[2].zzzz
 28: MOV TEMP[4].xy, TEMP[3].xyxx
 29: MOV OUT[1], CONST[8]
 30: MOV OUT[4], TEMP[2]
 31: MOV OUT[2], TEMP[0]
 32: MOV OUT[0], TEMP[1]
 33: MOV OUT[3], TEMP[4]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = fmul float %66, %46
  %76 = fadd float %75, %43
  %77 = fmul float %67, %47
  %78 = fadd float %77, %44
  %79 = fmul float %68, %48
  %80 = fadd float %79, %45
  %81 = fmul float %23, %76
  %82 = fmul float %24, %76
  %83 = fmul float %25, %76
  %84 = fmul float %26, %76
  %85 = fmul float %27, %78
  %86 = fadd float %85, %81
  %87 = fmul float %28, %78
  %88 = fadd float %87, %82
  %89 = fmul float %29, %78
  %90 = fadd float %89, %83
  %91 = fmul float %30, %78
  %92 = fadd float %91, %84
  %93 = fmul float %31, %80
  %94 = fadd float %93, %86
  %95 = fmul float %32, %80
  %96 = fadd float %95, %88
  %97 = fmul float %33, %80
  %98 = fadd float %97, %90
  %99 = fmul float %34, %80
  %100 = fadd float %99, %92
  %101 = fadd float %94, %35
  %102 = fadd float %96, %36
  %103 = fadd float %98, %37
  %104 = fadd float %100, %38
  %105 = fmul float %72, %54
  %106 = fmul float %73, %54
  %107 = fmul float %74, %54
  %108 = fmul float %11, %105
  %109 = fmul float %12, %105
  %110 = fmul float %13, %105
  %111 = fmul float %14, %106
  %112 = fadd float %111, %108
  %113 = fmul float %15, %106
  %114 = fadd float %113, %109
  %115 = fmul float %16, %106
  %116 = fadd float %115, %110
  %117 = fmul float %17, %107
  %118 = fadd float %117, %112
  %119 = fmul float %18, %107
  %120 = fadd float %119, %114
  %121 = fmul float %19, %107
  %122 = fadd float %121, %116
  %123 = fmul float %118, %118
  %124 = fmul float %120, %120
  %125 = fadd float %124, %123
  %126 = fmul float %122, %122
  %127 = fadd float %125, %126
  %128 = call float @llvm.AMDGPU.rsq(float %127)
  %129 = fmul float %118, %128
  %130 = fmul float %120, %128
  %131 = fmul float %122, %128
  %132 = fmul float %11, %76
  %133 = fmul float %12, %76
  %134 = fmul float %13, %76
  %135 = fmul float %14, %78
  %136 = fadd float %135, %132
  %137 = fmul float %15, %78
  %138 = fadd float %137, %133
  %139 = fmul float %16, %78
  %140 = fadd float %139, %134
  %141 = fmul float %17, %80
  %142 = fadd float %141, %136
  %143 = fmul float %18, %80
  %144 = fadd float %143, %138
  %145 = fmul float %19, %80
  %146 = fadd float %145, %140
  %147 = fadd float %142, %20
  %148 = fadd float %144, %21
  %149 = fadd float %146, %22
  %150 = fmul float %55, %147
  %151 = fmul float %56, %148
  %152 = fadd float %150, %151
  %153 = fmul float %57, %149
  %154 = fadd float %152, %153
  %155 = fmul float %58, 1.000000e+00
  %156 = fadd float %154, %155
  %157 = fmul float %59, %147
  %158 = fmul float %60, %148
  %159 = fadd float %157, %158
  %160 = fmul float %61, %149
  %161 = fadd float %159, %160
  %162 = fmul float %62, 1.000000e+00
  %163 = fadd float %161, %162
  %164 = fsub float -0.000000e+00, %49
  %165 = fadd float %147, %164
  %166 = fsub float -0.000000e+00, %50
  %167 = fadd float %148, %166
  %168 = fsub float -0.000000e+00, %51
  %169 = fadd float %149, %168
  %170 = fmul float %103, %52
  %171 = fadd float %170, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %165, float %167, float %169, float %171)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %156, float %163, float %129, float %130)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %131, float %130, float %131, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %101, float %102, float %103, float %104)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c2020129
c2028125
bf8c0070
7e020205
d2820001
04040904
c2020128
c2028124
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012a
c2028126
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820008
04140904
c2020102
bf8c007f
100a0404
c2028106
bf8c007f
d2820005
04160205
c204010a
bf8c007f
d2820005
04160608
c204810e
bf8c007f
060a0a09
c204812e
bf8c007f
0a120a09
c2048101
bf8c007f
100c0409
c2058105
bf8c007f
d2820006
041a020b
c2050109
bf8c007f
d2820006
041a060a
c206010d
bf8c007f
060c0c0c
c206012d
bf8c007f
0a140c0c
c2060100
bf8c007f
100e040c
c2068104
bf8c007f
d2820007
041e020d
c2070108
bf8c007f
d2820007
041e060e
c207810c
bf8c007f
060e0e0f
c207812c
bf8c007f
0a160e0f
f800021f
08090a0b
c0880704
bf8c000f
e00c2000
80040900
c2030137
bf8c0070
10001406
10101206
101a1009
d282000d
0436000b
10121606
d282000a
0436120a
1016100c
d282000b
042e000d
d282000b
042e120e
1018170b
d282000c
0432150a
10101004
d2820000
04220005
d2820000
04021208
d2820008
04320100
7e125b08
1010130a
1014130b
c202013d
bf8c007f
10160c04
c202013c
bf8c007f
d282000b
042e0e04
c202013e
bf8c007f
d282000b
042e0a04
c202013f
bf8c007f
06161604
c2020139
bf8c007f
100c0c04
c2020138
bf8c007f
d2820006
041a0e04
c202013a
bf8c007f
d2820005
041a0a04
c202013b
bf8c007f
060a0a04
f800022f
080a0b05
10001300
bf8c070f
7e0a0280
f800023f
05000800
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: DP4 TEMP[2].x, TEMP[1], CONST[8]
  5: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].yyyy
  6: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].xxxx
  7: MUL TEMP[1], TEMP[1], TEMP[2]
  8: MAX TEMP[2].x, IN[2].wwww, CONST[4].wwww
  9: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 10: MUL TEMP[1], TEMP[1], TEMP[2].xxxx
 11: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[2].xy, TEMP[2].xyyy
 13: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
 14: MAD TEMP[2].x, TEMP[2].xxxx, CONST[3].zzzz, CONST[3].wwww
 15: RCP TEMP[2].x, TEMP[2].xxxx
 16: ADD TEMP[2].x, TEMP[2].xxxx, -IN[3].zzzz
 17: MUL_SAT TEMP[2].x, TEMP[2].xxxx, IN[3].wwww
 18: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx
 19: MOV TEMP[1].w, TEMP[2].xxxx
 20: MOV OUT[0], TEMP[1]
 21: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %34 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %35 = load <32 x i8> addrspace(2)* %34, !tbaa !0
  %36 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0
  %38 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %39 = load <32 x i8> addrspace(2)* %38, !tbaa !0
  %40 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0
  %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %49 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %51 = fmul float %13, %32
  %52 = fadd float %51, %33
  %53 = bitcast float %47 to i32
  %54 = bitcast float %48 to i32
  %55 = insertelement <2 x i32> undef, i32 %53, i32 0
  %56 = insertelement <2 x i32> %55, i32 %54, i32 1
  %57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %35, <16 x i8> %37, i32 2)
  %58 = extractelement <4 x float> %57, i32 0
  %59 = extractelement <4 x float> %57, i32 1
  %60 = extractelement <4 x float> %57, i32 2
  %61 = extractelement <4 x float> %57, i32 3
  %62 = fmul float %58, %28
  %63 = fmul float %59, %29
  %64 = fadd float %62, %63
  %65 = fmul float %60, %30
  %66 = fadd float %64, %65
  %67 = fmul float %61, %31
  %68 = fadd float %66, %67
  %69 = fadd float %68, %27
  %70 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %71 = call float @llvm.AMDGPU.lrp(float %70, float %42, float 1.000000e+00)
  %72 = call float @llvm.AMDGPU.lrp(float %70, float %43, float 1.000000e+00)
  %73 = call float @llvm.AMDGPU.lrp(float %70, float %44, float 1.000000e+00)
  %74 = call float @llvm.AMDGPU.lrp(float %70, float %45, float 1.000000e+00)
  %75 = fmul float %58, %71
  %76 = fmul float %59, %72
  %77 = fmul float %60, %73
  %78 = fmul float %61, %74
  %79 = fcmp uge float %46, %26
  %80 = select i1 %79, float %46, float %26
  %81 = call float @llvm.AMDIL.clamp.(float %80, float 0.000000e+00, float 1.000000e+00)
  %82 = fmul float %75, %81
  %83 = fmul float %76, %81
  %84 = fmul float %77, %81
  %85 = fmul float %78, %81
  %86 = fmul float %12, %22
  %87 = fmul float %52, %23
  %88 = bitcast float %86 to i32
  %89 = bitcast float %87 to i32
  %90 = insertelement <2 x i32> undef, i32 %88, i32 0
  %91 = insertelement <2 x i32> %90, i32 %89, i32 1
  %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %39, <16 x i8> %41, i32 2)
  %93 = extractelement <4 x float> %92, i32 0
  %94 = fmul float %93, %24
  %95 = fadd float %94, %25
  %96 = fdiv float 1.000000e+00, %95
  %97 = fsub float -0.000000e+00, %49
  %98 = fadd float %96, %97
  %99 = fmul float %98, %50
  %100 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00)
  %101 = fmul float %85, %100
  %102 = call i32 @llvm.SI.packf16(float %82, float %83)
  %103 = bitcast i32 %102 to float
  %104 = call i32 @llvm.SI.packf16(float %84, float %101)
  %105 = bitcast i32 %104 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %103, float %105, float %103, float %105)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c0840100
bf8c0070
c2000921
bf8c007f
10100a00
c2000920
bf8c007f
d2820008
04200104
c2000922
bf8c007f
d2820008
04200106
c2000923
bf8c007f
d2820008
04200107
c2000919
bf8c007f
06101000
d2060809
02010108
081612f2
c8200100
c8210101
d2820008
042e1109
10101105
c8280700
c8290701
c2000913
bf8c007f
d00c000c
0200010a
7e180200
d200000a
0032150c
d206080c
0201010a
10101908
c8280000
c8290001
d282000a
042e1509
10141504
1014190a
5e10110a
c8280200
c8290201
d282000a
042e1509
10141506
1014190a
c8340300
c8350301
d2820009
042e1b09
10081307
10081904
c8140a00
c8150a01
c2000928
c2008929
bf8c007f
7e0c0201
d2820003
04180103
c2000909
bf8c007f
100e0600
c2000908
bf8c007f
100c0400
c0800304
c0c60508
bf8c007f
f0800100
00030206
c200090e
c200890f
bf8c0070
7e060201
d2820002
040c0102
7e045502
08040b02
c80c0b00
c80d0b01
10000702
d2060800
02010100
10000104
5e00010a
f8001c0f
00080008
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..18]
DCL TEMP[0..3], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[15].xyzz, CONST[14].xyzz
  1: MUL TEMP[1], CONST[8], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[9], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[10], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[11]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[2], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[2].xyz, TEMP[2], CONST[3]
  9: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[16].xyzz
 10: MAD TEMP[3].x, TEMP[1].zzzz, CONST[17].xxxx, CONST[17].yyyy
 11: MOV TEMP[2].w, TEMP[3].xxxx
 12: MUL TEMP[3], CONST[4], TEMP[0].xxxx
 13: MAD TEMP[3], CONST[5], TEMP[0].yyyy, TEMP[3]
 14: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[3]
 15: ADD TEMP[0].z, TEMP[0], CONST[7]
 16: MAD TEMP[0].x, TEMP[0].zzzz, CONST[18].xxxx, CONST[18].yyyy
 17: MOV TEMP[0].y, CONST[18].zzzz
 18: MAD TEMP[3].xy, IN[1].xyyy, CONST[13].xyyy, CONST[13].zwww
 19: MOV TEMP[3].zw, TEMP[0].yyxy
 20: MOV OUT[3], TEMP[3]
 21: MOV OUT[1], CONST[12]
 22: MOV OUT[2], TEMP[2]
 23: MOV OUT[0], TEMP[1]
 24: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = extractelement <4 x float> %67, i32 2
  %71 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %5)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = fmul float %68, %54
  %77 = fadd float %76, %51
  %78 = fmul float %69, %55
  %79 = fadd float %78, %52
  %80 = fmul float %70, %56
  %81 = fadd float %80, %53
  %82 = fmul float %27, %77
  %83 = fmul float %28, %77
  %84 = fmul float %29, %77
  %85 = fmul float %30, %77
  %86 = fmul float %31, %79
  %87 = fadd float %86, %82
  %88 = fmul float %32, %79
  %89 = fadd float %88, %83
  %90 = fmul float %33, %79
  %91 = fadd float %90, %84
  %92 = fmul float %34, %79
  %93 = fadd float %92, %85
  %94 = fmul float %35, %81
  %95 = fadd float %94, %87
  %96 = fmul float %36, %81
  %97 = fadd float %96, %89
  %98 = fmul float %37, %81
  %99 = fadd float %98, %91
  %100 = fmul float %38, %81
  %101 = fadd float %100, %93
  %102 = fadd float %95, %39
  %103 = fadd float %97, %40
  %104 = fadd float %99, %41
  %105 = fadd float %101, %42
  %106 = fmul float %11, %77
  %107 = fmul float %12, %77
  %108 = fmul float %13, %77
  %109 = fmul float %14, %79
  %110 = fadd float %109, %106
  %111 = fmul float %15, %79
  %112 = fadd float %111, %107
  %113 = fmul float %16, %79
  %114 = fadd float %113, %108
  %115 = fmul float %17, %81
  %116 = fadd float %115, %110
  %117 = fmul float %18, %81
  %118 = fadd float %117, %112
  %119 = fmul float %19, %81
  %120 = fadd float %119, %114
  %121 = fadd float %116, %20
  %122 = fadd float %118, %21
  %123 = fadd float %120, %22
  %124 = fsub float -0.000000e+00, %57
  %125 = fadd float %121, %124
  %126 = fsub float -0.000000e+00, %58
  %127 = fadd float %122, %126
  %128 = fsub float -0.000000e+00, %59
  %129 = fadd float %123, %128
  %130 = fmul float %104, %60
  %131 = fadd float %130, %61
  %132 = fmul float %23, %77
  %133 = fmul float %24, %79
  %134 = fadd float %133, %132
  %135 = fmul float %25, %81
  %136 = fadd float %135, %134
  %137 = fadd float %136, %26
  %138 = fmul float %137, %62
  %139 = fadd float %138, %63
  %140 = fmul float %74, %47
  %141 = fadd float %140, %49
  %142 = fmul float %75, %48
  %143 = fadd float %142, %50
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %43, float %44, float %45, float %46)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %125, float %127, float %129, float %131)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %141, float %143, float %139, float %64)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %102, float %103, float %104, float %105)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020133
c2028132
c2040131
c2048130
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202013d
c2028139
bf8c0070
7e020205
d2820001
04040904
c202013c
c2028138
bf8c007f
7e040205
d2820002
04080903
c2020122
bf8c007f
100e0404
c2020126
bf8c007f
d2820007
041e0204
c202013e
c202813a
bf8c007f
7e100205
d2820003
04200905
c202012a
bf8c007f
d2820004
041e0604
c202012e
bf8c007f
06080804
c2020144
c2028145
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2020106
bf8c007f
d2820006
041a0204
c202010a
bf8c007f
d2820006
041a0604
c202010e
bf8c007f
060c0c04
c2020142
bf8c007f
0a0c0c04
c2020101
bf8c007f
100e0404
c2020105
bf8c007f
d2820007
041e0204
c2020109
bf8c007f
d2820007
041e0604
c202010d
bf8c007f
060e0e04
c2020141
bf8c007f
0a0e0e04
c2020100
bf8c007f
10100404
c2020104
bf8c007f
d2820008
04220204
c2020108
bf8c007f
d2820008
04220604
c202010c
bf8c007f
06101004
c2020140
bf8c007f
0a101004
f800021f
05060708
c2020112
bf8c000f
100a0404
c2020116
bf8c007f
d2820005
04160204
c202011a
bf8c007f
d2820005
04160604
c202011e
bf8c007f
060a0a04
c2020148
c2028149
bf8c007f
7e0c0205
d2820005
04180905
c0820704
bf8c007f
e00c2000
80010600
c2020135
c2028137
bf8c0070
7e000205
d2820000
04000907
c2020134
c2028136
bf8c007f
7e140205
d2820006
04280906
c202014a
bf8c007f
7e0e0204
f800022f
07050006
c2020123
bf8c000f
10000404
c2020127
bf8c007f
d2820000
04020204
c202012b
bf8c007f
d2820000
04020604
c202012f
bf8c007f
06000004
c2020121
bf8c007f
100a0404
c2020125
bf8c007f
d2820005
04160204
c2020129
bf8c007f
d2820005
04160604
c202012d
bf8c007f
060a0a04
c2020120
bf8c007f
10040404
c2020124
bf8c007f
d2820001
040a0204
c2020128
bf8c007f
d2820001
04060604
c200012c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[8..9]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy
  2: DP4 TEMP[1].x, IMM[0].xxxx, CONST[6]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[4].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].xxxx
  5: MOV TEMP[2].w, TEMP[1].wwww
  6: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[1].xyyy
  7: MOV TEMP[3].xy, TEMP[3].xyyy
  8: TEX TEMP[3], TEMP[3], SAMP[0], 2D
  9: DP4 TEMP[4].x, IMM[0].xxxx, CONST[5]
 10: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx
 11: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 12: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[0].yzww
 13: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx
 14: RCP TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 17: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[1].xyzz
 18: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 19: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 20: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 21: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[2].xyzz
 22: MOV OUT[0], TEMP[2]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %49 = fmul float %13, %38
  %50 = fadd float %49, %39
  %51 = fmul float 1.000000e+00, %34
  %52 = fmul float 1.000000e+00, %35
  %53 = fadd float %51, %52
  %54 = fmul float 1.000000e+00, %36
  %55 = fadd float %53, %54
  %56 = fmul float 1.000000e+00, %37
  %57 = fadd float %55, %56
  %58 = fadd float %57, %29
  %59 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %59, float %44, float 1.000000e+00)
  %61 = call float @llvm.AMDGPU.lrp(float %59, float %45, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %59, float %46, float 1.000000e+00)
  %63 = call float @llvm.AMDGPU.lrp(float %59, float %47, float 1.000000e+00)
  %64 = fmul float %12, %22
  %65 = fmul float %50, %23
  %66 = bitcast float %64 to i32
  %67 = bitcast float %65 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %41, <16 x i8> %43, i32 2)
  %71 = extractelement <4 x float> %70, i32 0
  %72 = extractelement <4 x float> %70, i32 1
  %73 = extractelement <4 x float> %70, i32 2
  %74 = extractelement <4 x float> %70, i32 3
  %75 = fmul float 1.000000e+00, %30
  %76 = fmul float 1.000000e+00, %31
  %77 = fadd float %75, %76
  %78 = fmul float 1.000000e+00, %32
  %79 = fadd float %77, %78
  %80 = fmul float 1.000000e+00, %33
  %81 = fadd float %79, %80
  %82 = fadd float %81, %28
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = fmul float %83, %74
  %85 = fmul float %71, 0x3FCB367A00000000
  %86 = fmul float %72, 0x3FE6E2EB20000000
  %87 = fadd float %86, %85
  %88 = fmul float %73, 0x3FB27BB300000000
  %89 = fadd float %87, %88
  %90 = fcmp uge float %89, 0x3F50624DE0000000
  %91 = select i1 %90, float %89, float 0x3F50624DE0000000
  %92 = fdiv float 1.000000e+00, %91
  %93 = fmul float %71, %92
  %94 = fmul float %72, %92
  %95 = fmul float %73, %92
  %96 = fmul float %60, %71
  %97 = fmul float %61, %72
  %98 = fmul float %62, %73
  %99 = fmul float %84, %93
  %100 = fadd float %99, %96
  %101 = fmul float %84, %94
  %102 = fadd float %101, %97
  %103 = fmul float %84, %95
  %104 = fadd float %103, %98
  %105 = fmul float %100, 4.000000e+00
  %106 = fmul float %102, 4.000000e+00
  %107 = fmul float %104, 4.000000e+00
  %108 = fcmp uge float %48, %27
  %109 = select i1 %108, float %48, float %27
  %110 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %110, float %105, float %24)
  %112 = call float @llvm.AMDGPU.lrp(float %110, float %106, float %25)
  %113 = call float @llvm.AMDGPU.lrp(float %110, float %107, float %26)
  %114 = call i32 @llvm.SI.packf16(float %111, float %112)
  %115 = bitcast i32 %114 to float
  %116 = call i32 @llvm.SI.packf16(float %113, float %63)
  %117 = bitcast i32 %116 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
c0840100
bf8c007f
c2000924
c2008925
bf8c007f
7e080201
d2820003
04100103
c2000905
bf8c007f
10080600
c2000904
bf8c007f
10060400
c0800300
c0c60500
bf8c007f
f0800f00
00030203
bf8c0770
100c04ff
3e59b3d0
7e0e02ff
3f371759
d2820006
041a0f03
7e0e02ff
3d93dd98
d2820006
041a0f04
7e0e02ff
3a83126f
d00c0000
02020f06
d2000006
00020d07
7e0c5506
10140d03
befc0306
c8240100
c8250101
c2000918
c2008919
bf8c007f
7e0e0201
d2060007
02020e00
c200091a
bf8c007f
060e0e00
c200091b
bf8c007f
060e0e00
c2000911
bf8c007f
060e0e00
d2060807
02010107
08100ef2
d2820009
04221307
10160709
c2000914
c2008915
bf8c007f
7e120201
d2060009
02021200
c2000916
bf8c007f
06121200
c2000917
bf8c007f
06121200
c2000910
bf8c007f
06121200
d2060809
02010109
10120b09
d282000a
042e1509
101814f6
c8280700
c8290701
c200090b
bf8c007f
d00c0002
0200010a
7e160200
d200000a
000a150b
d206080a
0201010a
081614f2
c2000909
bf8c007f
101a1600
d282000c
0436190a
101a0d02
c8380000
c8390001
d282000e
04221d07
101c050e
d282000d
043a1b09
101a1af6
c2000908
bf8c007f
101c1600
d282000d
043a1b0a
5e18190d
100c0d04
c8340200
c8350201
d282000d
04221b07
1004090d
d2820002
040a0d09
100404f6
c200090a
bf8c007f
10061600
d2820002
040e050a
c80c0300
c80d0301
d2820000
04220707
5e000102
f8001c0f
000c000c
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..8]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[6].xyzz, CONST[5].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MUL TEMP[3], CONST[0], TEMP[1].xxxx
  8: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 10: ADD TEMP[0], TEMP[0], CONST[3]
 11: MOV TEMP[2].w, IMM[0].xxxx
 12: MOV TEMP[2].xyz, CONST[4].xyzx
 13: MUL TEMP[2], TEMP[2], IN[3]
 14: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[8].xyzz
 15: MAD TEMP[3].x, TEMP[0].zzzz, CONST[7].xxxx, CONST[7].yyyy
 16: MOV TEMP[1].w, TEMP[3].xxxx
 17: MOV OUT[1], TEMP[2]
 18: MOV OUT[2], TEMP[1]
 19: MOV OUT[0], TEMP[0]
 20: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0
  %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %5)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = extractelement <4 x float> %43, i32 3
  %48 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %5)
  %51 = extractelement <4 x float> %50, i32 0
  %52 = extractelement <4 x float> %50, i32 1
  %53 = extractelement <4 x float> %50, i32 2
  %54 = extractelement <4 x float> %50, i32 3
  %55 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %5)
  %58 = extractelement <4 x float> %57, i32 0
  %59 = extractelement <4 x float> %57, i32 1
  %60 = extractelement <4 x float> %57, i32 2
  %61 = extractelement <4 x float> %57, i32 3
  %62 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %5)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = extractelement <4 x float> %64, i32 1
  %67 = extractelement <4 x float> %64, i32 2
  %68 = extractelement <4 x float> %64, i32 3
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = fmul float %72, %33
  %76 = fadd float %75, %30
  %77 = fmul float %73, %34
  %78 = fadd float %77, %31
  %79 = fmul float %74, %35
  %80 = fadd float %79, %32
  %81 = fmul float %76, %44
  %82 = fmul float %78, %45
  %83 = fadd float %81, %82
  %84 = fmul float %80, %46
  %85 = fadd float %83, %84
  %86 = fmul float 1.000000e+00, %47
  %87 = fadd float %85, %86
  %88 = fmul float %76, %51
  %89 = fmul float %78, %52
  %90 = fadd float %88, %89
  %91 = fmul float %80, %53
  %92 = fadd float %90, %91
  %93 = fmul float 1.000000e+00, %54
  %94 = fadd float %92, %93
  %95 = fmul float %76, %58
  %96 = fmul float %78, %59
  %97 = fadd float %95, %96
  %98 = fmul float %80, %60
  %99 = fadd float %97, %98
  %100 = fmul float 1.000000e+00, %61
  %101 = fadd float %99, %100
  %102 = fmul float %11, %87
  %103 = fmul float %12, %87
  %104 = fmul float %13, %87
  %105 = fmul float %14, %87
  %106 = fmul float %15, %94
  %107 = fadd float %106, %102
  %108 = fmul float %16, %94
  %109 = fadd float %108, %103
  %110 = fmul float %17, %94
  %111 = fadd float %110, %104
  %112 = fmul float %18, %94
  %113 = fadd float %112, %105
  %114 = fmul float %19, %101
  %115 = fadd float %114, %107
  %116 = fmul float %20, %101
  %117 = fadd float %116, %109
  %118 = fmul float %21, %101
  %119 = fadd float %118, %111
  %120 = fmul float %22, %101
  %121 = fadd float %120, %113
  %122 = fadd float %115, %23
  %123 = fadd float %117, %24
  %124 = fadd float %119, %25
  %125 = fadd float %121, %26
  %126 = fmul float %27, %65
  %127 = fmul float %28, %66
  %128 = fmul float %29, %67
  %129 = fmul float 1.000000e+00, %68
  %130 = fsub float -0.000000e+00, %38
  %131 = fadd float %87, %130
  %132 = fsub float -0.000000e+00, %39
  %133 = fadd float %94, %132
  %134 = fsub float -0.000000e+00, %40
  %135 = fadd float %101, %134
  %136 = fmul float %124, %36
  %137 = fadd float %136, %37
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %126, float %127, float %128, float %129)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %131, float %133, float %135, float %137)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %122, float %123, float %124, float %125)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840710
bf8c000f
e00c2000
80020500
c2020119
c2028115
bf8c0070
7e020205
d2820003
04040906
c0840704
bf8c007f
e00c2000
80020900
bf8c0770
10021503
c2020118
c2028114
bf8c007f
7e040205
d2820004
04080905
d2820001
04061304
c202011a
c2028116
bf8c007f
7e040205
d2820005
04080907
d2820001
04061705
06021901
c0840700
bf8c007f
e00c2000
80020600
bf8c0770
10040f03
d2820002
040a0d04
d2820002
040a1105
06041302
c2020102
bf8c007f
100c0404
c2020106
bf8c007f
d2820006
041a0204
c0820708
bf8c007f
e00c2000
80010700
bf8c0770
10001103
d2820000
04020f04
d2820000
04021305
06001500
c202010a
bf8c007f
d2820003
041a0004
c202010e
bf8c007f
06060604
c202011c
c202811d
bf8c007f
7e080205
d2820004
04100903
c2020122
bf8c007f
0a0a0004
c2020121
bf8c007f
0a0c0204
c2020120
bf8c007f
0a0e0404
f800021f
04050607
c2020103
bf8c000f
10080404
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120004
c202010f
bf8c007f
06080804
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160004
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820000
04060004
c200010c
bf8c007f
06000000
f80008cf
04030500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,    10.0000,     0.0000}
  0: ADD TEMP[0].x, IN[1].xxxx, -IN[1].zzzz
  1: ABS TEMP[0].x, TEMP[0].xxxx
  2: ADD_SAT TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
  3: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx
  4: LRP TEMP[0], TEMP[0].xxxx, CONST[1], IN[0]
  5: SGE TEMP[1].x, IN[1].xxxx, IN[1].yyyy
  6: F2I TEMP[1].x, -TEMP[1]
  7: UIF TEMP[1].xxxx :0
  8:   MOV TEMP[1], IMM[0].xxxx
  9: ELSE :0
 10:   MOV TEMP[1], IMM[0].yyyy
 11: ENDIF
 12: MUL TEMP[1], TEMP[0], TEMP[1]
 13: SGE TEMP[2].x, IN[1].wwww, IMM[0].zzzz
 14: F2I TEMP[2].x, -TEMP[2]
 15: UIF TEMP[2].xxxx :0
 16:   MOV TEMP[2], IMM[0].xxxx
 17: ELSE :0
 18:   MOV TEMP[2], IMM[0].yyyy
 19: ENDIF
 20: MUL TEMP[0], TEMP[1], TEMP[2]
 21: MOV TEMP[1].xy, IN[2].xyyy
 22: TEX TEMP[1], TEMP[1], SAMP[0], 2D
 23: MUL TEMP[0], TEMP[0], TEMP[1]
 24: MOV OUT[0], TEMP[0]
 25: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28)
  %26 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0
  %28 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0
  %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %40 = fsub float -0.000000e+00, %36
  %41 = fadd float %34, %40
  %42 = call float @fabs(float %41)
  %43 = fsub float -0.000000e+00, %42
  %44 = fadd float 1.000000e+00, %43
  %45 = call float @llvm.AMDIL.clamp.(float %44, float 0.000000e+00, float 1.000000e+00)
  %46 = fmul float %45, %45
  %47 = call float @llvm.AMDGPU.lrp(float %46, float %22, float %30)
  %48 = call float @llvm.AMDGPU.lrp(float %46, float %23, float %31)
  %49 = call float @llvm.AMDGPU.lrp(float %46, float %24, float %32)
  %50 = call float @llvm.AMDGPU.lrp(float %46, float %25, float %33)
  %51 = fcmp uge float %34, %35
  %52 = select i1 %51, float 1.000000e+00, float 0.000000e+00
  %53 = fsub float -0.000000e+00, %52
  %54 = fptosi float %53 to i32
  %55 = bitcast i32 %54 to float
  %56 = bitcast float %55 to i32
  %57 = icmp ne i32 %56, 0
  %. = select i1 %57, float 1.000000e+00, float 0.000000e+00
  %58 = fmul float %47, %.
  %59 = fmul float %48, %.
  %60 = fmul float %49, %.
  %61 = fmul float %50, %.
  %62 = fcmp uge float %37, 1.000000e+01
  %63 = select i1 %62, float 1.000000e+00, float 0.000000e+00
  %64 = fsub float -0.000000e+00, %63
  %65 = fptosi float %64 to i32
  %66 = bitcast i32 %65 to float
  %67 = bitcast float %66 to i32
  %68 = icmp ne i32 %67, 0
  %temp8.0 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %69 = fmul float %58, %temp8.0
  %70 = fmul float %59, %temp8.0
  %71 = fmul float %60, %temp8.0
  %72 = fmul float %61, %temp8.0
  %73 = bitcast float %38 to i32
  %74 = bitcast float %39 to i32
  %75 = insertelement <2 x i32> undef, i32 %73, i32 0
  %76 = insertelement <2 x i32> %75, i32 %74, i32 1
  %77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %76, <32 x i8> %27, <16 x i8> %29, i32 2)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = extractelement <4 x float> %77, i32 3
  %82 = fmul float %69, %78
  %83 = fmul float %70, %79
  %84 = fmul float %71, %80
  %85 = fmul float %72, %81
  %86 = call i32 @llvm.SI.packf16(float %82, float %83)
  %87 = bitcast i32 %86 to float
  %88 = call i32 @llvm.SI.packf16(float %84, float %85)
  %89 = bitcast i32 %88 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %87, float %89, float %87, float %89)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readonly
declare float @fabs(float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #3

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readonly }
attributes #3 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080600
c8090601
c8100400
c8110401
08040504
d2060102
02010102
080404f2
d2060802
02010102
10040502
080604f2
c8140300
c8150301
100a0b03
c0840100
bf8c007f
c2000907
bf8c007f
d2820005
04140102
c8180500
c8190501
d00c0000
02020d04
d2000004
0001e480
d2060004
22010104
7e081104
d10a0000
02010104
d2000004
0001e480
100c0905
c8140700
c8150701
7e0e02ff
41200000
d00c0000
02020f05
d2000005
0001e480
d2060005
22010105
7e0a1105
d10a0000
02010105
d2000005
0001e480
10140b06
c81c0900
c81d0901
c8180800
c8190801
c0800300
c0c60500
bf8c007f
f0800f00
00030606
bf8c0770
1014130a
c82c0200
c82d0201
10161703
c2000906
bf8c007f
d282000b
042c0102
1016090b
10160b0b
1016110b
5e14150b
c82c0100
c82d0101
10161703
c2000905
bf8c007f
d282000b
042c0102
1016090b
10160b0b
10160f0b
c8300000
c8310001
10001903
c2000904
bf8c007f
d2820000
04000102
10000900
10000b00
10000d00
5e001700
f8001c0f
0a000a00
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..7]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     0.0000,    -0.0003}
IMM[1] FLT32 {    1.0000,    -0.5000,  3276.7000,    40.0000}
  0: MUL TEMP[0], IN[0], IMM[0].xxxx
  1: MUL TEMP[1], IN[2], IMM[0].xxxx
  2: LRP TEMP[2].x, IN[1].xxxx, TEMP[1].wwww, TEMP[0].wwww
  3: MAD TEMP[3], IN[3], IMM[0].xxxx, IMM[0].yyyy
  4: LRP TEMP[3].xy, IN[1].xxxx, TEMP[3].zwww, TEMP[3].xyyy
  5: MOV TEMP[4].y, IMM[0].zzzz
  6: MOV TEMP[4].x, TEMP[3].xxxx
  7: MOV TEMP[4].z, TEMP[3].yyyy
  8: MUL TEMP[3].xyz, TEMP[4].xyzz, CONST[5].xxxx
  9: ADD TEMP[4].x, CONST[5].zzzz, IMM[0].wwww
 10: SGE TEMP[4].x, TEMP[2].xxxx, TEMP[4].xxxx
 11: F2I TEMP[4].x, -TEMP[4]
 12: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 13: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 14: MUL TEMP[4].x, TEMP[2].xxxx, CONST[5].yyyy
 15: MOV TEMP[4].y, IN[1].yyyy
 16: ADD TEMP[5].x, IN[1].yyyy, IMM[1].yyyy
 17: LRP TEMP[6].xyz, TEMP[0].xyzz, CONST[7].xyzz, CONST[6].xyzz
 18: LRP TEMP[1].xyz, TEMP[1].xyzz, CONST[7].xyzz, CONST[6].xyzz
 19: LRP TEMP[1].xyz, IN[1].xxxx, TEMP[1].xyzz, TEMP[6].xyzz
 20: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[5].xxxx, TEMP[1].xyzz
 21: MUL TEMP[1], CONST[0], TEMP[0].xxxx
 22: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1]
 23: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[1]
 24: ADD TEMP[0], TEMP[0], CONST[3]
 25: MOV TEMP[1].x, TEMP[2].xxxx
 26: MOV TEMP[1].yz, CONST[5].wzww
 27: LRP TEMP[2].x, IN[1].xxxx, IN[4].yyyy, IN[4].xxxx
 28: MOV TEMP[1].w, TEMP[2].xxxx
 29: MUL TEMP[1], TEMP[1], IMM[1].zzzw
 30: MOV TEMP[2].xy, TEMP[4].xyxx
 31: MOV OUT[3], TEMP[2]
 32: MOV OUT[2], TEMP[1]
 33: MOV OUT[1], CONST[4]
 34: MOV OUT[0], TEMP[0]
 35: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %41 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0
  %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %5)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = extractelement <4 x float> %43, i32 3
  %48 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %5)
  %51 = extractelement <4 x float> %50, i32 0
  %52 = extractelement <4 x float> %50, i32 1
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = fmul float %44, 2.000000e+00
  %73 = fmul float %45, 2.000000e+00
  %74 = fmul float %46, 2.000000e+00
  %75 = fmul float %47, 2.000000e+00
  %76 = fmul float %56, 2.000000e+00
  %77 = fmul float %57, 2.000000e+00
  %78 = fmul float %58, 2.000000e+00
  %79 = fmul float %59, 2.000000e+00
  %80 = call float @llvm.AMDGPU.lrp(float %51, float %79, float %75)
  %81 = fmul float %63, 2.000000e+00
  %82 = fadd float %81, -1.000000e+00
  %83 = fmul float %64, 2.000000e+00
  %84 = fadd float %83, -1.000000e+00
  %85 = fmul float %65, 2.000000e+00
  %86 = fadd float %85, -1.000000e+00
  %87 = fmul float %66, 2.000000e+00
  %88 = fadd float %87, -1.000000e+00
  %89 = call float @llvm.AMDGPU.lrp(float %51, float %86, float %82)
  %90 = call float @llvm.AMDGPU.lrp(float %51, float %88, float %84)
  %91 = fmul float %89, %31
  %92 = fmul float 0.000000e+00, %31
  %93 = fmul float %90, %31
  %94 = fadd float %33, 0xBF31802300000000
  %95 = fcmp uge float %80, %94
  %96 = select i1 %95, float 1.000000e+00, float 0.000000e+00
  %97 = fsub float -0.000000e+00, %96
  %98 = fptosi float %97 to i32
  %99 = bitcast i32 %98 to float
  %100 = bitcast float %99 to i32
  %101 = and i32 %100, 1065353216
  %102 = bitcast i32 %101 to float
  %103 = fmul float %91, %102
  %104 = fmul float %92, %102
  %105 = fmul float %93, %102
  %106 = fmul float %80, %32
  %107 = fadd float %52, -5.000000e-01
  %108 = call float @llvm.AMDGPU.lrp(float %72, float %38, float %35)
  %109 = call float @llvm.AMDGPU.lrp(float %73, float %39, float %36)
  %110 = call float @llvm.AMDGPU.lrp(float %74, float %40, float %37)
  %111 = call float @llvm.AMDGPU.lrp(float %76, float %38, float %35)
  %112 = call float @llvm.AMDGPU.lrp(float %77, float %39, float %36)
  %113 = call float @llvm.AMDGPU.lrp(float %78, float %40, float %37)
  %114 = call float @llvm.AMDGPU.lrp(float %51, float %111, float %108)
  %115 = call float @llvm.AMDGPU.lrp(float %51, float %112, float %109)
  %116 = call float @llvm.AMDGPU.lrp(float %51, float %113, float %110)
  %117 = fmul float %103, %107
  %118 = fadd float %117, %114
  %119 = fmul float %104, %107
  %120 = fadd float %119, %115
  %121 = fmul float %105, %107
  %122 = fadd float %121, %116
  %123 = fmul float %11, %118
  %124 = fmul float %12, %118
  %125 = fmul float %13, %118
  %126 = fmul float %14, %118
  %127 = fmul float %15, %120
  %128 = fadd float %127, %123
  %129 = fmul float %16, %120
  %130 = fadd float %129, %124
  %131 = fmul float %17, %120
  %132 = fadd float %131, %125
  %133 = fmul float %18, %120
  %134 = fadd float %133, %126
  %135 = fmul float %19, %122
  %136 = fadd float %135, %128
  %137 = fmul float %20, %122
  %138 = fadd float %137, %130
  %139 = fmul float %21, %122
  %140 = fadd float %139, %132
  %141 = fmul float %22, %122
  %142 = fadd float %141, %134
  %143 = fadd float %136, %23
  %144 = fadd float %138, %24
  %145 = fadd float %140, %25
  %146 = fadd float %142, %26
  %147 = call float @llvm.AMDGPU.lrp(float %51, float %71, float %70)
  %148 = fmul float %80, 0x40A9996660000000
  %149 = fmul float %33, 0x40A9996660000000
  %150 = fmul float %34, 0x40A9996660000000
  %151 = fmul float %147, 4.000000e+01
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %148, float %149, float %150, float %151)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %106, float %52, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %143, float %144, float %145, float %146)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840710
bf8c000f
e00c2000
80020600
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
080a02f2
10140d05
d2820006
042a0f01
101e0cff
42200000
c0840700
bf8c007f
e00c2000
80020600
bf8c0770
06141309
101c1505
c0840708
bf8c007f
e00c2000
80020a00
bf8c0770
06201b0d
d282000e
043a2101
10201cff
454ccb33
c2020117
7e2202ff
454ccb33
bf8c007f
10242204
c2028116
bf8c007f
10222205
f800021f
0f121110
c2020115
bf8c000f
101e1c04
7e200280
f800022f
1010020f
bf8c070f
061e190c
08201ef2
c202011a
bf8c007f
10202004
c204011e
bf8c007f
d282000f
0440110f
06201108
082220f2
10222204
d2820010
04441110
10202105
d2820014
04421f01
c084070c
bf8c007f
e00c2000
80021000
bf8c0770
06002311
060000f3
10000105
061e2713
061e1ef3
d2820000
04021f01
c2020114
bf8c007f
10000004
7e1e02ff
b98c0118
061e1e05
d00c0006
02021f0e
d200000e
0019e480
d206000e
2201010e
7e1c110e
361c1cf2
10001d00
061e04f1
d2820000
04521f00
0628150a
082a28f2
c2028118
bf8c007f
102a2a05
c203011c
bf8c007f
d2820014
04540d14
062a0d06
082c2af2
102c2c05
d2820015
04580d15
102a2b05
d2820014
04562901
062a2110
062a2af3
102a2b05
06202512
062020f3
d2820010
04562101
10202004
10201d10
d2820010
04521f10
c2028103
bf8c007f
10222005
0614170b
081614f2
c2028119
bf8c007f
10161605
c203011d
bf8c007f
d282000a
042c0d0a
060c0f07
080e0cf2
100e0e05
d2820006
041c0d06
100a0d05
d2820001
04161501
d2100002
02010004
10041d02
d2820001
04061f02
c2020107
bf8c007f
d2820002
04460204
c202010b
bf8c007f
d2820002
040a0004
c202010f
bf8c007f
06040404
c2020102
bf8c007f
10062004
c2020106
bf8c007f
d2820003
040e0204
c202010a
bf8c007f
d2820003
040e0004
c202010e
bf8c007f
06060604
c2020101
bf8c007f
10082004
c2020105
bf8c007f
d2820004
04120204
c2020109
bf8c007f
d2820004
04120004
c202010d
bf8c007f
06080804
c2020100
bf8c007f
100a2004
c2020104
bf8c007f
d2820001
04160204
c2020108
bf8c007f
d2820000
04060004
c200010c
bf8c007f
06000000
f80008cf
02030400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], TEMP[0], IN[0]
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %35, %24
  %40 = fmul float %36, %25
  %41 = fmul float %37, %26
  %42 = fmul float %38, %27
  %43 = call i32 @llvm.SI.packf16(float %39, float %40)
  %44 = bitcast i32 %43 to float
  %45 = call i32 @llvm.SI.packf16(float %41, float %42)
  %46 = bitcast i32 %45 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0d05
c81c0200
c81d0201
100e0f04
5e0c0d07
c81c0100
c81d0101
100e0f03
c8200000
c8210001
10001102
5e000f00
f8001c0f
06000600
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV TEMP[1].xy, IN[2].xyxx
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], IN[1]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0
  %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %5)
  %36 = extractelement <4 x float> %35, i32 0
  %37 = extractelement <4 x float> %35, i32 1
  %38 = extractelement <4 x float> %35, i32 2
  %39 = extractelement <4 x float> %35, i32 3
  %40 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0
  %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %5)
  %43 = extractelement <4 x float> %42, i32 0
  %44 = extractelement <4 x float> %42, i32 1
  %45 = fmul float %11, %30
  %46 = fmul float %12, %30
  %47 = fmul float %13, %30
  %48 = fmul float %14, %30
  %49 = fmul float %15, %31
  %50 = fadd float %49, %45
  %51 = fmul float %16, %31
  %52 = fadd float %51, %46
  %53 = fmul float %17, %31
  %54 = fadd float %53, %47
  %55 = fmul float %18, %31
  %56 = fadd float %55, %48
  %57 = fmul float %19, %32
  %58 = fadd float %57, %50
  %59 = fmul float %20, %32
  %60 = fadd float %59, %52
  %61 = fmul float %21, %32
  %62 = fadd float %61, %54
  %63 = fmul float %22, %32
  %64 = fadd float %63, %56
  %65 = fadd float %58, %23
  %66 = fadd float %60, %24
  %67 = fadd float %62, %25
  %68 = fadd float %64, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %36, float %37, float %38, float %39)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %66, float %67, float %68)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
bf8c0770
f800020f
04030201
c0840708
bf8c000f
e00c2000
80020100
7e0a0280
bf8c0770
f800021f
05050201
c0820700
bf8c000f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: SLT TEMP[1].x, IN[1].xxxx, CONST[1].xxxx
  3: F2I TEMP[1].x, -TEMP[1]
  4: UIF TEMP[1].xxxx :0
  5:   MOV TEMP[1], IMM[0].xxxx
  6: ELSE :0
  7:   MOV TEMP[1], IMM[0].yyyx
  8: ENDIF
  9: MUL TEMP[0], TEMP[0], TEMP[1]
 10: MUL TEMP[0], TEMP[0], IN[0]
 11: MOV OUT[0], TEMP[0]
 12: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %33 = bitcast float %31 to i32
  %34 = bitcast float %32 to i32
  %35 = insertelement <2 x i32> undef, i32 %33, i32 0
  %36 = insertelement <2 x i32> %35, i32 %34, i32 1
  %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %24, <16 x i8> %26, i32 2)
  %38 = extractelement <4 x float> %37, i32 0
  %39 = extractelement <4 x float> %37, i32 1
  %40 = extractelement <4 x float> %37, i32 2
  %41 = extractelement <4 x float> %37, i32 3
  %42 = fcmp ult float %31, %22
  %43 = select i1 %42, float 1.000000e+00, float 0.000000e+00
  %44 = fsub float -0.000000e+00, %43
  %45 = fptosi float %44 to i32
  %46 = bitcast i32 %45 to float
  %47 = bitcast float %46 to i32
  %48 = icmp ne i32 %47, 0
  %. = select i1 %48, float 1.000000e+00, float 0.000000e+00
  %49 = fmul float %38, %.
  %50 = fmul float %39, %.
  %51 = fmul float %40, %.
  %52 = fmul float %41, 1.000000e+00
  %53 = fmul float %49, %27
  %54 = fmul float %50, %28
  %55 = fmul float %51, %29
  %56 = fmul float %52, %30
  %57 = call i32 @llvm.SI.packf16(float %53, float %54)
  %58 = bitcast i32 %57 to float
  %59 = call i32 @llvm.SI.packf16(float %55, float %56)
  %60 = bitcast i32 %59 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c81c0500
c81d0501
c8180400
c8190401
c0840300
c0c60500
bf8c007f
f0800f00
00430206
c8200300
c8210301
bf8c0770
10101105
c0800100
bf8c007f
c2000104
bf8c007f
d0020000
02000106
d2000006
0001e480
d2060006
22010106
7e0c1106
d10a0000
02010106
d2000006
0001e480
100e0d04
c8240200
c8250201
100e1307
5e0e1107
10100d03
c8240100
c8250101
10101308
10040d02
c80c0000
c80d0001
10000702
5e001100
f8001c0f
07000700
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..4]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV TEMP[1].xy, IN[1].xyxx
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], CONST[4]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %5)
  %34 = extractelement <4 x float> %33, i32 0
  %35 = extractelement <4 x float> %33, i32 1
  %36 = extractelement <4 x float> %33, i32 2
  %37 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0
  %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %5)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = fmul float %11, %34
  %43 = fmul float %12, %34
  %44 = fmul float %13, %34
  %45 = fmul float %14, %34
  %46 = fmul float %15, %35
  %47 = fadd float %46, %42
  %48 = fmul float %16, %35
  %49 = fadd float %48, %43
  %50 = fmul float %17, %35
  %51 = fadd float %50, %44
  %52 = fmul float %18, %35
  %53 = fadd float %52, %45
  %54 = fmul float %19, %36
  %55 = fadd float %54, %47
  %56 = fmul float %20, %36
  %57 = fadd float %56, %49
  %58 = fmul float %21, %36
  %59 = fadd float %58, %51
  %60 = fmul float %22, %36
  %61 = fadd float %60, %53
  %62 = fadd float %55, %23
  %63 = fadd float %57, %24
  %64 = fadd float %59, %25
  %65 = fadd float %61, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %63, float %64, float %65)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840704
bf8c000f
e00c2000
80020100
7e0a0280
bf8c0770
f800021f
05050201
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[25..26]
DCL CONST[5..24]
DCL TEMP[0]
DCL TEMP[1..11], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.9748,     0.7565}
IMM[2] FLT32 {   -0.8154,    -0.8791,     0.9456,    -0.7689}
IMM[3] FLT32 {   -0.8141,     0.9144,     4.0000,     0.0625}
IMM[4] FLT32 {   -0.9420,    -0.3991,    -0.0942,    -0.9294}
IMM[5] FLT32 {    0.3450,     0.2939,    -0.9159,     0.4577}
IMM[6] FLT32 {   -0.3828,     0.2768,     0.4432,    -0.9751}
IMM[7] FLT32 {    0.5374,    -0.4737,    -0.2650,    -0.4189}
IMM[8] FLT32 {    0.7920,     0.1909,    -0.2419,     0.9971}
IMM[9] FLT32 {    0.1998,     0.7864,     0.1438,    -0.1410}
IMM[10] FLT32 {    0.2500,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[26].xxxx, CONST[26].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[5], CONST[6]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[4], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[5].xxxx
 14: MOV TEMP[6].z, IMM[0].zzzz
 15: MOV TEMP[6].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].zzzz, CONST[7].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx
 21: MUL TEMP[6], CONST[12], TEMP[3].xxxx
 22: MAD TEMP[3], CONST[13], TEMP[3].yyyy, TEMP[6]
 23: MAD TEMP[3].xyz, CONST[14], TEMP[5].xxxx, TEMP[3]
 24: MOV TEMP[3].xyz, TEMP[3].xyzz
 25: TEX TEMP[3].xyz, TEMP[3], SAMP[3], CUBE
 26: DP3 TEMP[5].x, TEMP[4].xyzz, CONST[16].xyzz
 27: ADD TEMP[5].x, TEMP[5].xxxx, CONST[18].wwww
 28: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz
 29: RSQ TEMP[6].x, TEMP[6].xxxx
 30: MUL TEMP[6].xyz, TEMP[1].xyzz, TEMP[6].xxxx
 31: ADD TEMP[6].xyz, CONST[21].xyzz, -TEMP[6].xyzz
 32: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
 33: RSQ TEMP[7].x, TEMP[7].xxxx
 34: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
 35: DP3_SAT TEMP[4].x, TEMP[6].xyzz, TEMP[4].xyzz
 36: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 37: POW TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx
 38: SGE TEMP[4].x, TEMP[5].xxxx, IMM[1].yyyy
 39: F2I TEMP[4].x, -TEMP[4]
 40: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
 41: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
 42: MOV_SAT TEMP[4].x, TEMP[5].xxxx
 43: LRP TEMP[4].xyz, TEMP[4].xxxx, CONST[17].xyzz, CONST[19].xyzz
 44: MOV_SAT TEMP[6].x, -TEMP[5].xxxx
 45: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[18].xyzz, CONST[19].xyzz
 46: SLT TEMP[7].x, TEMP[5].xxxx, IMM[0].wwww
 47: F2I TEMP[7].x, -TEMP[7]
 48: UIF TEMP[7].xxxx :2
 49:   MOV TEMP[6].xyz, TEMP[6].xyzx
 50: ELSE :2
 51:   MOV TEMP[6].xyz, TEMP[4].xyzx
 52: ENDIF
 53: MUL TEMP[4].x, CONST[20].wwww, TEMP[2].xxxx
 54: MUL TEMP[7], CONST[8], TEMP[1].xxxx
 55: MAD TEMP[7], CONST[9], TEMP[1].yyyy, TEMP[7]
 56: MAD TEMP[7], CONST[10], TEMP[1].zzzz, TEMP[7]
 57: ADD TEMP[7].xyz, TEMP[7], CONST[11]
 58: MOV_SAT TEMP[8].x, TEMP[5].xxxx
 59: MAD TEMP[9].xy, IMM[1].zwww, CONST[23].yyyy, TEMP[7].xyyy
 60: MOV TEMP[9].xy, TEMP[9].xyyy
 61: MOV TEMP[9].z, TEMP[7].zzzz
 62: TEX TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D
 63: MAD TEMP[10].xy, IMM[2].xyyy, CONST[23].yyyy, TEMP[7].xyyy
 64: MOV TEMP[10].xy, TEMP[10].xyyy
 65: MOV TEMP[10].z, TEMP[7].zzzz
 66: TEX TEMP[10].x, TEMP[10], SAMP[1], SHADOW2D
 67: ADD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx
 68: MAD TEMP[10].xy, IMM[2].zwww, CONST[23].yyyy, TEMP[7].xyyy
 69: MOV TEMP[10].xy, TEMP[10].xyyy
 70: MOV TEMP[10].z, TEMP[7].zzzz
 71: TEX TEMP[10].x, TEMP[10], SAMP[1], SHADOW2D
 72: ADD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx
 73: MAD TEMP[10].xy, IMM[3].xyyy, CONST[23].yyyy, TEMP[7].xyyy
 74: MOV TEMP[10].xy, TEMP[10].xyyy
 75: MOV TEMP[10].z, TEMP[7].zzzz
 76: TEX TEMP[10].x, TEMP[10], SAMP[1], SHADOW2D
 77: ADD TEMP[10].x, TEMP[9].xxxx, TEMP[10].xxxx
 78: MOV TEMP[9].x, TEMP[10].xxxx
 79: ADD TEMP[11].x, IMM[3].zzzz, -TEMP[10].xxxx
 80: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[11].xxxx
 81: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[8].xxxx
 82: SNE TEMP[8].x, TEMP[8].xxxx, IMM[0].wwww
 83: F2I TEMP[8].x, -TEMP[8]
 84: UIF TEMP[8].xxxx :2
 85:   MAD TEMP[8].xy, IMM[4].xyyy, CONST[23].yyyy, TEMP[7].xyyy
 86:   MOV TEMP[8].xy, TEMP[8].xyyy
 87:   MOV TEMP[8].z, TEMP[7].zzzz
 88:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
 89:   ADD TEMP[9].x, TEMP[10].xxxx, TEMP[8].xxxx
 90:   MAD TEMP[8].xy, IMM[4].zwww, CONST[23].yyyy, TEMP[7].xyyy
 91:   MOV TEMP[8].xy, TEMP[8].xyyy
 92:   MOV TEMP[8].z, TEMP[7].zzzz
 93:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
 94:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
 95:   MAD TEMP[8].xy, IMM[5].xyyy, CONST[23].yyyy, TEMP[7].xyyy
 96:   MOV TEMP[8].xy, TEMP[8].xyyy
 97:   MOV TEMP[8].z, TEMP[7].zzzz
 98:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
 99:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
100:   MAD TEMP[8].xy, IMM[5].zwww, CONST[23].yyyy, TEMP[7].xyyy
101:   MOV TEMP[8].xy, TEMP[8].xyyy
102:   MOV TEMP[8].z, TEMP[7].zzzz
103:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
104:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
105:   MAD TEMP[8].xy, IMM[6].xyyy, CONST[23].yyyy, TEMP[7].xyyy
106:   MOV TEMP[8].xy, TEMP[8].xyyy
107:   MOV TEMP[8].z, TEMP[7].zzzz
108:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
109:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
110:   MAD TEMP[8].xy, IMM[6].zwww, CONST[23].yyyy, TEMP[7].xyyy
111:   MOV TEMP[8].xy, TEMP[8].xyyy
112:   MOV TEMP[8].z, TEMP[7].zzzz
113:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
114:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
115:   MAD TEMP[8].xy, IMM[7].xyyy, CONST[23].yyyy, TEMP[7].xyyy
116:   MOV TEMP[8].xy, TEMP[8].xyyy
117:   MOV TEMP[8].z, TEMP[7].zzzz
118:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
119:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
120:   MAD TEMP[8].xy, IMM[7].zwww, CONST[23].yyyy, TEMP[7].xyyy
121:   MOV TEMP[8].xy, TEMP[8].xyyy
122:   MOV TEMP[8].z, TEMP[7].zzzz
123:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
124:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
125:   MAD TEMP[8].xy, IMM[8].xyyy, CONST[23].yyyy, TEMP[7].xyyy
126:   MOV TEMP[8].xy, TEMP[8].xyyy
127:   MOV TEMP[8].z, TEMP[7].zzzz
128:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
129:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
130:   MAD TEMP[8].xy, IMM[8].zwww, CONST[23].yyyy, TEMP[7].xyyy
131:   MOV TEMP[8].xy, TEMP[8].xyyy
132:   MOV TEMP[8].z, TEMP[7].zzzz
133:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
134:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
135:   MAD TEMP[8].xy, IMM[9].xyyy, CONST[23].yyyy, TEMP[7].xyyy
136:   MOV TEMP[8].xy, TEMP[8].xyyy
137:   MOV TEMP[8].z, TEMP[7].zzzz
138:   TEX TEMP[8].x, TEMP[8], SAMP[1], SHADOW2D
139:   ADD TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
140:   MAD TEMP[8].xy, IMM[9].zwww, CONST[23].yyyy, TEMP[7].xyyy
141:   MOV TEMP[8].xy, TEMP[8].xyyy
142:   MOV TEMP[8].z, TEMP[7].zzzz
143:   TEX TEMP[7].x, TEMP[8], SAMP[1], SHADOW2D
144:   ADD TEMP[7].x, TEMP[9].xxxx, TEMP[7].xxxx
145:   MUL TEMP[9].x, TEMP[7].xxxx, IMM[3].wwww
146: ELSE :2
147:   MUL TEMP[9].x, TEMP[9].xxxx, IMM[10].xxxx
148: ENDIF
149: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[9].xxxx
150: ADD TEMP[7].x, TEMP[9].xxxx, CONST[23].xxxx
151: MOV_SAT TEMP[5].x, -TEMP[5].xxxx
152: ADD_SAT TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx
153: MAD TEMP[2].xyz, TEMP[6].xyzz, TEMP[5].xxxx, TEMP[3].xyzz
154: MOV TEMP[3].w, IMM[0].zzzz
155: MOV TEMP[3].xyz, TEMP[1].xyzx
156: DP4 TEMP[1].x, CONST[24], TEMP[3]
157: DP4 TEMP[3].x, CONST[25], TEMP[3]
158: MOV TEMP[1].y, TEMP[3].xxxx
159: MOV TEMP[1].xy, TEMP[1].xyyy
160: TEX TEMP[1], TEMP[1], SAMP[0], 2D
161: MUL TEMP[1], TEMP[1], CONST[22]
162: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[1].wwww
163: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
164: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[1].wwww, TEMP[2].xyzz
165: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[3].xxxx
166: MOV TEMP[1].xyz, TEMP[1].xyzx
167: MOV TEMP[1].w, TEMP[2].xxxx
168: MOV OUT[0], TEMP[1]
169: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 264)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 272)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 276)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 280)
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 288)
  %62 = call float @llvm.SI.load.const(<16 x i8> %21, i32 292)
  %63 = call float @llvm.SI.load.const(<16 x i8> %21, i32 296)
  %64 = call float @llvm.SI.load.const(<16 x i8> %21, i32 300)
  %65 = call float @llvm.SI.load.const(<16 x i8> %21, i32 304)
  %66 = call float @llvm.SI.load.const(<16 x i8> %21, i32 308)
  %67 = call float @llvm.SI.load.const(<16 x i8> %21, i32 312)
  %68 = call float @llvm.SI.load.const(<16 x i8> %21, i32 332)
  %69 = call float @llvm.SI.load.const(<16 x i8> %21, i32 336)
  %70 = call float @llvm.SI.load.const(<16 x i8> %21, i32 340)
  %71 = call float @llvm.SI.load.const(<16 x i8> %21, i32 344)
  %72 = call float @llvm.SI.load.const(<16 x i8> %21, i32 352)
  %73 = call float @llvm.SI.load.const(<16 x i8> %21, i32 356)
  %74 = call float @llvm.SI.load.const(<16 x i8> %21, i32 360)
  %75 = call float @llvm.SI.load.const(<16 x i8> %21, i32 364)
  %76 = call float @llvm.SI.load.const(<16 x i8> %21, i32 368)
  %77 = call float @llvm.SI.load.const(<16 x i8> %21, i32 372)
  %78 = call float @llvm.SI.load.const(<16 x i8> %21, i32 384)
  %79 = call float @llvm.SI.load.const(<16 x i8> %21, i32 388)
  %80 = call float @llvm.SI.load.const(<16 x i8> %21, i32 392)
  %81 = call float @llvm.SI.load.const(<16 x i8> %21, i32 396)
  %82 = call float @llvm.SI.load.const(<16 x i8> %21, i32 400)
  %83 = call float @llvm.SI.load.const(<16 x i8> %21, i32 404)
  %84 = call float @llvm.SI.load.const(<16 x i8> %21, i32 408)
  %85 = call float @llvm.SI.load.const(<16 x i8> %21, i32 412)
  %86 = call float @llvm.SI.load.const(<16 x i8> %21, i32 416)
  %87 = call float @llvm.SI.load.const(<16 x i8> %21, i32 420)
  %88 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %89 = load <32 x i8> addrspace(2)* %88, !tbaa !0
  %90 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %91 = load <16 x i8> addrspace(2)* %90, !tbaa !0
  %92 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %93 = load <32 x i8> addrspace(2)* %92, !tbaa !0
  %94 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %95 = load <16 x i8> addrspace(2)* %94, !tbaa !0
  %96 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %97 = load <32 x i8> addrspace(2)* %96, !tbaa !0
  %98 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %99 = load <16 x i8> addrspace(2)* %98, !tbaa !0
  %100 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %101 = load <32 x i8> addrspace(2)* %100, !tbaa !0
  %102 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %103 = load <16 x i8> addrspace(2)* %102, !tbaa !0
  %104 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %105 = load <32 x i8> addrspace(2)* %104, !tbaa !0
  %106 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %107 = load <16 x i8> addrspace(2)* %106, !tbaa !0
  %108 = fmul float %13, %86
  %109 = fadd float %108, %87
  %110 = fmul float %12, %22
  %111 = fadd float %110, %26
  %112 = fmul float %109, %23
  %113 = fadd float %112, %27
  %114 = fmul float %12, %24
  %115 = fadd float %114, %28
  %116 = fmul float %109, %25
  %117 = fadd float %116, %29
  %118 = bitcast float %111 to i32
  %119 = bitcast float %113 to i32
  %120 = insertelement <2 x i32> undef, i32 %118, i32 0
  %121 = insertelement <2 x i32> %120, i32 %119, i32 1
  %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %105, <16 x i8> %107, i32 2)
  %123 = extractelement <4 x float> %122, i32 0
  %124 = extractelement <4 x float> %122, i32 1
  %125 = extractelement <4 x float> %122, i32 2
  %126 = extractelement <4 x float> %122, i32 3
  %127 = fmul float %124, 2.000000e+00
  %128 = fadd float %127, -1.000000e+00
  %129 = fmul float %125, 2.000000e+00
  %130 = fadd float %129, -1.000000e+00
  %131 = fmul float %126, 2.000000e+00
  %132 = fadd float %131, -1.000000e+00
  %133 = fmul float %128, %128
  %134 = fmul float %130, %130
  %135 = fadd float %133, %134
  %136 = fsub float -0.000000e+00, %135
  %137 = fadd float 1.000000e+00, %136
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.rsq(float %138)
  %140 = fmul float %139, %138
  %141 = fsub float -0.000000e+00, %138
  %142 = call float @llvm.AMDGPU.cndlt(float %141, float %140, float 0.000000e+00)
  %143 = fmul float %142, %132
  %144 = bitcast float %111 to i32
  %145 = bitcast float %113 to i32
  %146 = insertelement <2 x i32> undef, i32 %144, i32 0
  %147 = insertelement <2 x i32> %146, i32 %145, i32 1
  %148 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %97, <16 x i8> %99, i32 2)
  %149 = extractelement <4 x float> %148, i32 0
  %150 = fmul float %149, %30
  %151 = fadd float %150, %31
  %152 = fdiv float 1.000000e+00, %151
  %153 = fmul float %115, %152
  %154 = fmul float %117, %152
  %155 = fmul float 1.000000e+00, %152
  %156 = fmul float %44, %128
  %157 = fmul float %45, %128
  %158 = fmul float %46, %128
  %159 = fmul float %47, %128
  %160 = fmul float %48, %130
  %161 = fadd float %160, %156
  %162 = fmul float %49, %130
  %163 = fadd float %162, %157
  %164 = fmul float %50, %130
  %165 = fadd float %164, %158
  %166 = fmul float %51, %130
  %167 = fadd float %166, %159
  %168 = fmul float %52, %143
  %169 = fadd float %168, %161
  %170 = fmul float %53, %143
  %171 = fadd float %170, %163
  %172 = fmul float %54, %143
  %173 = fadd float %172, %165
  %174 = insertelement <4 x float> undef, float %169, i32 0
  %175 = insertelement <4 x float> %174, float %171, i32 1
  %176 = insertelement <4 x float> %175, float %173, i32 2
  %177 = insertelement <4 x float> %176, float %167, i32 3
  %178 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %177)
  %179 = extractelement <4 x float> %178, i32 0
  %180 = extractelement <4 x float> %178, i32 1
  %181 = extractelement <4 x float> %178, i32 2
  %182 = extractelement <4 x float> %178, i32 3
  %183 = call float @fabs(float %181)
  %184 = fdiv float 1.000000e+00, %183
  %185 = fmul float %179, %184
  %186 = fadd float %185, 1.500000e+00
  %187 = fmul float %180, %184
  %188 = fadd float %187, 1.500000e+00
  %189 = bitcast float %188 to i32
  %190 = bitcast float %186 to i32
  %191 = bitcast float %182 to i32
  %192 = insertelement <4 x i32> undef, i32 %189, i32 0
  %193 = insertelement <4 x i32> %192, i32 %190, i32 1
  %194 = insertelement <4 x i32> %193, i32 %191, i32 2
  %195 = insertelement <4 x i32> %194, i32 undef, i32 3
  %196 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %195, <32 x i8> %101, <16 x i8> %103, i32 4)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = fmul float %128, %55
  %201 = fmul float %130, %56
  %202 = fadd float %201, %200
  %203 = fmul float %143, %57
  %204 = fadd float %202, %203
  %205 = fadd float %204, %64
  %206 = fmul float %153, %153
  %207 = fmul float %154, %154
  %208 = fadd float %207, %206
  %209 = fmul float %155, %155
  %210 = fadd float %208, %209
  %211 = call float @llvm.AMDGPU.rsq(float %210)
  %212 = fmul float %153, %211
  %213 = fmul float %154, %211
  %214 = fmul float %155, %211
  %215 = fsub float -0.000000e+00, %212
  %216 = fadd float %69, %215
  %217 = fsub float -0.000000e+00, %213
  %218 = fadd float %70, %217
  %219 = fsub float -0.000000e+00, %214
  %220 = fadd float %71, %219
  %221 = fmul float %216, %216
  %222 = fmul float %218, %218
  %223 = fadd float %222, %221
  %224 = fmul float %220, %220
  %225 = fadd float %223, %224
  %226 = call float @llvm.AMDGPU.rsq(float %225)
  %227 = fmul float %216, %226
  %228 = fmul float %218, %226
  %229 = fmul float %220, %226
  %230 = fmul float %227, %128
  %231 = fmul float %228, %130
  %232 = fadd float %231, %230
  %233 = fmul float %229, %143
  %234 = fadd float %232, %233
  %235 = call float @llvm.AMDIL.clamp.(float %234, float 0.000000e+00, float 1.000000e+00)
  %236 = fmul float %123, 1.024000e+03
  %237 = call float @llvm.pow.f32(float %235, float %236)
  %238 = fcmp uge float %205, 0x3F847AE140000000
  %239 = select i1 %238, float 1.000000e+00, float 0.000000e+00
  %240 = fsub float -0.000000e+00, %239
  %241 = fptosi float %240 to i32
  %242 = bitcast i32 %241 to float
  %243 = bitcast float %242 to i32
  %244 = and i32 %243, 1065353216
  %245 = bitcast i32 %244 to float
  %246 = fmul float %237, %245
  %247 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00)
  %248 = call float @llvm.AMDGPU.lrp(float %247, float %58, float %65)
  %249 = call float @llvm.AMDGPU.lrp(float %247, float %59, float %66)
  %250 = call float @llvm.AMDGPU.lrp(float %247, float %60, float %67)
  %251 = fsub float -0.000000e+00, %205
  %252 = call float @llvm.AMDIL.clamp.(float %251, float 0.000000e+00, float 1.000000e+00)
  %253 = call float @llvm.AMDGPU.lrp(float %252, float %61, float %65)
  %254 = call float @llvm.AMDGPU.lrp(float %252, float %62, float %66)
  %255 = call float @llvm.AMDGPU.lrp(float %252, float %63, float %67)
  %256 = fcmp ult float %205, 0.000000e+00
  %257 = select i1 %256, float 1.000000e+00, float 0.000000e+00
  %258 = fsub float -0.000000e+00, %257
  %259 = fptosi float %258 to i32
  %260 = bitcast i32 %259 to float
  %261 = bitcast float %260 to i32
  %262 = icmp ne i32 %261, 0
  %. = select i1 %262, float %253, float %248
  %.51 = select i1 %262, float %254, float %249
  %.52 = select i1 %262, float %255, float %250
  %263 = fmul float %68, %246
  %264 = fmul float %32, %153
  %265 = fmul float %33, %153
  %266 = fmul float %34, %153
  %267 = fmul float %35, %154
  %268 = fadd float %267, %264
  %269 = fmul float %36, %154
  %270 = fadd float %269, %265
  %271 = fmul float %37, %154
  %272 = fadd float %271, %266
  %273 = fmul float %38, %155
  %274 = fadd float %273, %268
  %275 = fmul float %39, %155
  %276 = fadd float %275, %270
  %277 = fmul float %40, %155
  %278 = fadd float %277, %272
  %279 = fadd float %274, %41
  %280 = fadd float %276, %42
  %281 = fadd float %278, %43
  %282 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00)
  %283 = fmul float 0x3FEF31EC00000000, %77
  %284 = fadd float %283, %279
  %285 = fmul float 0x3FE8351D80000000, %77
  %286 = fadd float %285, %280
  %287 = bitcast float %281 to i32
  %288 = bitcast float %284 to i32
  %289 = bitcast float %286 to i32
  %290 = insertelement <4 x i32> undef, i32 %287, i32 0
  %291 = insertelement <4 x i32> %290, i32 %288, i32 1
  %292 = insertelement <4 x i32> %291, i32 %289, i32 2
  %293 = insertelement <4 x i32> %292, i32 undef, i32 3
  %294 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %293, <32 x i8> %93, <16 x i8> %95, i32 7)
  %295 = extractelement <4 x float> %294, i32 0
  %296 = fmul float 0xBFEA181A80000000, %77
  %297 = fadd float %296, %279
  %298 = fmul float 0xBFEC21CA00000000, %77
  %299 = fadd float %298, %280
  %300 = bitcast float %281 to i32
  %301 = bitcast float %297 to i32
  %302 = bitcast float %299 to i32
  %303 = insertelement <4 x i32> undef, i32 %300, i32 0
  %304 = insertelement <4 x i32> %303, i32 %301, i32 1
  %305 = insertelement <4 x i32> %304, i32 %302, i32 2
  %306 = insertelement <4 x i32> %305, i32 undef, i32 3
  %307 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %306, <32 x i8> %93, <16 x i8> %95, i32 7)
  %308 = extractelement <4 x float> %307, i32 0
  %309 = fadd float %295, %308
  %310 = fmul float 0x3FEE423DC0000000, %77
  %311 = fadd float %310, %279
  %312 = fmul float 0xBFE89AE360000000, %77
  %313 = fadd float %312, %280
  %314 = bitcast float %281 to i32
  %315 = bitcast float %311 to i32
  %316 = bitcast float %313 to i32
  %317 = insertelement <4 x i32> undef, i32 %314, i32 0
  %318 = insertelement <4 x i32> %317, i32 %315, i32 1
  %319 = insertelement <4 x i32> %318, i32 %316, i32 2
  %320 = insertelement <4 x i32> %319, i32 undef, i32 3
  %321 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %320, <32 x i8> %93, <16 x i8> %95, i32 7)
  %322 = extractelement <4 x float> %321, i32 0
  %323 = fadd float %309, %322
  %324 = fmul float 0xBFEA0D1A80000000, %77
  %325 = fadd float %324, %279
  %326 = fmul float 0x3FED429140000000, %77
  %327 = fadd float %326, %280
  %328 = bitcast float %281 to i32
  %329 = bitcast float %325 to i32
  %330 = bitcast float %327 to i32
  %331 = insertelement <4 x i32> undef, i32 %328, i32 0
  %332 = insertelement <4 x i32> %331, i32 %329, i32 1
  %333 = insertelement <4 x i32> %332, i32 %330, i32 2
  %334 = insertelement <4 x i32> %333, i32 undef, i32 3
  %335 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %334, <32 x i8> %93, <16 x i8> %95, i32 7)
  %336 = extractelement <4 x float> %335, i32 0
  %337 = fadd float %323, %336
  %338 = fsub float -0.000000e+00, %337
  %339 = fadd float 4.000000e+00, %338
  %340 = fmul float %337, %339
  %341 = fmul float %340, %282
  %342 = fcmp une float %341, 0.000000e+00
  %343 = select i1 %342, float 1.000000e+00, float 0.000000e+00
  %344 = fsub float -0.000000e+00, %343
  %345 = fptosi float %344 to i32
  %346 = bitcast i32 %345 to float
  %347 = bitcast float %346 to i32
  %348 = icmp ne i32 %347, 0
  br i1 %348, label %IF49, label %ELSE50

IF49:                                             ; preds = %main_body
  %349 = fmul float 0xBFEE24FF40000000, %77
  %350 = fadd float %349, %279
  %351 = fmul float 0xBFD98A3C00000000, %77
  %352 = fadd float %351, %280
  %353 = bitcast float %281 to i32
  %354 = bitcast float %350 to i32
  %355 = bitcast float %352 to i32
  %356 = insertelement <4 x i32> undef, i32 %353, i32 0
  %357 = insertelement <4 x i32> %356, i32 %354, i32 1
  %358 = insertelement <4 x i32> %357, i32 %355, i32 2
  %359 = insertelement <4 x i32> %358, i32 undef, i32 3
  %360 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %359, <32 x i8> %93, <16 x i8> %95, i32 7)
  %361 = extractelement <4 x float> %360, i32 0
  %362 = fadd float %337, %361
  %363 = fmul float 0xBFB81C7300000000, %77
  %364 = fadd float %363, %279
  %365 = fmul float 0xBFEDBD8D60000000, %77
  %366 = fadd float %365, %280
  %367 = bitcast float %281 to i32
  %368 = bitcast float %364 to i32
  %369 = bitcast float %366 to i32
  %370 = insertelement <4 x i32> undef, i32 %367, i32 0
  %371 = insertelement <4 x i32> %370, i32 %368, i32 1
  %372 = insertelement <4 x i32> %371, i32 %369, i32 2
  %373 = insertelement <4 x i32> %372, i32 undef, i32 3
  %374 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %373, <32 x i8> %93, <16 x i8> %95, i32 7)
  %375 = extractelement <4 x float> %374, i32 0
  %376 = fadd float %362, %375
  %377 = fmul float 0x3FD613D080000000, %77
  %378 = fadd float %377, %279
  %379 = fmul float 0x3FD2CEE400000000, %77
  %380 = fadd float %379, %280
  %381 = bitcast float %281 to i32
  %382 = bitcast float %378 to i32
  %383 = bitcast float %380 to i32
  %384 = insertelement <4 x i32> undef, i32 %381, i32 0
  %385 = insertelement <4 x i32> %384, i32 %382, i32 1
  %386 = insertelement <4 x i32> %385, i32 %383, i32 2
  %387 = insertelement <4 x i32> %386, i32 undef, i32 3
  %388 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %387, <32 x i8> %93, <16 x i8> %95, i32 7)
  %389 = extractelement <4 x float> %388, i32 0
  %390 = fadd float %376, %389
  %391 = fmul float 0xBFED4EEFC0000000, %77
  %392 = fadd float %391, %279
  %393 = fmul float 0x3FDD4B3100000000, %77
  %394 = fadd float %393, %280
  %395 = bitcast float %281 to i32
  %396 = bitcast float %392 to i32
  %397 = bitcast float %394 to i32
  %398 = insertelement <4 x i32> undef, i32 %395, i32 0
  %399 = insertelement <4 x i32> %398, i32 %396, i32 1
  %400 = insertelement <4 x i32> %399, i32 %397, i32 2
  %401 = insertelement <4 x i32> %400, i32 undef, i32 3
  %402 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %401, <32 x i8> %93, <16 x i8> %95, i32 7)
  %403 = extractelement <4 x float> %402, i32 0
  %404 = fadd float %390, %403
  %405 = fmul float 0xBFD87F6480000000, %77
  %406 = fadd float %405, %279
  %407 = fmul float 0x3FD1B69300000000, %77
  %408 = fadd float %407, %280
  %409 = bitcast float %281 to i32
  %410 = bitcast float %406 to i32
  %411 = bitcast float %408 to i32
  %412 = insertelement <4 x i32> undef, i32 %409, i32 0
  %413 = insertelement <4 x i32> %412, i32 %410, i32 1
  %414 = insertelement <4 x i32> %413, i32 %411, i32 2
  %415 = insertelement <4 x i32> %414, i32 undef, i32 3
  %416 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %415, <32 x i8> %93, <16 x i8> %95, i32 7)
  %417 = extractelement <4 x float> %416, i32 0
  %418 = fadd float %404, %417
  %419 = fmul float 0x3FDC5DEF00000000, %77
  %420 = fadd float %419, %279
  %421 = fmul float 0xBFEF342580000000, %77
  %422 = fadd float %421, %280
  %423 = bitcast float %281 to i32
  %424 = bitcast float %420 to i32
  %425 = bitcast float %422 to i32
  %426 = insertelement <4 x i32> undef, i32 %423, i32 0
  %427 = insertelement <4 x i32> %426, i32 %424, i32 1
  %428 = insertelement <4 x i32> %427, i32 %425, i32 2
  %429 = insertelement <4 x i32> %428, i32 undef, i32 3
  %430 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %429, <32 x i8> %93, <16 x i8> %95, i32 7)
  %431 = extractelement <4 x float> %430, i32 0
  %432 = fadd float %418, %431
  %433 = fmul float 0x3FE132A000000000, %77
  %434 = fadd float %433, %279
  %435 = fmul float 0xBFDE51A940000000, %77
  %436 = fadd float %435, %280
  %437 = bitcast float %281 to i32
  %438 = bitcast float %434 to i32
  %439 = bitcast float %436 to i32
  %440 = insertelement <4 x i32> undef, i32 %437, i32 0
  %441 = insertelement <4 x i32> %440, i32 %438, i32 1
  %442 = insertelement <4 x i32> %441, i32 %439, i32 2
  %443 = insertelement <4 x i32> %442, i32 undef, i32 3
  %444 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %443, <32 x i8> %93, <16 x i8> %95, i32 7)
  %445 = extractelement <4 x float> %444, i32 0
  %446 = fadd float %432, %445
  %447 = fmul float 0xBFD0F54100000000, %77
  %448 = fadd float %447, %279
  %449 = fmul float 0xBFDACFC0C0000000, %77
  %450 = fadd float %449, %280
  %451 = bitcast float %281 to i32
  %452 = bitcast float %448 to i32
  %453 = bitcast float %450 to i32
  %454 = insertelement <4 x i32> undef, i32 %451, i32 0
  %455 = insertelement <4 x i32> %454, i32 %452, i32 1
  %456 = insertelement <4 x i32> %455, i32 %453, i32 2
  %457 = insertelement <4 x i32> %456, i32 undef, i32 3
  %458 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %457, <32 x i8> %93, <16 x i8> %95, i32 7)
  %459 = extractelement <4 x float> %458, i32 0
  %460 = fadd float %446, %459
  %461 = fmul float 0x3FE957DC40000000, %77
  %462 = fadd float %461, %279
  %463 = fmul float 0x3FC86F7900000000, %77
  %464 = fadd float %463, %280
  %465 = bitcast float %281 to i32
  %466 = bitcast float %462 to i32
  %467 = bitcast float %464 to i32
  %468 = insertelement <4 x i32> undef, i32 %465, i32 0
  %469 = insertelement <4 x i32> %468, i32 %466, i32 1
  %470 = insertelement <4 x i32> %469, i32 %467, i32 2
  %471 = insertelement <4 x i32> %470, i32 undef, i32 3
  %472 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %471, <32 x i8> %93, <16 x i8> %95, i32 7)
  %473 = extractelement <4 x float> %472, i32 0
  %474 = fadd float %460, %473
  %475 = fmul float 0xBFCEF63300000000, %77
  %476 = fadd float %475, %279
  %477 = fmul float 0x3FEFE7F500000000, %77
  %478 = fadd float %477, %280
  %479 = bitcast float %281 to i32
  %480 = bitcast float %476 to i32
  %481 = bitcast float %478 to i32
  %482 = insertelement <4 x i32> undef, i32 %479, i32 0
  %483 = insertelement <4 x i32> %482, i32 %480, i32 1
  %484 = insertelement <4 x i32> %483, i32 %481, i32 2
  %485 = insertelement <4 x i32> %484, i32 undef, i32 3
  %486 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %485, <32 x i8> %93, <16 x i8> %95, i32 7)
  %487 = extractelement <4 x float> %486, i32 0
  %488 = fadd float %474, %487
  %489 = fmul float 0x3FC9946600000000, %77
  %490 = fadd float %489, %279
  %491 = fmul float 0x3FE92A4D00000000, %77
  %492 = fadd float %491, %280
  %493 = bitcast float %281 to i32
  %494 = bitcast float %490 to i32
  %495 = bitcast float %492 to i32
  %496 = insertelement <4 x i32> undef, i32 %493, i32 0
  %497 = insertelement <4 x i32> %496, i32 %494, i32 1
  %498 = insertelement <4 x i32> %497, i32 %495, i32 2
  %499 = insertelement <4 x i32> %498, i32 undef, i32 3
  %500 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %499, <32 x i8> %93, <16 x i8> %95, i32 7)
  %501 = extractelement <4 x float> %500, i32 0
  %502 = fadd float %488, %501
  %503 = fmul float 0x3FC2691300000000, %77
  %504 = fadd float %503, %279
  %505 = fmul float 0xBFC20C8C00000000, %77
  %506 = fadd float %505, %280
  %507 = bitcast float %281 to i32
  %508 = bitcast float %504 to i32
  %509 = bitcast float %506 to i32
  %510 = insertelement <4 x i32> undef, i32 %507, i32 0
  %511 = insertelement <4 x i32> %510, i32 %508, i32 1
  %512 = insertelement <4 x i32> %511, i32 %509, i32 2
  %513 = insertelement <4 x i32> %512, i32 undef, i32 3
  %514 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %513, <32 x i8> %93, <16 x i8> %95, i32 7)
  %515 = extractelement <4 x float> %514, i32 0
  %516 = fadd float %502, %515
  %517 = fmul float %516, 6.250000e-02
  br label %ENDIF48

ELSE50:                                           ; preds = %main_body
  %518 = fmul float %337, 2.500000e-01
  br label %ENDIF48

ENDIF48:                                          ; preds = %ELSE50, %IF49
  %temp36.0 = phi float [ %517, %IF49 ], [ %518, %ELSE50 ]
  %519 = fmul float %263, %temp36.0
  %520 = fadd float %temp36.0, %76
  %521 = fsub float -0.000000e+00, %205
  %522 = call float @llvm.AMDIL.clamp.(float %521, float 0.000000e+00, float 1.000000e+00)
  %523 = fadd float %520, %522
  %524 = call float @llvm.AMDIL.clamp.(float %523, float 0.000000e+00, float 1.000000e+00)
  %525 = fmul float %., %524
  %526 = fadd float %525, %197
  %527 = fmul float %.51, %524
  %528 = fadd float %527, %198
  %529 = fmul float %.52, %524
  %530 = fadd float %529, %199
  %531 = fmul float %78, %153
  %532 = fmul float %79, %154
  %533 = fadd float %531, %532
  %534 = fmul float %80, %155
  %535 = fadd float %533, %534
  %536 = fmul float %81, 1.000000e+00
  %537 = fadd float %535, %536
  %538 = fmul float %82, %153
  %539 = fmul float %83, %154
  %540 = fadd float %538, %539
  %541 = fmul float %84, %155
  %542 = fadd float %540, %541
  %543 = fmul float %85, 1.000000e+00
  %544 = fadd float %542, %543
  %545 = bitcast float %537 to i32
  %546 = bitcast float %544 to i32
  %547 = insertelement <2 x i32> undef, i32 %545, i32 0
  %548 = insertelement <2 x i32> %547, i32 %546, i32 1
  %549 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %548, <32 x i8> %89, <16 x i8> %91, i32 2)
  %550 = extractelement <4 x float> %549, i32 0
  %551 = extractelement <4 x float> %549, i32 1
  %552 = extractelement <4 x float> %549, i32 2
  %553 = extractelement <4 x float> %549, i32 3
  %554 = fmul float %550, %72
  %555 = fmul float %551, %73
  %556 = fmul float %552, %74
  %557 = fmul float %553, %75
  %558 = fsub float -0.000000e+00, %557
  %559 = fadd float 1.000000e+00, %558
  %560 = fmul float %526, %559
  %561 = fmul float %528, %559
  %562 = fmul float %530, %559
  %563 = fmul float %554, %557
  %564 = fadd float %563, %560
  %565 = fmul float %555, %557
  %566 = fadd float %565, %561
  %567 = fmul float %556, %557
  %568 = fadd float %567, %562
  %569 = fmul float %519, %559
  %570 = call i32 @llvm.SI.packf16(float %564, float %566)
  %571 = bitcast i32 %570 to float
  %572 = call i32 @llvm.SI.packf16(float %568, float %569)
  %573 = bitcast i32 %572 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %571, float %573, float %571, float %573)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c2000968
c2008969
bf8c007f
7e000201
d2820009
04000103
c2000915
c2008919
bf8c007f
7e000201
d282000b
04000109
c2000914
c2008918
bf8c007f
7e000201
d282000a
04000102
c0860310
c0c80520
bf8c007f
f0800f00
0064100a
bf8c0770
06002311
062800f3
06002512
062a00f3
10002b15
d2820000
04022914
080000f2
d2060800
02010100
7e025b00
10020101
d2060000
22010100
d0080000
02020080
d2000000
00020280
06022713
060202f3
102c0300
c2000931
bf8c007f
10002800
c2000935
bf8c007f
d2820000
04022a00
c2000939
bf8c007f
d2820004
04022c00
c2000930
bf8c007f
10002800
c2000934
bf8c007f
d2820000
04022a00
c2000938
bf8c007f
d2820003
04022c00
c2000932
bf8c007f
10002800
c2000936
bf8c007f
d2820000
04022a00
c200093a
bf8c007f
d2820005
04022c00
c2000933
bf8c007f
10002800
c2000937
bf8c007f
d2820006
04022a00
d28a0018
04160903
d28c0017
04160903
d28e0019
04160903
d288001a
04160903
d2060100
02010119
7e005500
7e0202ff
3fc00000
d2820019
04060117
d2820018
04060118
c086030c
c0c80518
bf8c007f
f0800700
00640318
c2000940
bf8c0070
10002800
c2000941
bf8c007f
d2820000
04000115
c2000942
bf8c007f
d2820000
04000116
c200094b
bf8c007f
06000000
d2060817
02010100
080e2ef2
c200094e
bf8c007f
10020e00
c2008946
bf8c007f
d2820001
04040317
d2060006
22010100
d2060808
02010106
081810f2
100c1800
c200094a
bf8c007f
d2820006
04180108
d0020000
02010100
d200000d
0001e480
d206000d
2201010d
7e1a110d
d10a0000
0201010d
d2000001
00020d01
c203094d
bf8c007f
100c0e06
c2038945
bf8c007f
d2820006
04180f17
101a1806
c2030949
bf8c007f
d282000d
04340d08
d2000006
00021b06
c203094c
bf8c007f
100e0e06
c2038944
bf8c007f
d2820007
041c0f17
10181806
c2030948
bf8c007f
d2820008
04300d08
d2000007
00021107
c0860308
c0c80510
bf8c007f
f0800100
0064080a
c200091e
c200891f
bf8c0070
7e140201
d2820008
04280108
7e105508
c2000917
c200891b
bf8c007f
7e140201
d2820009
04280109
10121109
c2000916
c200891a
bf8c007f
7e140201
d2820002
04280102
10041102
c2000920
bf8c007f
10140400
c2000924
bf8c007f
d282000a
042a1200
c2000928
bf8c007f
d282000a
042a1000
c200092c
bf8c007f
061c1400
c20d095d
7e1402ff
bf50c0d4
bf8c007f
d282000b
043a141a
c2000922
bf8c007f
101e0400
c2000926
bf8c007f
d282000f
043e1200
c200092a
bf8c007f
d282000f
043e1000
c200092e
bf8c007f
06141e00
c2000921
bf8c007f
101e0400
c2000925
bf8c007f
d282000f
043e1200
c2000929
bf8c007f
d282000f
043e1000
c200092d
bf8c007f
061e1e00
7e3002ff
bf610e50
d282000c
043e301a
c0860304
c0c80508
bf8c007f
f0a00100
0064180a
7e3202ff
3f798f60
d282001d
043a321a
7e32030a
7e34030b
7e36030c
7e38030d
7e34031d
7e3a02ff
3f41a8ec
d282001b
043e3a1a
f0a00100
00641919
bf8c0770
06303119
7e3202ff
3f7211ee
d282001d
043a321a
7e32030a
7e34030b
7e36030c
7e38030d
7e34031d
7e3a02ff
bf44d71b
d282001b
043e3a1a
f0a00100
00641919
bf8c0770
06303318
7e3202ff
bf5068d4
d282001d
043a321a
7e32030a
7e34030b
7e36030c
7e38030d
7e34031d
7e3a02ff
3f6a148a
d282001b
043e3a1a
f0a00100
00641919
bf8c0770
063e3318
08303ef6
1030311f
102e2f18
d00a0000
02010117
d2000017
0001e480
d2060017
22010117
7e2e1117
d1040000
02010117
102e0502
d2820017
045e1309
d2820017
045e1108
7e305b17
102e3109
c2030955
bf8c007f
082e2e06
10323102
c2030954
bf8c007f
08323206
10343319
d282001a
046a2f17
10303108
c2030956
bf8c007f
08303006
d282001a
046a3118
7e345b1a
102e3517
10323519
10282919
d2820014
04522b17
102a3518
d2820014
04522d15
d2060814
02010114
7e284f14
102020ff
44800000
0e202910
7e204b10
7e2202ff
3c23d70a
d00c0006
02022300
d2000011
0019e480
d2060011
22010111
7e221111
362222f2
10202310
c2030953
bf8c007f
10202006
c20d8967
c20e0966
c20e8965
c20f0964
c20f8963
c2100962
c2108961
c2110960
c211895c
c212095b
c212895a
c2130959
c2138958
bf8c007f
be982400
8998187e
103c3eff
3e800000
be982518
c0800300
c0c20500
7e2c021b
7e32021c
7e3a021d
7e38021e
7e2a021f
7e300220
7e360221
7e340222
7e260223
7e280224
7e220225
7e2e0226
7e240227
bf8c007f
89fe187e
bf880093
7e3c021a
7e4002ff
becc51e0
d282000c
043e411e
7e4002ff
bf7127fa
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
bf6dec6b
d282000c
043e411e
7e4002ff
bdc0e398
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3e967720
d282000c
043e411e
7e4002ff
3eb09e84
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3eea5988
d282000c
043e411e
7e4002ff
bf6a777e
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3e8db498
d282000c
043e411e
7e4002ff
bec3fb24
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
bf79a12c
d282000c
043e411e
7e4002ff
3ee2ef78
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
bef28d4a
d282000c
043e411e
7e4002ff
3f099500
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
bed67e06
d282000c
043e411e
7e4002ff
be87aa08
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3e437bc8
d282000c
043e411e
7e4002ff
3f4abee2
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3f7f3fa8
d282000c
043e411e
7e4002ff
be77b198
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
3f495268
d282000c
043e411e
7e4002ff
3e4ca330
d282000b
043a411e
f0a00100
0064200a
bf8c0770
063e411f
7e4002ff
be106460
d282000c
043e411e
7e1e02ff
3e134898
d282000b
043a1f1e
f0a00100
00640a0a
bf8c0770
0614151f
103c14ff
3d800000
88fe187e
1014131d
d282000a
042a051c
d282000a
042a1119
06162d0a
1012131b
d2820002
0426051a
d2820002
040a1118
06142b02
f0800f00
0001080a
bf8c0770
101a2f09
1018290b
080418f2
061c271e
d2060000
22010100
d2060800
02010100
0600010e
d2060800
02010100
d2820006
04120106
100c0506
d2820006
041a190d
101a2508
d2820007
040e0107
100e0507
d2820007
041e190d
5e0c0d07
100e230a
d2820000
04160101
10000500
d2820000
04021907
10023d10
10020501
5e000300
f8001c0f
00060006
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL TEMP[0], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MOV OUT[0], TEMP[0]
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800700
bf8c007f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    0.2500,     0.5000,     0.2126,     0.7152}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].xy, IN[0].zwww
  3: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  4: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
  5: MOV TEMP[1].xy, IN[1].xyyy
  6: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  7: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
  8: MOV TEMP[1].xy, IN[1].zwww
  9: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
 10: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
 11: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx
 12: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[2].xxxx
 13: MOV TEMP[1].y, IMM[0].yyyy
 14: MUL TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx
 15: MOV TEMP[1].xy, TEMP[1].xyyy
 16: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
 17: MOV TEMP[0].x, TEMP[1].xxxx
 18: MOV TEMP[1].y, IMM[0].yyyy
 19: MUL TEMP[1].x, TEMP[0].yyyy, IMM[0].xxxx
 20: MOV TEMP[1].xy, TEMP[1].xyyy
 21: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
 22: MOV TEMP[0].y, TEMP[1].xxxx
 23: MOV TEMP[1].y, IMM[0].yyyy
 24: MUL TEMP[1].x, TEMP[0].zzzz, IMM[0].xxxx
 25: MOV TEMP[1].xy, TEMP[1].xyyy
 26: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
 27: MOV TEMP[0].z, TEMP[1].xxxx
 28: DP3 TEMP[0].x, TEMP[0].xyzz, IMM[1].xyzz
 29: MOV OUT[0], TEMP[0].xxxx
 30: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %28 = load <32 x i8> addrspace(2)* %27, !tbaa !0
  %29 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %39 = bitcast float %31 to i32
  %40 = bitcast float %32 to i32
  %41 = insertelement <2 x i32> undef, i32 %39, i32 0
  %42 = insertelement <2 x i32> %41, i32 %40, i32 1
  %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %24, <16 x i8> %26, i32 2)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = bitcast float %33 to i32
  %48 = bitcast float %34 to i32
  %49 = insertelement <2 x i32> undef, i32 %47, i32 0
  %50 = insertelement <2 x i32> %49, i32 %48, i32 1
  %51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %24, <16 x i8> %26, i32 2)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fadd float %44, %52
  %56 = fadd float %45, %53
  %57 = fadd float %46, %54
  %58 = bitcast float %35 to i32
  %59 = bitcast float %36 to i32
  %60 = insertelement <2 x i32> undef, i32 %58, i32 0
  %61 = insertelement <2 x i32> %60, i32 %59, i32 1
  %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %24, <16 x i8> %26, i32 2)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = fadd float %55, %63
  %67 = fadd float %56, %64
  %68 = fadd float %57, %65
  %69 = bitcast float %37 to i32
  %70 = bitcast float %38 to i32
  %71 = insertelement <2 x i32> undef, i32 %69, i32 0
  %72 = insertelement <2 x i32> %71, i32 %70, i32 1
  %73 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %24, <16 x i8> %26, i32 2)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = fadd float %66, %74
  %78 = fadd float %67, %75
  %79 = fadd float %68, %76
  %80 = fmul float %77, 2.500000e-01
  %81 = fmul float %78, 2.500000e-01
  %82 = fmul float %79, 2.500000e-01
  %83 = fmul float %80, %22
  %84 = fmul float %81, %22
  %85 = fmul float %82, %22
  %86 = fmul float %83, 2.500000e-01
  %87 = bitcast float %86 to i32
  %88 = bitcast float 5.000000e-01 to i32
  %89 = insertelement <2 x i32> undef, i32 %87, i32 0
  %90 = insertelement <2 x i32> %89, i32 %88, i32 1
  %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %28, <16 x i8> %30, i32 2)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = fmul float %84, 2.500000e-01
  %94 = bitcast float %93 to i32
  %95 = bitcast float 5.000000e-01 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %28, <16 x i8> %30, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = fmul float %85, 2.500000e-01
  %101 = bitcast float %100 to i32
  %102 = bitcast float 5.000000e-01 to i32
  %103 = insertelement <2 x i32> undef, i32 %101, i32 0
  %104 = insertelement <2 x i32> %103, i32 %102, i32 1
  %105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %28, <16 x i8> %30, i32 2)
  %106 = extractelement <4 x float> %105, i32 0
  %107 = fmul float %92, 0x3FCB367A00000000
  %108 = fmul float %99, 0x3FE6E2EB20000000
  %109 = fadd float %108, %107
  %110 = fmul float %106, 0x3FB27BB300000000
  %111 = fadd float %109, %110
  %112 = call i32 @llvm.SI.packf16(float %111, float %111)
  %113 = bitcast i32 %112 to float
  %114 = call i32 @llvm.SI.packf16(float %111, float %111)
  %115 = bitcast i32 %114 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %113, float %115, float %113, float %115)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0300
c80d0301
c8080200
c8090201
c0840300
c0c60500
bf8c007f
f0800700
00430202
c8180100
c8190101
c8140000
c8150001
f0800700
00430505
bf8c0770
06160706
c8240500
c8250501
c8200400
c8210401
f0800700
00430808
bf8c0770
061c130b
c8300700
c8310701
c82c0600
c82d0601
f0800700
00430b0b
bf8c0770
0600190e
100000ff
3e800000
c0840100
bf8c007f
c2000908
bf8c007f
10000000
100000ff
3e800000
7e0202f0
c0840304
c0c60508
bf8c007f
f0800100
00430e00
061e0505
061e110f
061e170f
101e1eff
3e800000
101e1e00
10001eff
3e800000
f0800100
00430f00
bf8c0770
101e1eff
3e59b3d0
7e2002ff
3f371759
d282000e
043e210e
06040907
06041502
06041b02
100404ff
3e800000
10040400
100004ff
3e800000
f0800100
00430000
7e0202ff
3d93dd98
bf8c0770
d2820000
043a0300
5e000100
f8001c0f
00000000
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..1]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: ADD TEMP[2].xy, TEMP[1].xyyy, CONST[1].xyyy
  4: ADD TEMP[3].xy, TEMP[1].xyyy, -CONST[1].xyyy
  5: MOV TEMP[2].zw, TEMP[3].yyxy
  6: ADD TEMP[3].xy, TEMP[1].xyyy, CONST[1].zwww
  7: ADD TEMP[1].xy, TEMP[1].xyyy, -CONST[1].zwww
  8: MOV TEMP[3].zw, TEMP[1].yyxy
  9: MOV OUT[1], TEMP[2]
 10: MOV OUT[2], TEMP[3]
 11: MOV OUT[0], TEMP[0]
 12: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0
  %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %5)
  %22 = extractelement <4 x float> %21, i32 0
  %23 = extractelement <4 x float> %21, i32 1
  %24 = extractelement <4 x float> %21, i32 2
  %25 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %5)
  %28 = extractelement <4 x float> %27, i32 0
  %29 = extractelement <4 x float> %27, i32 1
  %30 = fmul float %28, %11
  %31 = fadd float %30, %13
  %32 = fmul float %29, %12
  %33 = fadd float %32, %14
  %34 = fadd float %31, %15
  %35 = fadd float %33, %16
  %36 = fsub float -0.000000e+00, %15
  %37 = fadd float %31, %36
  %38 = fsub float -0.000000e+00, %16
  %39 = fadd float %33, %38
  %40 = fadd float %31, %17
  %41 = fadd float %33, %18
  %42 = fsub float -0.000000e+00, %17
  %43 = fadd float %31, %42
  %44 = fsub float -0.000000e+00, %18
  %45 = fadd float %33, %44
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %34, float %35, float %37, float %39)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float %43, float %45)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %22, float %23, float %24, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020200
c0800100
bf8c0070
c2020101
c2028103
bf8c007f
7e020205
d2820001
04040903
c2020105
bf8c007f
0a0c0204
c2028100
c2040102
bf8c007f
7e0e0208
d2820002
041c0b02
c2028104
bf8c007f
0a060405
06080204
060a0405
f800020f
06030405
c2020107
bf8c000f
0a060204
c2000106
bf8c007f
0a080400
06020204
06040400
f800021f
03040102
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL IN[3], GENERIC[22], PERSPECTIVE
DCL IN[4], GENERIC[23], PERSPECTIVE
DCL IN[5], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[5..18]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    0.5000,     0.2500,     0.4545,     2.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: ADD TEMP[2].xy, IN[0].xyyy, CONST[16].xyyy
  4: MOV TEMP[2].xy, TEMP[2].xyyy
  5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
  6: LRP TEMP[0].xyz, CONST[16].zzzz, TEMP[2].xyzz, TEMP[0].xyzz
  7: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[16].wwww
  8: MOV TEMP[2].y, IMM[0].xxxx
  9: MUL TEMP[2].x, TEMP[0].xxxx, IMM[0].yyyy
 10: MOV TEMP[2].xy, TEMP[2].xyyy
 11: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 12: MOV TEMP[0].x, TEMP[2].xxxx
 13: MOV TEMP[2].y, IMM[0].xxxx
 14: MUL TEMP[2].x, TEMP[0].yyyy, IMM[0].yyyy
 15: MOV TEMP[2].xy, TEMP[2].xyyy
 16: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 17: MOV TEMP[0].y, TEMP[2].xxxx
 18: MOV TEMP[2].y, IMM[0].xxxx
 19: MUL TEMP[2].x, TEMP[0].zzzz, IMM[0].yyyy
 20: MOV TEMP[2].xy, TEMP[2].xyyy
 21: TEX TEMP[2].x, TEMP[2], SAMP[4], 2D
 22: MOV TEMP[0].z, TEMP[2].xxxx
 23: MOV TEMP[2].xy, IN[0].zwww
 24: TEX TEMP[2], TEMP[2], SAMP[1], 2D
 25: MUL TEMP[2].xyz, TEMP[2], IMM[0].xxxx
 26: MOV TEMP[3].xy, IN[3].zwww
 27: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D
 28: MIN TEMP[3].x, TEMP[3].xxxx, CONST[7].zzzz
 29: MAX TEMP[3].x, TEMP[3].xxxx, CONST[7].yyyy
 30: LRP TEMP[0].xyz, CONST[7].xxxx, TEMP[0].xyzz, TEMP[3].xxxx
 31: MAD_SAT TEMP[0].xyz, TEMP[2].xyzz, CONST[5].xyzz, TEMP[0].xyzz
 32: POW TEMP[2].x, TEMP[0].xxxx, IMM[0].zzzz
 33: POW TEMP[3].x, TEMP[0].yyyy, IMM[0].zzzz
 34: MOV TEMP[2].y, TEMP[3].xxxx
 35: POW TEMP[3].x, TEMP[0].zzzz, IMM[0].zzzz
 36: MOV TEMP[2].z, TEMP[3].xxxx
 37: MOV TEMP[3].y, IMM[0].xxxx
 38: DP3_SAT TEMP[4].x, TEMP[2].xyzz, IMM[1].xyzz
 39: MOV TEMP[3].x, TEMP[4].xxxx
 40: MOV TEMP[3].xy, TEMP[3].xyyy
 41: TEX TEMP[3], TEMP[3], SAMP[3], 2D
 42: ADD TEMP[4].xyz, CONST[13].xyzz, -TEMP[2].xyzz
 43: ADD TEMP[2].xyz, CONST[14].xyzz, -TEMP[2].xyzz
 44: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz
 45: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
 46: MOV TEMP[4].y, TEMP[2].xxxx
 47: MAD_SAT TEMP[2].xy, TEMP[4].xyyy, CONST[15].xyyy, CONST[15].zwww
 48: MUL TEMP[4].x, TEMP[3].wwww, TEMP[2].xxxx
 49: MUL_SAT TEMP[2].x, TEMP[4].xxxx, TEMP[2].yyyy
 50: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[3].xyzz, TEMP[0].xyzz
 51: DP3 TEMP[0].x, TEMP[1].xyzz, IMM[1].xyzz
 52: LRP TEMP[0].xyz, CONST[6].zzzz, TEMP[1].xyzz, TEMP[0].xxxx
 53: MUL TEMP[1].xyz, TEMP[0].xyzz, IMM[0].wwww
 54: MOV TEMP[0].xy, IN[1].xyyy
 55: TEX TEMP[0], TEMP[0], SAMP[2], 2D
 56: MOV TEMP[2].xy, IN[1].zwww
 57: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 58: ADD TEMP[0], TEMP[0], TEMP[2]
 59: MOV TEMP[2].xy, IN[2].xyyy
 60: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 61: ADD TEMP[0], TEMP[0], TEMP[2]
 62: MOV TEMP[2].xy, IN[2].zwww
 63: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 64: ADD TEMP[0], TEMP[0], TEMP[2]
 65: MOV TEMP[2].xy, IN[4].xyyy
 66: TEX TEMP[2], TEMP[2], SAMP[2], 2D
 67: MUL TEMP[2], TEMP[2], CONST[12].xxxx
 68: MAD TEMP[0].xyz, TEMP[0], CONST[12].yyyy, TEMP[2]
 69: POW TEMP[2].x, TEMP[1].xxxx, CONST[6].wwww
 70: POW TEMP[2].y, TEMP[1].yyyy, CONST[6].wwww
 71: POW TEMP[2].z, TEMP[1].zzzz, CONST[6].wwww
 72: MUL_SAT TEMP[2].xyz, TEMP[2].xyzz, IMM[0].xxxx
 73: MAD TEMP[1].xyz, CONST[8].xyzz, TEMP[0].xxxx, TEMP[2].xyzz
 74: MAD TEMP[1].xyz, CONST[9].xyzz, TEMP[0].yyyy, TEMP[1].xyzz
 75: MAD TEMP[1].xyz, CONST[10].xyzz, TEMP[0].zzzz, TEMP[1].xyzz
 76: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[5].xyzz
 77: DP2 TEMP[0].x, IN[3].xyyy, IN[3].xyyy
 78: MAD_SAT TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx, CONST[6].yyyy
 79: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[0].xxxx
 80: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xxxx
 81: MOV OUT[0], TEMP[1]
 82: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 264)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 268)
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
  %71 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
  %75 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %93 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %94 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %95 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %96 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %97 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %98 = bitcast float %77 to i32
  %99 = bitcast float %78 to i32
  %100 = insertelement <2 x i32> undef, i32 %98, i32 0
  %101 = insertelement <2 x i32> %100, i32 %99, i32 1
  %102 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %101, <32 x i8> %58, <16 x i8> %60, i32 2)
  %103 = extractelement <4 x float> %102, i32 0
  %104 = extractelement <4 x float> %102, i32 1
  %105 = extractelement <4 x float> %102, i32 2
  %106 = extractelement <4 x float> %102, i32 3
  %107 = fadd float %77, %53
  %108 = fadd float %78, %54
  %109 = bitcast float %107 to i32
  %110 = bitcast float %108 to i32
  %111 = insertelement <2 x i32> undef, i32 %109, i32 0
  %112 = insertelement <2 x i32> %111, i32 %110, i32 1
  %113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %58, <16 x i8> %60, i32 2)
  %114 = extractelement <4 x float> %113, i32 0
  %115 = extractelement <4 x float> %113, i32 1
  %116 = extractelement <4 x float> %113, i32 2
  %117 = call float @llvm.AMDGPU.lrp(float %55, float %114, float %103)
  %118 = call float @llvm.AMDGPU.lrp(float %55, float %115, float %104)
  %119 = call float @llvm.AMDGPU.lrp(float %55, float %116, float %105)
  %120 = fmul float %117, %56
  %121 = fmul float %118, %56
  %122 = fmul float %119, %56
  %123 = fmul float %120, 2.500000e-01
  %124 = bitcast float %123 to i32
  %125 = bitcast float 5.000000e-01 to i32
  %126 = insertelement <2 x i32> undef, i32 %124, i32 0
  %127 = insertelement <2 x i32> %126, i32 %125, i32 1
  %128 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %127, <32 x i8> %74, <16 x i8> %76, i32 2)
  %129 = extractelement <4 x float> %128, i32 0
  %130 = fmul float %121, 2.500000e-01
  %131 = bitcast float %130 to i32
  %132 = bitcast float 5.000000e-01 to i32
  %133 = insertelement <2 x i32> undef, i32 %131, i32 0
  %134 = insertelement <2 x i32> %133, i32 %132, i32 1
  %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %74, <16 x i8> %76, i32 2)
  %136 = extractelement <4 x float> %135, i32 0
  %137 = fmul float %122, 2.500000e-01
  %138 = bitcast float %137 to i32
  %139 = bitcast float 5.000000e-01 to i32
  %140 = insertelement <2 x i32> undef, i32 %138, i32 0
  %141 = insertelement <2 x i32> %140, i32 %139, i32 1
  %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %141, <32 x i8> %74, <16 x i8> %76, i32 2)
  %143 = extractelement <4 x float> %142, i32 0
  %144 = bitcast float %79 to i32
  %145 = bitcast float %80 to i32
  %146 = insertelement <2 x i32> undef, i32 %144, i32 0
  %147 = insertelement <2 x i32> %146, i32 %145, i32 1
  %148 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %62, <16 x i8> %64, i32 2)
  %149 = extractelement <4 x float> %148, i32 0
  %150 = extractelement <4 x float> %148, i32 1
  %151 = extractelement <4 x float> %148, i32 2
  %152 = fmul float %149, 5.000000e-01
  %153 = fmul float %150, 5.000000e-01
  %154 = fmul float %151, 5.000000e-01
  %155 = bitcast float %91 to i32
  %156 = bitcast float %92 to i32
  %157 = insertelement <2 x i32> undef, i32 %155, i32 0
  %158 = insertelement <2 x i32> %157, i32 %156, i32 1
  %159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %62, <16 x i8> %64, i32 2)
  %160 = extractelement <4 x float> %159, i32 0
  %161 = fcmp uge float %160, %31
  %162 = select i1 %161, float %31, float %160
  %163 = fcmp uge float %162, %30
  %164 = select i1 %163, float %162, float %30
  %165 = call float @llvm.AMDGPU.lrp(float %29, float %129, float %164)
  %166 = call float @llvm.AMDGPU.lrp(float %29, float %136, float %164)
  %167 = call float @llvm.AMDGPU.lrp(float %29, float %143, float %164)
  %168 = fmul float %152, %22
  %169 = fadd float %168, %165
  %170 = fmul float %153, %23
  %171 = fadd float %170, %166
  %172 = fmul float %154, %24
  %173 = fadd float %172, %167
  %174 = call float @llvm.AMDIL.clamp.(float %169, float 0.000000e+00, float 1.000000e+00)
  %175 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00)
  %176 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00)
  %177 = call float @llvm.pow.f32(float %174, float 0x3FDD1745C0000000)
  %178 = call float @llvm.pow.f32(float %175, float 0x3FDD1745C0000000)
  %179 = call float @llvm.pow.f32(float %176, float 0x3FDD1745C0000000)
  %180 = fmul float %177, 0x3FCB367A00000000
  %181 = fmul float %178, 0x3FE6E2EB20000000
  %182 = fadd float %181, %180
  %183 = fmul float %179, 0x3FB27BB300000000
  %184 = fadd float %182, %183
  %185 = call float @llvm.AMDIL.clamp.(float %184, float 0.000000e+00, float 1.000000e+00)
  %186 = bitcast float %185 to i32
  %187 = bitcast float 5.000000e-01 to i32
  %188 = insertelement <2 x i32> undef, i32 %186, i32 0
  %189 = insertelement <2 x i32> %188, i32 %187, i32 1
  %190 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %189, <32 x i8> %70, <16 x i8> %72, i32 2)
  %191 = extractelement <4 x float> %190, i32 0
  %192 = extractelement <4 x float> %190, i32 1
  %193 = extractelement <4 x float> %190, i32 2
  %194 = extractelement <4 x float> %190, i32 3
  %195 = fsub float -0.000000e+00, %177
  %196 = fadd float %43, %195
  %197 = fsub float -0.000000e+00, %178
  %198 = fadd float %44, %197
  %199 = fsub float -0.000000e+00, %179
  %200 = fadd float %45, %199
  %201 = fsub float -0.000000e+00, %177
  %202 = fadd float %46, %201
  %203 = fsub float -0.000000e+00, %178
  %204 = fadd float %47, %203
  %205 = fsub float -0.000000e+00, %179
  %206 = fadd float %48, %205
  %207 = fmul float %196, %196
  %208 = fmul float %198, %198
  %209 = fadd float %208, %207
  %210 = fmul float %200, %200
  %211 = fadd float %209, %210
  %212 = fmul float %202, %202
  %213 = fmul float %204, %204
  %214 = fadd float %213, %212
  %215 = fmul float %206, %206
  %216 = fadd float %214, %215
  %217 = fmul float %211, %49
  %218 = fadd float %217, %51
  %219 = fmul float %216, %50
  %220 = fadd float %219, %52
  %221 = call float @llvm.AMDIL.clamp.(float %218, float 0.000000e+00, float 1.000000e+00)
  %222 = call float @llvm.AMDIL.clamp.(float %220, float 0.000000e+00, float 1.000000e+00)
  %223 = fmul float %194, %221
  %224 = fmul float %223, %222
  %225 = call float @llvm.AMDIL.clamp.(float %224, float 0.000000e+00, float 1.000000e+00)
  %226 = call float @llvm.AMDGPU.lrp(float %225, float %191, float %174)
  %227 = call float @llvm.AMDGPU.lrp(float %225, float %192, float %175)
  %228 = call float @llvm.AMDGPU.lrp(float %225, float %193, float %176)
  %229 = fmul float %226, 0x3FCB367A00000000
  %230 = fmul float %227, 0x3FE6E2EB20000000
  %231 = fadd float %230, %229
  %232 = fmul float %228, 0x3FB27BB300000000
  %233 = fadd float %231, %232
  %234 = call float @llvm.AMDGPU.lrp(float %27, float %226, float %233)
  %235 = call float @llvm.AMDGPU.lrp(float %27, float %227, float %233)
  %236 = call float @llvm.AMDGPU.lrp(float %27, float %228, float %233)
  %237 = fmul float %234, 2.000000e+00
  %238 = fmul float %235, 2.000000e+00
  %239 = fmul float %236, 2.000000e+00
  %240 = bitcast float %81 to i32
  %241 = bitcast float %82 to i32
  %242 = insertelement <2 x i32> undef, i32 %240, i32 0
  %243 = insertelement <2 x i32> %242, i32 %241, i32 1
  %244 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %66, <16 x i8> %68, i32 2)
  %245 = extractelement <4 x float> %244, i32 0
  %246 = extractelement <4 x float> %244, i32 1
  %247 = extractelement <4 x float> %244, i32 2
  %248 = bitcast float %83 to i32
  %249 = bitcast float %84 to i32
  %250 = insertelement <2 x i32> undef, i32 %248, i32 0
  %251 = insertelement <2 x i32> %250, i32 %249, i32 1
  %252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %251, <32 x i8> %66, <16 x i8> %68, i32 2)
  %253 = extractelement <4 x float> %252, i32 0
  %254 = extractelement <4 x float> %252, i32 1
  %255 = extractelement <4 x float> %252, i32 2
  %256 = fadd float %245, %253
  %257 = fadd float %246, %254
  %258 = fadd float %247, %255
  %259 = bitcast float %85 to i32
  %260 = bitcast float %86 to i32
  %261 = insertelement <2 x i32> undef, i32 %259, i32 0
  %262 = insertelement <2 x i32> %261, i32 %260, i32 1
  %263 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %262, <32 x i8> %66, <16 x i8> %68, i32 2)
  %264 = extractelement <4 x float> %263, i32 0
  %265 = extractelement <4 x float> %263, i32 1
  %266 = extractelement <4 x float> %263, i32 2
  %267 = fadd float %256, %264
  %268 = fadd float %257, %265
  %269 = fadd float %258, %266
  %270 = bitcast float %87 to i32
  %271 = bitcast float %88 to i32
  %272 = insertelement <2 x i32> undef, i32 %270, i32 0
  %273 = insertelement <2 x i32> %272, i32 %271, i32 1
  %274 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %273, <32 x i8> %66, <16 x i8> %68, i32 2)
  %275 = extractelement <4 x float> %274, i32 0
  %276 = extractelement <4 x float> %274, i32 1
  %277 = extractelement <4 x float> %274, i32 2
  %278 = fadd float %267, %275
  %279 = fadd float %268, %276
  %280 = fadd float %269, %277
  %281 = bitcast float %93 to i32
  %282 = bitcast float %94 to i32
  %283 = insertelement <2 x i32> undef, i32 %281, i32 0
  %284 = insertelement <2 x i32> %283, i32 %282, i32 1
  %285 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %284, <32 x i8> %66, <16 x i8> %68, i32 2)
  %286 = extractelement <4 x float> %285, i32 0
  %287 = extractelement <4 x float> %285, i32 1
  %288 = extractelement <4 x float> %285, i32 2
  %289 = fmul float %286, %41
  %290 = fmul float %287, %41
  %291 = fmul float %288, %41
  %292 = fmul float %278, %42
  %293 = fadd float %292, %289
  %294 = fmul float %279, %42
  %295 = fadd float %294, %290
  %296 = fmul float %280, %42
  %297 = fadd float %296, %291
  %298 = call float @llvm.pow.f32(float %237, float %28)
  %299 = call float @llvm.pow.f32(float %238, float %28)
  %300 = call float @llvm.pow.f32(float %239, float %28)
  %301 = fmul float %298, 5.000000e-01
  %302 = fmul float %299, 5.000000e-01
  %303 = fmul float %300, 5.000000e-01
  %304 = call float @llvm.AMDIL.clamp.(float %301, float 0.000000e+00, float 1.000000e+00)
  %305 = call float @llvm.AMDIL.clamp.(float %302, float 0.000000e+00, float 1.000000e+00)
  %306 = call float @llvm.AMDIL.clamp.(float %303, float 0.000000e+00, float 1.000000e+00)
  %307 = fmul float %32, %293
  %308 = fadd float %307, %304
  %309 = fmul float %33, %293
  %310 = fadd float %309, %305
  %311 = fmul float %34, %293
  %312 = fadd float %311, %306
  %313 = fmul float %35, %295
  %314 = fadd float %313, %308
  %315 = fmul float %36, %295
  %316 = fadd float %315, %310
  %317 = fmul float %37, %295
  %318 = fadd float %317, %312
  %319 = fmul float %38, %297
  %320 = fadd float %319, %314
  %321 = fmul float %39, %297
  %322 = fadd float %321, %316
  %323 = fmul float %40, %297
  %324 = fadd float %323, %318
  %325 = fmul float %320, %95
  %326 = fmul float %322, %96
  %327 = fmul float %324, %97
  %328 = fmul float %89, %89
  %329 = fmul float %90, %90
  %330 = fadd float %328, %329
  %331 = fmul float %330, %25
  %332 = fadd float %331, %26
  %333 = call float @llvm.AMDIL.clamp.(float %332, float 0.000000e+00, float 1.000000e+00)
  %334 = fmul float %325, %333
  %335 = fmul float %326, %333
  %336 = fmul float %327, %333
  %337 = fmul float %334, %333
  %338 = fmul float %335, %333
  %339 = fmul float %336, %333
  %340 = call i32 @llvm.SI.packf16(float %337, float %338)
  %341 = bitcast i32 %340 to float
  %342 = call i32 @llvm.SI.packf16(float %339, float %106)
  %343 = bitcast i32 %342 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %341, float %343, float %341, float %343)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0f00
c80d0f01
c8080e00
c8090e01
c0860304
c0c80508
bf8c007f
f0800100
00640202
c0840100
bf8c0070
c200091e
bf8c007f
d00c0018
02000102
7e060200
d2000002
00620702
c200091d
bf8c007f
d00c0018
02000102
7e060200
d2000002
00620503
c200091c
bf8c007f
d2080003
020000f2
100c0503
c8200100
c8210101
c81c0000
c81d0001
c08c0300
c0ce0500
bf8c007f
f0800f00
00c70207
c2008942
bf8c0070
d208000a
020002f2
1012070a
c2038941
bf8c007f
06181007
c2038940
bf8c007f
06160e07
f0800700
00c70d0b
bf8c0770
d2820007
04261c01
c2038943
bf8c007f
100e0e07
100e0eff
3e800000
7e1002f0
c08c0310
c0ce0520
bf8c007f
f0800100
00c70907
bf8c0770
d2820009
041a1200
c8300300
c8310301
c82c0200
c82d0201
f0800700
0064110b
bf8c0770
101624f0
c2060915
bf8c007f
d2820009
0424190b
d2060809
02010109
7e164f09
0e1616ff
3ee8ba2e
7e184b0b
c2060935
bf8c007f
0828180c
1016050a
d282000b
042e1a01
10161607
100e16ff
3e800000
f0800100
00c70b07
bf8c0770
d282000b
041a1600
102022f0
c2060914
bf8c007f
d282000b
042c1910
d206080b
0201010b
7e204f0b
0e2020ff
3ee8ba2e
7e204b10
c2060934
bf8c007f
082a200c
102a2b15
d2820014
04562914
1014090a
d282000a
042a1e01
10141407
100e14ff
3e800000
f0800100
00c70a07
bf8c0770
d2820006
041a1400
101426f0
c2000916
bf8c007f
d2820006
0418010a
d2060806
02010106
7e144f06
0e1414ff
3ee8ba2e
7e1c4b0a
c2000936
bf8c007f
08141c00
d282000a
0452150a
c200093c
c200893e
bf8c007f
7e1a0201
d282000a
0434010a
d206080f
0201010a
101420ff
3e59b3d0
7e1a02ff
3f371759
d2820011
042a1b0c
7e1402ff
3d93dd98
d2820011
0446150e
d2060807
02010111
c086030c
c0c80518
bf8c007f
f0800f00
00641107
bf8c0770
100e1f14
c2000939
bf8c007f
08101800
c2000938
bf8c007f
08182000
1018190c
d2820008
04321108
c200093a
bf8c007f
08181c00
d2820008
0422190c
c200093d
c200893f
bf8c007f
7e180201
d2820008
04300108
d2060808
02010108
100e1107
d2060807
02010107
08180ef2
1010130c
d2820009
04222507
1010170c
d2820008
04222307
101610ff
3e59b3d0
d282000b
042e1b09
100c0d0c
d2820006
041a2707
d2820007
042e1506
c200091a
bf8c007f
d208000a
020000f2
100e0f0a
d2820009
041e1200
06121309
7e124f09
c200891b
bf8c007f
0e121201
7e124b09
101212f0
d2060809
02010109
c82c0700
c82d0701
c8280600
c8290601
c0860308
c0c80510
bf8c007f
f0800700
00640a0a
c8380500
c8390501
c8340400
c8350401
f0800700
00640e0d
bf8c0770
061a150e
c8480900
c8490901
c8440800
c8450801
f0800700
00641111
bf8c0770
061a230d
c8540b00
c8550b01
c8500a00
c8510a01
f0800700
00641414
bf8c0770
061a290d
c8601100
c8611101
c85c1000
c85d1001
f0800700
00641717
c2010930
bf8c0070
10342e02
c2018931
bf8c007f
d282000d
0468070d
c2020921
bf8c007f
d282001a
04261a04
0612170f
06122509
06122b09
10363002
d2820009
046c0709
c2020925
bf8c007f
d282001a
046a1204
06141910
0614270a
06142d0a
10163202
d282000a
042c070a
c2010929
bf8c007f
d282000b
046a1402
c8301500
c8311501
1018190b
c82c0c00
c82d0c01
c8380d00
c8390d01
101c1d0e
d282000b
043a170b
c2010918
c2018919
bf8c007f
7e1c0203
d282000b
0438050b
d206080b
0201010b
1018170c
1018170c
d2820008
041e1000
06101108
7e104f08
0e101001
7e104b08
101010f0
d2060808
02010108
c2010920
bf8c007f
d2820008
04221a02
c2010924
bf8c007f
d2820008
04221202
c2010928
bf8c007f
d2820008
04221402
c8381400
c8391401
10101d08
10101708
10101708
5e101908
d2820006
041e0c00
060c0d06
7e0c4f06
0e0c0c01
7e0c4b06
100c0cf0
d2060806
02010106
c2000922
bf8c007f
d2820006
041a1a00
c2000926
bf8c007f
d2820006
041a1200
c200092a
bf8c007f
d2820006
041a1400
c81c1600
c81d1601
10000f06
10001700
10001700
5e000b00
f8001c0f
00080008
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..14]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.5000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: MAD TEMP[2].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww
  4: MOV TEMP[1].zw, TEMP[2].yyxy
  5: ADD TEMP[3].xy, TEMP[1].xyyy, CONST[2].xyyy
  6: ADD TEMP[4].xy, TEMP[1].xyyy, -CONST[2].xyyy
  7: MOV TEMP[3].zw, TEMP[4].yyxy
  8: ADD TEMP[4].xy, TEMP[1].xyyy, CONST[2].zwww
  9: ADD TEMP[5].xy, TEMP[1].xyyy, -CONST[2].zwww
 10: MOV TEMP[4].zw, TEMP[5].yyxy
 11: MAD TEMP[5].xy, TEMP[1].xyyy, CONST[3].xyyy, CONST[3].zwww
 12: ADD TEMP[2].xy, TEMP[2].xyyy, IMM[0].yzzz
 13: MOV TEMP[5].zw, TEMP[2].yyxy
 14: LRP TEMP[2], TEMP[1].yyyy, CONST[4], CONST[5]
 15: LRP TEMP[2].xyz, TEMP[2].wwww, TEMP[2].xyzz, CONST[14].xyzz
 16: MOV TEMP[2].xyz, TEMP[2].xyzx
 17: MOV OUT[1], TEMP[1]
 18: MOV OUT[2], TEMP[3]
 19: MOV OUT[3], TEMP[4]
 20: MOV OUT[4], TEMP[5]
 21: MOV OUT[5], TEMP[1].xyxy
 22: MOV OUT[6], TEMP[2]
 23: MOV OUT[0], TEMP[0]
 24: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %38 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0
  %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %5)
  %41 = extractelement <4 x float> %40, i32 0
  %42 = extractelement <4 x float> %40, i32 1
  %43 = extractelement <4 x float> %40, i32 2
  %44 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0
  %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %5)
  %47 = extractelement <4 x float> %46, i32 0
  %48 = extractelement <4 x float> %46, i32 1
  %49 = fmul float %47, %11
  %50 = fadd float %49, %13
  %51 = fmul float %48, %12
  %52 = fadd float %51, %14
  %53 = fmul float %47, %15
  %54 = fadd float %53, %17
  %55 = fmul float %48, %16
  %56 = fadd float %55, %18
  %57 = fadd float %50, %19
  %58 = fadd float %52, %20
  %59 = fsub float -0.000000e+00, %19
  %60 = fadd float %50, %59
  %61 = fsub float -0.000000e+00, %20
  %62 = fadd float %52, %61
  %63 = fadd float %50, %21
  %64 = fadd float %52, %22
  %65 = fsub float -0.000000e+00, %21
  %66 = fadd float %50, %65
  %67 = fsub float -0.000000e+00, %22
  %68 = fadd float %52, %67
  %69 = fmul float %50, %23
  %70 = fadd float %69, %25
  %71 = fmul float %52, %24
  %72 = fadd float %71, %26
  %73 = fadd float %54, 5.000000e-01
  %74 = fadd float %56, 0.000000e+00
  %75 = call float @llvm.AMDGPU.lrp(float %52, float %27, float %31)
  %76 = call float @llvm.AMDGPU.lrp(float %52, float %28, float %32)
  %77 = call float @llvm.AMDGPU.lrp(float %52, float %29, float %33)
  %78 = call float @llvm.AMDGPU.lrp(float %52, float %30, float %34)
  %79 = call float @llvm.AMDGPU.lrp(float %78, float %75, float %35)
  %80 = call float @llvm.AMDGPU.lrp(float %78, float %76, float %36)
  %81 = call float @llvm.AMDGPU.lrp(float %78, float %77, float %37)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %50, float %52, float %54, float %56)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %57, float %58, float %60, float %62)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %63, float %64, float %66, float %68)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %70, float %72, float %73, float %74)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %50, float %52, float %50, float %52)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %79, float %80, float %81, float %78)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020400
c0800100
bf8c0070
c2020105
c2028107
bf8c007f
7e020205
d2820002
04040905
c2020104
c2028106
bf8c007f
7e020205
d2820003
04040904
c2020101
c2028103
bf8c007f
7e020205
d2820001
04040905
c2020100
c2028102
bf8c007f
7e100205
d2820004
04200904
f800020f
02030104
c2020109
bf8c000f
0a0a0204
c2028108
bf8c007f
0a0c0805
060e0204
06100805
f800021f
05060708
c202010b
bf8c000f
0a0a0204
c202810a
bf8c007f
0a0c0805
060e0204
06100805
f800022f
05060708
c202010d
c202810f
bf8c000f
7e0a0205
d2820005
04140901
c202010c
c202810e
bf8c007f
7e0c0205
d2820006
04180904
06040480
060606f0
f800023f
02030506
f800024f
01040104
bf8c070f
080402f2
c2020116
bf8c007f
10060404
c2020112
bf8c007f
d2820005
040c0901
c2020117
bf8c007f
10060404
c2020113
bf8c007f
d2820003
040c0901
080806f2
c202013a
bf8c007f
100c0804
d2820005
041a0b03
c2020115
bf8c007f
100c0404
c2020111
bf8c007f
d2820006
04180901
c2020139
bf8c007f
100e0804
d2820006
041e0d03
c2020114
bf8c007f
10040404
c2020110
bf8c007f
d2820001
04080901
c2000138
bf8c007f
10040800
d2820001
040a0303
f800025f
03050601
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
IMM[1] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[0].xyzz
  4: LRP TEMP[0].xyz, IN[0].wwww, TEMP[0].xyzz, TEMP[2].xxxx
  5: SLT TEMP[2].x, IN[1].yyyy, CONST[1].xxxx
  6: F2I TEMP[2].x, -TEMP[2]
  7: UIF TEMP[2].xxxx :0
  8:   MOV TEMP[2].xyz, IMM[0].wwww
  9: ELSE :0
 10:   MOV TEMP[2].xyz, IMM[1].xxxx
 11: ENDIF
 12: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz
 13: MUL TEMP[0], TEMP[1], IN[0]
 14: MOV OUT[0], TEMP[0]
 15: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %33 = bitcast float %31 to i32
  %34 = bitcast float %32 to i32
  %35 = insertelement <2 x i32> undef, i32 %33, i32 0
  %36 = insertelement <2 x i32> %35, i32 %34, i32 1
  %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %24, <16 x i8> %26, i32 2)
  %38 = extractelement <4 x float> %37, i32 0
  %39 = extractelement <4 x float> %37, i32 1
  %40 = extractelement <4 x float> %37, i32 2
  %41 = extractelement <4 x float> %37, i32 3
  %42 = fmul float %38, 0x3FCB367A00000000
  %43 = fmul float %39, 0x3FE6E2EB20000000
  %44 = fadd float %43, %42
  %45 = fmul float %40, 0x3FB27BB300000000
  %46 = fadd float %44, %45
  %47 = call float @llvm.AMDGPU.lrp(float %30, float %38, float %46)
  %48 = call float @llvm.AMDGPU.lrp(float %30, float %39, float %46)
  %49 = call float @llvm.AMDGPU.lrp(float %30, float %40, float %46)
  %50 = fcmp ult float %32, %22
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %. = select i1 %56, float 0.000000e+00, float 1.000000e+00
  %57 = fmul float %47, %.
  %58 = fmul float %48, %.
  %59 = fmul float %49, %.
  %60 = fmul float %57, %27
  %61 = fmul float %58, %28
  %62 = fmul float %59, %29
  %63 = fmul float %41, %30
  %64 = call i32 @llvm.SI.packf16(float %60, float %61)
  %65 = bitcast i32 %64 to float
  %66 = call i32 @llvm.SI.packf16(float %62, float %63)
  %67 = bitcast i32 %66 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %65, float %67, float %65, float %67)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8200500
c8210501
c81c0400
c81d0401
c0840300
c0c60500
bf8c007f
f0800f00
00430207
bf8c0770
100c04ff
3e59b3d0
7e1202ff
3f371759
d2820006
041a1303
7e1202ff
3d93dd98
d2820009
041a1304
c8180300
c8190301
08140cf2
1012130a
d282000a
04260706
c0800100
bf8c007f
c2000104
bf8c007f
d0020000
02000108
d2000007
0001e480
d2060007
22010107
7e0e1107
d10a0000
02010107
d2000007
000100f2
10100f0a
c8280100
c8290101
10101508
d282000a
04260506
10140f0a
c82c0000
c82d0001
1014170a
5e10110a
d2820009
04260906
100e0f09
c8240200
c8250201
10001307
10020d05
5e000300
f8001c0f
00080008
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..4]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV TEMP[1].xy, IN[1].xyxx
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], CONST[4]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %5)
  %34 = extractelement <4 x float> %33, i32 0
  %35 = extractelement <4 x float> %33, i32 1
  %36 = extractelement <4 x float> %33, i32 2
  %37 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0
  %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %5)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = fmul float %11, %34
  %43 = fmul float %12, %34
  %44 = fmul float %13, %34
  %45 = fmul float %14, %34
  %46 = fmul float %15, %35
  %47 = fadd float %46, %42
  %48 = fmul float %16, %35
  %49 = fadd float %48, %43
  %50 = fmul float %17, %35
  %51 = fadd float %50, %44
  %52 = fmul float %18, %35
  %53 = fadd float %52, %45
  %54 = fmul float %19, %36
  %55 = fadd float %54, %47
  %56 = fmul float %20, %36
  %57 = fadd float %56, %49
  %58 = fmul float %21, %36
  %59 = fadd float %58, %51
  %60 = fmul float %22, %36
  %61 = fadd float %60, %53
  %62 = fadd float %55, %23
  %63 = fadd float %57, %24
  %64 = fadd float %59, %25
  %65 = fadd float %61, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %62, float %63, float %64, float %65)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840704
bf8c000f
e00c2000
80020100
7e0a0280
bf8c0770
f800021f
05050201
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..1], LOCAL
  0: MOV TEMP[0].xyz, IN[0].xyzx
  1: MOV TEMP[1].xy, IN[1].xyyy
  2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D
  3: MUL TEMP[1].x, IN[0].wwww, TEMP[1].wwww
  4: MOV TEMP[0].w, TEMP[1].xxxx
  5: MOV OUT[0], TEMP[0]
  6: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 3
  %36 = fmul float %27, %35
  %37 = call i32 @llvm.SI.packf16(float %24, float %25)
  %38 = bitcast i32 %37 to float
  %39 = call i32 @llvm.SI.packf16(float %26, float %36)
  %40 = bitcast i32 %39 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080100
c8090101
c80c0000
c80d0001
5e040503
c8100500
c8110501
c80c0400
c80d0401
c0800300
c0c40500
bf8c007f
f0800800
00020303
c8100300
c8110301
bf8c0770
10060704
c8100200
c8110201
5e000704
f8001c0f
00020002
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..6]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MAD TEMP[1], IN[1].xyxy, CONST[5], CONST[6]
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], CONST[4]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %39 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0
  %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %5)
  %42 = extractelement <4 x float> %41, i32 0
  %43 = extractelement <4 x float> %41, i32 1
  %44 = extractelement <4 x float> %41, i32 2
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = fmul float %11, %42
  %51 = fmul float %12, %42
  %52 = fmul float %13, %42
  %53 = fmul float %14, %42
  %54 = fmul float %15, %43
  %55 = fadd float %54, %50
  %56 = fmul float %16, %43
  %57 = fadd float %56, %51
  %58 = fmul float %17, %43
  %59 = fadd float %58, %52
  %60 = fmul float %18, %43
  %61 = fadd float %60, %53
  %62 = fmul float %19, %44
  %63 = fadd float %62, %55
  %64 = fmul float %20, %44
  %65 = fadd float %64, %57
  %66 = fmul float %21, %44
  %67 = fadd float %66, %59
  %68 = fmul float %22, %44
  %69 = fadd float %68, %61
  %70 = fadd float %63, %23
  %71 = fadd float %65, %24
  %72 = fadd float %67, %25
  %73 = fadd float %69, %26
  %74 = fmul float %48, %31
  %75 = fadd float %74, %35
  %76 = fmul float %49, %32
  %77 = fadd float %76, %36
  %78 = fmul float %48, %33
  %79 = fadd float %78, %37
  %80 = fmul float %49, %34
  %81 = fadd float %80, %38
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %77, float %79, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %71, float %72, float %73)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840704
bf8c000f
e00c2000
80020100
c2020117
c202811b
bf8c0070
7e0a0205
d2820005
04140902
c2020116
c202811a
bf8c007f
7e0c0205
d2820006
04180901
c2020115
c2028119
bf8c007f
7e0e0205
d2820007
041c0902
c2020114
c2028118
bf8c007f
7e100205
d2820001
04200901
f800021f
05060701
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xyz, IMM[0].xxxx
  1: MOV TEMP[1].xy, IN[0].xyyy
  2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D
  3: MOV TEMP[0].w, TEMP[1].wwww
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 3
  %32 = fcmp ugt float %31, 0x3FD99999A0000000
  %33 = sext i1 %32 to i32
  %34 = trunc i32 %33 to i1
  %35 = select i1 %34, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %35)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float %31)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800800
00010002
7e0202ff
3ecccccd
bf8c0770
d0080000
02020300
d2000001
0001e4f3
7c260280
7e0202f2
f800180f
00010101
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..6]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[6].xyzz, CONST[5].xyzz
  1: MUL TEMP[1], CONST[0], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[3]
  5: MAD TEMP[1].xy, IN[1].xyyy, CONST[4].xyyy, CONST[4].zwww
  6: MOV OUT[1], TEMP[1]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %37 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0
  %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %5)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = fmul float %40, %34
  %49 = fadd float %48, %31
  %50 = fmul float %41, %35
  %51 = fadd float %50, %32
  %52 = fmul float %42, %36
  %53 = fadd float %52, %33
  %54 = fmul float %11, %49
  %55 = fmul float %12, %49
  %56 = fmul float %13, %49
  %57 = fmul float %14, %49
  %58 = fmul float %15, %51
  %59 = fadd float %58, %54
  %60 = fmul float %16, %51
  %61 = fadd float %60, %55
  %62 = fmul float %17, %51
  %63 = fadd float %62, %56
  %64 = fmul float %18, %51
  %65 = fadd float %64, %57
  %66 = fmul float %19, %53
  %67 = fadd float %66, %59
  %68 = fmul float %20, %53
  %69 = fadd float %68, %61
  %70 = fmul float %21, %53
  %71 = fadd float %70, %63
  %72 = fmul float %22, %53
  %73 = fadd float %72, %65
  %74 = fadd float %67, %23
  %75 = fadd float %69, %24
  %76 = fadd float %71, %25
  %77 = fadd float %73, %26
  %78 = fmul float %46, %27
  %79 = fadd float %78, %29
  %80 = fmul float %47, %28
  %81 = fadd float %80, %30
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %81, float %63, float %65)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %75, float %76, float %77)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020119
c2028115
bf8c007f
7e020205
d2820001
04040904
c2020118
c2028114
bf8c007f
7e040205
d2820002
04080903
c2020103
bf8c007f
100e0404
c2020107
bf8c007f
d2820007
041e0204
c2020102
bf8c007f
10100404
c2020106
bf8c007f
d2820008
04220204
c0820704
bf8c007f
e00c2000
80010900
c2020111
c2028113
bf8c0070
7e000205
d2820000
0400090a
c2020110
c2028112
bf8c007f
7e1a0205
d2820009
04340909
f800020f
07080009
c202011a
c2028116
bf8c000f
7e000205
d2820000
04000905
c202010b
bf8c007f
d2820003
041e0004
c202010f
bf8c007f
06060604
c202010a
bf8c007f
d2820004
04220004
c202010e
bf8c007f
06080804
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160004
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820000
04060004
c200010c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV OUT[0], IMM[0].xxxx
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e0002f2
f800180f
00000000
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..5]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[5].xyzz, CONST[4].xyzz
  1: MUL TEMP[1], CONST[0], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[3]
  5: MOV OUT[0], TEMP[0]
  6: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0
  %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %5)
  %36 = extractelement <4 x float> %35, i32 0
  %37 = extractelement <4 x float> %35, i32 1
  %38 = extractelement <4 x float> %35, i32 2
  %39 = fmul float %36, %30
  %40 = fadd float %39, %27
  %41 = fmul float %37, %31
  %42 = fadd float %41, %28
  %43 = fmul float %38, %32
  %44 = fadd float %43, %29
  %45 = fmul float %11, %40
  %46 = fmul float %12, %40
  %47 = fmul float %13, %40
  %48 = fmul float %14, %40
  %49 = fmul float %15, %42
  %50 = fadd float %49, %45
  %51 = fmul float %16, %42
  %52 = fadd float %51, %46
  %53 = fmul float %17, %42
  %54 = fadd float %53, %47
  %55 = fmul float %18, %42
  %56 = fadd float %55, %48
  %57 = fmul float %19, %44
  %58 = fadd float %57, %50
  %59 = fmul float %20, %44
  %60 = fadd float %59, %52
  %61 = fmul float %21, %44
  %62 = fadd float %61, %54
  %63 = fmul float %22, %44
  %64 = fadd float %63, %56
  %65 = fadd float %58, %23
  %66 = fadd float %60, %24
  %67 = fadd float %62, %25
  %68 = fadd float %64, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %66, float %67, float %68)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0820700
bf8c007f
e00c2000
80010200
c0800100
bf8c0070
c2020115
c2028111
bf8c007f
7e000205
d2820000
04000903
c2020114
c2028110
bf8c007f
7e020205
d2820001
04040902
c2020103
bf8c007f
100c0204
c2020107
bf8c007f
d2820006
041a0004
c2020116
c2028112
bf8c007f
7e0e0205
d2820002
041c0904
c202010b
bf8c007f
d2820003
041a0404
c202010f
bf8c007f
06060604
c2020102
bf8c007f
10080204
c2020106
bf8c007f
d2820004
04120004
c202010a
bf8c007f
d2820004
04120404
c202010e
bf8c007f
06080804
c2020101
bf8c007f
100a0204
c2020105
bf8c007f
d2820005
04160004
c2020109
bf8c007f
d2820005
04160404
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10020204
c2020104
bf8c007f
d2820000
04060004
c2020108
bf8c007f
d2820000
04020404
c200010c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..10]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[8].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MOV OUT[1], TEMP[1]
 13: MOV OUT[0], TEMP[0]
 14: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fmul float %46, %40
  %56 = fadd float %55, %37
  %57 = fmul float %47, %41
  %58 = fadd float %57, %38
  %59 = fmul float %48, %42
  %60 = fadd float %59, %39
  %61 = fmul float %20, %56
  %62 = fmul float %21, %56
  %63 = fmul float %22, %56
  %64 = fmul float %23, %56
  %65 = fmul float %24, %58
  %66 = fadd float %65, %61
  %67 = fmul float %25, %58
  %68 = fadd float %67, %62
  %69 = fmul float %26, %58
  %70 = fadd float %69, %63
  %71 = fmul float %27, %58
  %72 = fadd float %71, %64
  %73 = fmul float %28, %60
  %74 = fadd float %73, %66
  %75 = fmul float %29, %60
  %76 = fadd float %75, %68
  %77 = fmul float %30, %60
  %78 = fadd float %77, %70
  %79 = fmul float %31, %60
  %80 = fadd float %79, %72
  %81 = fadd float %74, %32
  %82 = fadd float %76, %33
  %83 = fadd float %78, %34
  %84 = fadd float %80, %35
  %85 = fmul float %52, %36
  %86 = fmul float %53, %36
  %87 = fmul float %54, %36
  %88 = fmul float %11, %85
  %89 = fmul float %12, %85
  %90 = fmul float %13, %85
  %91 = fmul float %14, %86
  %92 = fadd float %91, %88
  %93 = fmul float %15, %86
  %94 = fadd float %93, %89
  %95 = fmul float %16, %86
  %96 = fadd float %95, %90
  %97 = fmul float %17, %87
  %98 = fadd float %97, %92
  %99 = fmul float %18, %87
  %100 = fadd float %99, %94
  %101 = fmul float %19, %87
  %102 = fadd float %101, %96
  %103 = fmul float %98, %98
  %104 = fmul float %100, %100
  %105 = fadd float %104, %103
  %106 = fmul float %102, %102
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %98, %108
  %110 = fmul float %100, %108
  %111 = fmul float %102, %108
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %111, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020123
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820001
04060804
d2820002
041a0301
7e045b02
100c0501
100e0503
10100505
c0820700
bf8c007f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020113
bf8c007f
10120204
c2020117
bf8c007f
d2820009
04260004
f800020f
09060708
c202012a
c2028126
bf8c000f
7e0c0205
d2820002
04180904
c202011b
bf8c007f
d2820003
04260404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..3], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
  9: RSQ TEMP[3].x, TEMP[3].xxxx
 10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 13: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 14: F2I TEMP[1].x, -TEMP[1]
 15: UIF TEMP[1].xxxx :1
 16:   MOV TEMP[1].x, IMM[0].yyyy
 17: ELSE :1
 18:   MOV TEMP[1].x, IMM[0].wwww
 19: ENDIF
 20: MOV TEMP[2].z, TEMP[1].xxxx
 21: MUL TEMP[1].x, CONST[2].xxxx, IMM[1].xxxx
 22: MOV TEMP[3].xy, IN[1].xyyy
 23: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
 24: MAD TEMP[3].x, TEMP[3].wwww, CONST[1].yyyy, CONST[1].zzzz
 25: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 26: F2I TEMP[3].x, -TEMP[3]
 27: UIF TEMP[3].xxxx :1
 28:   KILL
 29: ENDIF
 30: MOV TEMP[1].x, TEMP[1].xxxx
 31: MOV TEMP[1].yzw, TEMP[2].yxyz
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %26 = load <32 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = fcmp ugt float %16, 0.000000e+00
  %30 = select i1 %29, float 1.000000e+00, float 0.000000e+00
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.AMDIL.clamp.(float %30, float 0.000000e+00, float 1.000000e+00)
  %37 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %38 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %39 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %40 = bitcast float %36 to i32
  %41 = icmp ne i32 %40, 0
  %. = select i1 %41, float -1.000000e+00, float 1.000000e+00
  %42 = fmul float %33, %33
  %43 = fmul float %34, %34
  %44 = fadd float %43, %42
  %45 = fmul float %35, %35
  %46 = fadd float %44, %45
  %47 = call float @llvm.AMDGPU.rsq(float %46)
  %48 = fmul float %33, %47
  %49 = fmul float %34, %47
  %50 = fmul float %35, %47
  %51 = fmul float %48, %.
  %52 = fmul float %49, %.
  %53 = fmul float %50, %.
  %54 = fmul float %51, 5.000000e-01
  %55 = fadd float %54, 5.000000e-01
  %56 = fmul float %52, 5.000000e-01
  %57 = fadd float %56, 5.000000e-01
  %58 = fcmp uge float %53, 0.000000e+00
  %59 = select i1 %58, float 1.000000e+00, float 0.000000e+00
  %60 = fsub float -0.000000e+00, %59
  %61 = fptosi float %60 to i32
  %62 = bitcast i32 %61 to float
  %63 = bitcast float %62 to i32
  %64 = icmp ne i32 %63, 0
  %temp4.0 = select i1 %64, float 1.000000e+00, float 0.000000e+00
  %65 = fmul float %24, 9.765625e-04
  %66 = bitcast float %31 to i32
  %67 = bitcast float %32 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %26, <16 x i8> %28, i32 2)
  %71 = extractelement <4 x float> %70, i32 3
  %72 = fmul float %71, %22
  %73 = fadd float %72, %23
  %74 = fcmp ult float %73, 0.000000e+00
  %75 = select i1 %74, float 1.000000e+00, float 0.000000e+00
  %76 = fsub float -0.000000e+00, %75
  %77 = fptosi float %76 to i32
  %78 = bitcast i32 %77 to float
  %79 = bitcast float %78 to i32
  %80 = icmp ne i32 %79, 0
  br i1 %80, label %IF20, label %ENDIF19

IF20:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF19

ENDIF19:                                          ; preds = %main_body, %IF20
  %81 = call i32 @llvm.SI.packf16(float %65, float %55)
  %82 = bitcast i32 %81 to float
  %83 = call i32 @llvm.SI.packf16(float %57, float %temp4.0)
  %84 = bitcast i32 %83 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %82, float %84, float %82, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100300
c8110301
c80c0200
c80d0201
100a0703
d2820006
04160904
c8140400
c8150401
d2820006
041a0b05
7e0c5b06
100e0d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000004
0021e6f2
10040907
d2820002
03c1e102
10060d03
10060903
d2820003
03c1e103
100a0d05
10080905
d00c0008
02010104
d2000004
0021e480
d2060004
22010104
7e081104
d10a0008
02010104
d2000004
0021e480
c8180100
c8190101
c8140000
c8150001
c0840300
c0c60500
bf8c007f
f0800800
00430005
c0800100
bf8c0070
c2020105
c2028106
bf8c007f
7e020205
d2820000
04040900
d0020004
02010100
d2000000
0011e480
d2060000
22010100
7e001100
d10a0004
02010100
c2000108
7e0002ff
3a800000
bf8c007f
10000000
be802404
8980007e
7e0202f3
7c260280
88fe007e
5e020902
5e000700
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..11]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 13: MOV TEMP[2].zw, TEMP[1].yyxy
 14: MOV TEMP[1].x, TEMP[1].zzzz
 15: MOV OUT[1], TEMP[2]
 16: MOV OUT[2], TEMP[1]
 17: MOV OUT[0], TEMP[0]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = fmul float %50, %44
  %65 = fadd float %64, %41
  %66 = fmul float %51, %45
  %67 = fadd float %66, %42
  %68 = fmul float %52, %46
  %69 = fadd float %68, %43
  %70 = fmul float %20, %65
  %71 = fmul float %21, %65
  %72 = fmul float %22, %65
  %73 = fmul float %23, %65
  %74 = fmul float %24, %67
  %75 = fadd float %74, %70
  %76 = fmul float %25, %67
  %77 = fadd float %76, %71
  %78 = fmul float %26, %67
  %79 = fadd float %78, %72
  %80 = fmul float %27, %67
  %81 = fadd float %80, %73
  %82 = fmul float %28, %69
  %83 = fadd float %82, %75
  %84 = fmul float %29, %69
  %85 = fadd float %84, %77
  %86 = fmul float %30, %69
  %87 = fadd float %86, %79
  %88 = fmul float %31, %69
  %89 = fadd float %88, %81
  %90 = fadd float %83, %32
  %91 = fadd float %85, %33
  %92 = fadd float %87, %34
  %93 = fadd float %89, %35
  %94 = fmul float %56, %40
  %95 = fmul float %57, %40
  %96 = fmul float %58, %40
  %97 = fmul float %11, %94
  %98 = fmul float %12, %94
  %99 = fmul float %13, %94
  %100 = fmul float %14, %95
  %101 = fadd float %100, %97
  %102 = fmul float %15, %95
  %103 = fadd float %102, %98
  %104 = fmul float %16, %95
  %105 = fadd float %104, %99
  %106 = fmul float %17, %96
  %107 = fadd float %106, %101
  %108 = fmul float %18, %96
  %109 = fadd float %108, %103
  %110 = fmul float %19, %96
  %111 = fadd float %110, %105
  %112 = fmul float %107, %107
  %113 = fmul float %109, %109
  %114 = fadd float %113, %112
  %115 = fmul float %111, %111
  %116 = fadd float %114, %115
  %117 = call float @llvm.AMDGPU.rsq(float %116)
  %118 = fmul float %107, %117
  %119 = fmul float %109, %117
  %120 = fmul float %111, %117
  %121 = fmul float %62, %36
  %122 = fadd float %121, %38
  %123 = fmul float %63, %37
  %124 = fadd float %123, %39
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %122, float %124, float %118, float %119)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %119, float %120, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %91, float %92, float %93)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820002
04060804
d2820001
041a0502
7e085b01
10020903
10060905
c0840708
bf8c007f
e00c2000
80020500
c2020121
c2028123
bf8c0070
7e120205
d2820009
04240906
c2020120
c2028122
bf8c007f
7e140205
d2820005
04280905
f800020f
01030905
100e0902
c0820700
bf8c000f
e00c2000
80010300
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020113
bf8c007f
10100404
c2020117
bf8c007f
d2820008
04220004
f800021f
08070107
c202012e
c202812a
bf8c000f
7e020205
d2820001
04040905
c202011b
bf8c007f
d2820003
04220204
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080404
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120204
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160204
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820000
040a0004
c2020118
bf8c007f
d2820000
04020204
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[3].xxxx, IMM[1].yyyy
 40: MOV TEMP[3].xy, IN[1].xyyy
 41: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
 42: MAD TEMP[3].x, TEMP[3].wwww, CONST[2].yyyy, CONST[2].zzzz
 43: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 44: F2I TEMP[3].x, -TEMP[3]
 45: UIF TEMP[3].xxxx :1
 46:   KILL
 47: ENDIF
 48: MOV TEMP[1].x, TEMP[1].xxxx
 49: MOV TEMP[1].yzw, TEMP[2].yxyz
 50: MOV OUT[0], TEMP[1]
 51: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %26 = load <32 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %30 = load <32 x i8> addrspace(2)* %29, !tbaa !0
  %31 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = fcmp ugt float %16, 0.000000e+00
  %34 = select i1 %33, float 1.000000e+00, float 0.000000e+00
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.AMDIL.clamp.(float %34, float 0.000000e+00, float 1.000000e+00)
  %47 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %48 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %49 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %50 = bitcast float %46 to i32
  %51 = icmp ne i32 %50, 0
  %. = select i1 %51, float -1.000000e+00, float 1.000000e+00
  %52 = bitcast float %35 to i32
  %53 = bitcast float %36 to i32
  %54 = insertelement <2 x i32> undef, i32 %52, i32 0
  %55 = insertelement <2 x i32> %54, i32 %53, i32 1
  %56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %55, <32 x i8> %30, <16 x i8> %32, i32 2)
  %57 = extractelement <4 x float> %56, i32 1
  %58 = extractelement <4 x float> %56, i32 3
  %59 = fmul float 2.000000e+00, %57
  %60 = fadd float %59, -1.000000e+00
  %61 = fmul float 2.000000e+00, %58
  %62 = fadd float %61, -1.000000e+00
  %63 = fmul float %37, %37
  %64 = fmul float %38, %38
  %65 = fadd float %64, %63
  %66 = fmul float %39, %39
  %67 = fadd float %65, %66
  %68 = call float @llvm.AMDGPU.rsq(float %67)
  %69 = fmul float %37, %68
  %70 = fmul float %38, %68
  %71 = fmul float %39, %68
  %72 = fmul float %60, %60
  %73 = fmul float %62, %62
  %74 = fadd float %72, %73
  %75 = fsub float -0.000000e+00, %74
  %76 = fadd float 1.000000e+00, %75
  %77 = fcmp uge float 0.000000e+00, %76
  %78 = select i1 %77, float 0.000000e+00, float %76
  %79 = call float @llvm.AMDGPU.rsq(float %78)
  %80 = fmul float %79, %78
  %81 = fsub float -0.000000e+00, %78
  %82 = call float @llvm.AMDGPU.cndlt(float %81, float %80, float 0.000000e+00)
  %83 = fmul float %69, %82
  %84 = fmul float %70, %82
  %85 = fmul float %71, %82
  %86 = fmul float %40, %40
  %87 = fmul float %41, %41
  %88 = fadd float %87, %86
  %89 = fmul float %42, %42
  %90 = fadd float %88, %89
  %91 = call float @llvm.AMDGPU.rsq(float %90)
  %92 = fmul float %40, %91
  %93 = fmul float %41, %91
  %94 = fmul float %42, %91
  %95 = fmul float %43, %43
  %96 = fmul float %44, %44
  %97 = fadd float %96, %95
  %98 = fmul float %45, %45
  %99 = fadd float %97, %98
  %100 = call float @llvm.AMDGPU.rsq(float %99)
  %101 = fmul float %43, %100
  %102 = fmul float %44, %100
  %103 = fmul float %45, %100
  %104 = fmul float %101, %62
  %105 = fmul float %102, %62
  %106 = fmul float %103, %62
  %107 = fmul float %92, %60
  %108 = fadd float %107, %104
  %109 = fmul float %93, %60
  %110 = fadd float %109, %105
  %111 = fmul float %94, %60
  %112 = fadd float %111, %106
  %113 = fmul float %83, %.
  %114 = fadd float %113, %108
  %115 = fmul float %84, %.
  %116 = fadd float %115, %110
  %117 = fmul float %85, %.
  %118 = fadd float %117, %112
  %119 = fmul float %114, 5.000000e-01
  %120 = fadd float %119, 5.000000e-01
  %121 = fmul float %116, 5.000000e-01
  %122 = fadd float %121, 5.000000e-01
  %123 = fcmp uge float %118, 0.000000e+00
  %124 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %125 = fsub float -0.000000e+00, %124
  %126 = fptosi float %125 to i32
  %127 = bitcast i32 %126 to float
  %128 = bitcast float %127 to i32
  %129 = icmp ne i32 %128, 0
  %temp4.0 = select i1 %129, float 1.000000e+00, float 0.000000e+00
  %130 = fmul float %24, 9.765625e-04
  %131 = bitcast float %35 to i32
  %132 = bitcast float %36 to i32
  %133 = insertelement <2 x i32> undef, i32 %131, i32 0
  %134 = insertelement <2 x i32> %133, i32 %132, i32 1
  %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %26, <16 x i8> %28, i32 2)
  %136 = extractelement <4 x float> %135, i32 3
  %137 = fmul float %136, %22
  %138 = fadd float %137, %23
  %139 = fcmp ult float %138, 0.000000e+00
  %140 = select i1 %139, float 1.000000e+00, float 0.000000e+00
  %141 = fsub float -0.000000e+00, %140
  %142 = fptosi float %141 to i32
  %143 = bitcast i32 %142 to float
  %144 = bitcast float %143 to i32
  %145 = icmp ne i32 %144, 0
  br i1 %145, label %IF28, label %ENDIF27

IF28:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF27

ENDIF27:                                          ; preds = %main_body, %IF28
  %146 = call i32 @llvm.SI.packf16(float %130, float %120)
  %147 = bitcast i32 %146 to float
  %148 = call i32 @llvm.SI.packf16(float %122, float %temp4.0)
  %149 = bitcast i32 %148 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %147, float %149, float %147, float %149)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840304
c0c60508
bf8c007f
f0800a00
00430603
bf8c0770
060a0d06
060a0af3
060c0f07
060c0cf3
100e0d06
d2820007
041e0b05
080e0ef2
d0060008
02010107
d2000007
00210107
7e105b07
10100f08
d2060007
22010107
d0080008
02020e80
d2000007
00221080
c82c0300
c82d0301
c8240200
c8250201
10101309
d282000a
0422170b
c8200400
c8210401
d282000a
042a1108
7e145b0a
1016150b
10160f0b
c83c0900
c83d0901
c8340800
c8350801
10181b0d
d282000e
04321f0f
c8300a00
c8310a01
d282000e
043a190c
7e1c5b0e
101e1d0f
10240d0f
c84c0600
c84d0601
c8400500
c8410501
101e2110
d2820011
043e2713
c83c0700
c83d0701
d2820000
04461f0f
7e225b00
10002313
d2820000
044a0b00
d0080006
02010102
d2000001
0019e480
d2060801
02010101
d10a0006
02010101
d2000002
0019e6f2
d2820000
0402050b
d2820000
03c1e100
10021509
10020f01
10121d0d
10120d09
10162310
d2820009
04260b0b
d2820001
04260501
d2820001
03c1e101
10101508
100e0f08
10101d0c
100c0d08
1010230f
d2820005
041a0b08
d2820002
04160507
d00c0006
02010102
d2000002
0019e480
d2060002
22010102
7e041102
d10a0006
02010102
d2000002
0019e480
c0840300
c0c60500
bf8c007f
f0800800
00430303
c0800100
bf8c0070
c2020109
c202810a
bf8c007f
7e080205
d2820003
04100903
d0020004
02010103
d2000003
0011e480
d2060003
22010103
7e061103
d10a0004
02010103
c200010c
7e0602ff
3a800000
bf8c007f
10060600
be802404
8980007e
7e0802f3
7c260880
88fe007e
5e000500
5e020303
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[8..9]
DCL CONST[2..7]
DCL TEMP[0]
DCL TEMP[1..7], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[2], CONST[3]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[1], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[3].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[3].xxxx
 14: MOV TEMP[3].z, IMM[0].zzzz
 15: MOV TEMP[3].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[4].zzzz, CONST[4].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xxxx
 21: ADD TEMP[3].xyz, CONST[7].xyzz, -TEMP[1].xyzz
 22: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
 23: RSQ TEMP[5].x, TEMP[5].xxxx
 24: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 25: DP3_SAT TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz
 26: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[7].xxxx
 29: ADD TEMP[1].xyz, TEMP[5].xyzz, -TEMP[1].xyzz
 30: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz
 31: MAD_SAT TEMP[3].x, CONST[8].xxxx, TEMP[3].xxxx, CONST[8].yyyy
 32: MUL TEMP[5].xyz, CONST[5].xyzz, TEMP[3].xxxx
 33: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
 34: MUL TEMP[3].x, CONST[6].wwww, TEMP[3].xxxx
 35: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz
 36: RSQ TEMP[7].x, TEMP[7].xxxx
 37: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[7].xxxx
 38: DP3_SAT TEMP[1].x, TEMP[1].xyzz, TEMP[4].xyzz
 39: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 40: POW TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 41: SGE TEMP[2].x, TEMP[6].xxxx, IMM[1].yyyy
 42: F2I TEMP[2].x, -TEMP[2]
 43: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
 44: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 45: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx
 46: MOV TEMP[5].w, TEMP[1].xxxx
 47: MOV OUT[0], TEMP[5]
 48: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %43 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %44 = load <32 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %48 = load <32 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = fmul float %13, %41
  %52 = fadd float %51, %42
  %53 = fmul float %12, %22
  %54 = fadd float %53, %26
  %55 = fmul float %52, %23
  %56 = fadd float %55, %27
  %57 = fmul float %12, %24
  %58 = fadd float %57, %28
  %59 = fmul float %52, %25
  %60 = fadd float %59, %29
  %61 = bitcast float %54 to i32
  %62 = bitcast float %56 to i32
  %63 = insertelement <2 x i32> undef, i32 %61, i32 0
  %64 = insertelement <2 x i32> %63, i32 %62, i32 1
  %65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %64, <32 x i8> %48, <16 x i8> %50, i32 2)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = extractelement <4 x float> %65, i32 3
  %70 = fmul float %67, 2.000000e+00
  %71 = fadd float %70, -1.000000e+00
  %72 = fmul float %68, 2.000000e+00
  %73 = fadd float %72, -1.000000e+00
  %74 = fmul float %69, 2.000000e+00
  %75 = fadd float %74, -1.000000e+00
  %76 = fmul float %71, %71
  %77 = fmul float %73, %73
  %78 = fadd float %76, %77
  %79 = fsub float -0.000000e+00, %78
  %80 = fadd float 1.000000e+00, %79
  %81 = call float @llvm.AMDIL.clamp.(float %80, float 0.000000e+00, float 1.000000e+00)
  %82 = call float @llvm.AMDGPU.rsq(float %81)
  %83 = fmul float %82, %81
  %84 = fsub float -0.000000e+00, %81
  %85 = call float @llvm.AMDGPU.cndlt(float %84, float %83, float 0.000000e+00)
  %86 = fmul float %85, %75
  %87 = bitcast float %54 to i32
  %88 = bitcast float %56 to i32
  %89 = insertelement <2 x i32> undef, i32 %87, i32 0
  %90 = insertelement <2 x i32> %89, i32 %88, i32 1
  %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %44, <16 x i8> %46, i32 2)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = fmul float %92, %30
  %94 = fadd float %93, %31
  %95 = fdiv float 1.000000e+00, %94
  %96 = fmul float %58, %95
  %97 = fmul float %60, %95
  %98 = fmul float 1.000000e+00, %95
  %99 = fsub float -0.000000e+00, %96
  %100 = fadd float %36, %99
  %101 = fsub float -0.000000e+00, %97
  %102 = fadd float %37, %101
  %103 = fsub float -0.000000e+00, %98
  %104 = fadd float %38, %103
  %105 = fmul float %100, %100
  %106 = fmul float %102, %102
  %107 = fadd float %106, %105
  %108 = fmul float %104, %104
  %109 = fadd float %107, %108
  %110 = call float @llvm.AMDGPU.rsq(float %109)
  %111 = fmul float %100, %110
  %112 = fmul float %102, %110
  %113 = fmul float %104, %110
  %114 = fmul float %71, %111
  %115 = fmul float %73, %112
  %116 = fadd float %115, %114
  %117 = fmul float %86, %113
  %118 = fadd float %116, %117
  %119 = call float @llvm.AMDIL.clamp.(float %118, float 0.000000e+00, float 1.000000e+00)
  %120 = fmul float %96, %96
  %121 = fmul float %97, %97
  %122 = fadd float %121, %120
  %123 = fmul float %98, %98
  %124 = fadd float %122, %123
  %125 = call float @llvm.AMDGPU.rsq(float %124)
  %126 = fmul float %96, %125
  %127 = fmul float %97, %125
  %128 = fmul float %98, %125
  %129 = fsub float -0.000000e+00, %126
  %130 = fadd float %111, %129
  %131 = fsub float -0.000000e+00, %127
  %132 = fadd float %112, %131
  %133 = fsub float -0.000000e+00, %128
  %134 = fadd float %113, %133
  %135 = fmul float %100, %100
  %136 = fmul float %102, %102
  %137 = fadd float %136, %135
  %138 = fmul float %104, %104
  %139 = fadd float %137, %138
  %140 = fmul float %39, %139
  %141 = fadd float %140, %40
  %142 = call float @llvm.AMDIL.clamp.(float %141, float 0.000000e+00, float 1.000000e+00)
  %143 = fmul float %32, %142
  %144 = fmul float %33, %142
  %145 = fmul float %34, %142
  %146 = fmul float %143, %119
  %147 = fmul float %144, %119
  %148 = fmul float %145, %119
  %149 = fmul float %35, %142
  %150 = fmul float %130, %130
  %151 = fmul float %132, %132
  %152 = fadd float %151, %150
  %153 = fmul float %134, %134
  %154 = fadd float %152, %153
  %155 = call float @llvm.AMDGPU.rsq(float %154)
  %156 = fmul float %130, %155
  %157 = fmul float %132, %155
  %158 = fmul float %134, %155
  %159 = fmul float %156, %71
  %160 = fmul float %157, %73
  %161 = fadd float %160, %159
  %162 = fmul float %158, %86
  %163 = fadd float %161, %162
  %164 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00)
  %165 = fmul float %66, 1.024000e+03
  %166 = call float @llvm.pow.f32(float %164, float %165)
  %167 = fcmp uge float %119, 0x3F847AE140000000
  %168 = select i1 %167, float 1.000000e+00, float 0.000000e+00
  %169 = fsub float -0.000000e+00, %168
  %170 = fptosi float %169 to i32
  %171 = bitcast i32 %170 to float
  %172 = bitcast float %171 to i32
  %173 = and i32 %172, 1065353216
  %174 = bitcast i32 %173 to float
  %175 = fmul float %166, %174
  %176 = fmul float %149, %175
  %177 = call i32 @llvm.SI.packf16(float %146, float %147)
  %178 = bitcast i32 %177 to float
  %179 = call i32 @llvm.SI.packf16(float %148, float %176)
  %180 = bitcast i32 %179 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %178, float %180, float %178, float %180)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c2000924
c2008925
bf8c007f
7e000201
d2820001
04000103
c2000909
c200890d
bf8c007f
7e000201
d2820004
04000101
c2000908
c200890c
bf8c007f
7e000201
d2820003
04000102
c0860300
c0c80500
bf8c007f
f0800100
00640003
c2000912
c2008913
bf8c0070
7e0a0201
d2820000
04140100
7e005500
c200090b
c200890f
bf8c007f
7e0a0201
d2820001
04140101
10020101
c200091d
bf8c007f
080a0200
c200090a
c200890e
bf8c007f
7e0c0201
d2820002
04180102
100c0102
c200091c
bf8c007f
08040c00
100e0502
d2820008
041e0b05
c200091e
bf8c007f
080e0000
d282000c
04220f07
7e1a5b0c
10161b05
10121b02
c0800304
c0c60508
bf8c007f
f0800f00
00030203
bf8c0770
06100703
061010f3
101c1308
06140904
061414f3
d2820010
043a170a
101c150a
d282000e
043a1108
081c1cf2
d206080e
0201010e
7e1e5b0e
101e1d0f
d206000e
2201010e
d0080000
02021c80
d200000e
00021e80
061e0b05
061e1ef3
101c1f0e
101e1b07
d2820007
04421f0e
d2060807
02010107
c2000920
c2008921
bf8c007f
7e1a0201
d282000c
04361800
d206080c
0201010c
c2000915
bf8c007f
101a1800
101a0f0d
c2000914
bf8c007f
10201800
10200f10
5e1a1b10
10200d06
d2820010
04420301
d2820010
04420100
7e205b10
10022101
0802030b
100c2106
080c0d09
10120d06
d2820009
04260301
10002100
0800010f
d2820009
04260100
7e125b09
10021301
100c1306
100c1106
d2820001
041a1501
10001300
d2820000
04061d00
d2060800
02010100
7e004f00
100204ff
44800000
0e000101
7e004b00
7e0202ff
3c23d70a
d00c0000
02020307
d2000001
0001e480
d2060001
22010101
7e021101
360202f2
10000300
c200091b
bf8c007f
10021800
10000101
c2000916
bf8c007f
10021800
10020f01
5e000101
f8001c0f
000d000d
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = fmul float %11, %30
  %34 = fmul float %12, %30
  %35 = fmul float %13, %30
  %36 = fmul float %14, %30
  %37 = fmul float %15, %31
  %38 = fadd float %37, %33
  %39 = fmul float %16, %31
  %40 = fadd float %39, %34
  %41 = fmul float %17, %31
  %42 = fadd float %41, %35
  %43 = fmul float %18, %31
  %44 = fadd float %43, %36
  %45 = fmul float %19, %32
  %46 = fadd float %45, %38
  %47 = fmul float %20, %32
  %48 = fadd float %47, %40
  %49 = fmul float %21, %32
  %50 = fadd float %49, %42
  %51 = fmul float %22, %32
  %52 = fadd float %51, %44
  %53 = fadd float %46, %23
  %54 = fadd float %48, %24
  %55 = fadd float %50, %25
  %56 = fadd float %52, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %53, float %54, float %55, float %56)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0820700
bf8c007f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[12..13]
DCL CONST[5..11]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[13].xxxx, CONST[13].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].xy, IN[5].xyyy
 13: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 14: MAD TEMP[5].yw, IMM[0].zzzz, TEMP[5], IMM[0].xxxx
 15: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 16: RSQ TEMP[6].x, TEMP[6].xxxx
 17: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 18: DP2 TEMP[6].x, TEMP[5].ywww, TEMP[5].ywww
 19: ADD TEMP[6].x, IMM[0].yyyy, -TEMP[6].xxxx
 20: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
 21: RSQ TEMP[7].x, TEMP[6].xxxx
 22: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx
 23: CMP TEMP[7].x, -TEMP[6].xxxx, TEMP[7].xxxx, IMM[0].wwww
 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 25: DP3 TEMP[6].x, IN[6].yzww, IN[6].yzww
 26: RSQ TEMP[6].x, TEMP[6].xxxx
 27: MUL TEMP[6].xyz, IN[6].yzww, TEMP[6].xxxx
 28: DP3 TEMP[7].x, IN[7].xyzz, IN[7].xyzz
 29: RSQ TEMP[7].x, TEMP[7].xxxx
 30: MUL TEMP[7].xyz, IN[7].xyzz, TEMP[7].xxxx
 31: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].wwww
 32: MAD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].yyyy, TEMP[7].xyzz
 33: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
 34: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 35: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 36: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 37: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 38: MOV TEMP[2].xyz, TEMP[2].xyzz
 39: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 40: DP4 TEMP[3].x, TEMP[4], CONST[10]
 41: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[8].yyyy
 42: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 43: MUL TEMP[3], TEMP[4], TEMP[3]
 44: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[5].xyyy
 45: MOV TEMP[5].xy, TEMP[5].xyyy
 46: TEX TEMP[5], TEMP[5], SAMP[4], 2D
 47: DP4 TEMP[6].x, TEMP[4], CONST[9]
 48: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[8].xxxx
 49: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 50: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 51: MAX TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
 52: RCP TEMP[7].x, TEMP[7].xxxx
 53: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 54: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 55: MAD TEMP[5].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[5].xyzz
 56: MUL TEMP[3].xyz, TEMP[5].xyzz, IMM[2].xxxx
 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 58: DP4 TEMP[4].x, TEMP[4], CONST[11]
 59: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[8].zzzz
 60: MAD TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[3].xyzz
 61: MOV TEMP[2].xy, IN[3].zwww
 62: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D
 63: MAD TEMP[3].xyz, CONST[12].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
 64: MAX TEMP[2].x, IN[4].wwww, CONST[6].wwww
 65: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 66: LRP TEMP[3].xyz, TEMP[2].xxxx, TEMP[3].xyzz, CONST[6].xyzz
 67: MOV OUT[0], TEMP[3]
 68: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %91 = fmul float %13, %46
  %92 = fadd float %91, %47
  %93 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %97 = bitcast float %93 to i32
  %98 = icmp ne i32 %97, 0
  %. = select i1 %98, float -1.000000e+00, float 1.000000e+00
  %99 = bitcast float %80 to i32
  %100 = bitcast float %81 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %49, <16 x i8> %51, i32 2)
  %104 = extractelement <4 x float> %103, i32 0
  %105 = extractelement <4 x float> %103, i32 1
  %106 = extractelement <4 x float> %103, i32 2
  %107 = extractelement <4 x float> %103, i32 3
  %108 = bitcast float %80 to i32
  %109 = bitcast float %81 to i32
  %110 = insertelement <2 x i32> undef, i32 %108, i32 0
  %111 = insertelement <2 x i32> %110, i32 %109, i32 1
  %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %53, <16 x i8> %55, i32 2)
  %113 = extractelement <4 x float> %112, i32 1
  %114 = extractelement <4 x float> %112, i32 3
  %115 = fmul float 2.000000e+00, %113
  %116 = fadd float %115, -1.000000e+00
  %117 = fmul float 2.000000e+00, %114
  %118 = fadd float %117, -1.000000e+00
  %119 = fmul float %82, %82
  %120 = fmul float %83, %83
  %121 = fadd float %120, %119
  %122 = fmul float %84, %84
  %123 = fadd float %121, %122
  %124 = call float @llvm.AMDGPU.rsq(float %123)
  %125 = fmul float %82, %124
  %126 = fmul float %83, %124
  %127 = fmul float %84, %124
  %128 = fmul float %116, %116
  %129 = fmul float %118, %118
  %130 = fadd float %128, %129
  %131 = fsub float -0.000000e+00, %130
  %132 = fadd float 1.000000e+00, %131
  %133 = fcmp uge float 0.000000e+00, %132
  %134 = select i1 %133, float 0.000000e+00, float %132
  %135 = call float @llvm.AMDGPU.rsq(float %134)
  %136 = fmul float %135, %134
  %137 = fsub float -0.000000e+00, %134
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %136, float 0.000000e+00)
  %139 = fmul float %125, %138
  %140 = fmul float %126, %138
  %141 = fmul float %127, %138
  %142 = fmul float %85, %85
  %143 = fmul float %86, %86
  %144 = fadd float %143, %142
  %145 = fmul float %87, %87
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %85, %147
  %149 = fmul float %86, %147
  %150 = fmul float %87, %147
  %151 = fmul float %88, %88
  %152 = fmul float %89, %89
  %153 = fadd float %152, %151
  %154 = fmul float %90, %90
  %155 = fadd float %153, %154
  %156 = call float @llvm.AMDGPU.rsq(float %155)
  %157 = fmul float %88, %156
  %158 = fmul float %89, %156
  %159 = fmul float %90, %156
  %160 = fmul float %157, %118
  %161 = fmul float %158, %118
  %162 = fmul float %159, %118
  %163 = fmul float %148, %116
  %164 = fadd float %163, %160
  %165 = fmul float %149, %116
  %166 = fadd float %165, %161
  %167 = fmul float %150, %116
  %168 = fadd float %167, %162
  %169 = fmul float %139, %.
  %170 = fadd float %169, %164
  %171 = fmul float %140, %.
  %172 = fadd float %171, %166
  %173 = fmul float %141, %.
  %174 = fadd float %173, %168
  %175 = fmul float %170, %76
  %176 = fmul float %172, %77
  %177 = fadd float %176, %175
  %178 = fmul float %174, %78
  %179 = fadd float %177, %178
  %180 = fmul float %179, %170
  %181 = fmul float %179, %172
  %182 = fmul float %179, %174
  %183 = fmul float 2.000000e+00, %180
  %184 = fmul float 2.000000e+00, %181
  %185 = fmul float 2.000000e+00, %182
  %186 = fsub float -0.000000e+00, %183
  %187 = fadd float %76, %186
  %188 = fsub float -0.000000e+00, %184
  %189 = fadd float %77, %188
  %190 = fsub float -0.000000e+00, %185
  %191 = fadd float %78, %190
  %192 = insertelement <4 x float> undef, float %187, i32 0
  %193 = insertelement <4 x float> %192, float %189, i32 1
  %194 = insertelement <4 x float> %193, float %191, i32 2
  %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 3
  %196 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %195)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = extractelement <4 x float> %196, i32 3
  %201 = call float @fabs(float %199)
  %202 = fdiv float 1.000000e+00, %201
  %203 = fmul float %197, %202
  %204 = fadd float %203, 1.500000e+00
  %205 = fmul float %198, %202
  %206 = fadd float %205, 1.500000e+00
  %207 = bitcast float %206 to i32
  %208 = bitcast float %204 to i32
  %209 = bitcast float %200 to i32
  %210 = insertelement <4 x i32> undef, i32 %207, i32 0
  %211 = insertelement <4 x i32> %210, i32 %208, i32 1
  %212 = insertelement <4 x i32> %211, i32 %209, i32 2
  %213 = insertelement <4 x i32> %212, i32 undef, i32 3
  %214 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %213, <32 x i8> %57, <16 x i8> %59, i32 4)
  %215 = extractelement <4 x float> %214, i32 0
  %216 = extractelement <4 x float> %214, i32 1
  %217 = extractelement <4 x float> %214, i32 2
  %218 = extractelement <4 x float> %214, i32 3
  %219 = fmul float %104, %35
  %220 = fmul float %105, %36
  %221 = fadd float %219, %220
  %222 = fmul float %106, %37
  %223 = fadd float %221, %222
  %224 = fmul float %107, %38
  %225 = fadd float %223, %224
  %226 = fadd float %225, %29
  %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00)
  %228 = call float @llvm.AMDGPU.lrp(float %227, float %70, float 1.000000e+00)
  %229 = call float @llvm.AMDGPU.lrp(float %227, float %71, float 1.000000e+00)
  %230 = call float @llvm.AMDGPU.lrp(float %227, float %72, float 1.000000e+00)
  %231 = call float @llvm.AMDGPU.lrp(float %227, float %73, float 1.000000e+00)
  %232 = fmul float %104, %228
  %233 = fmul float %105, %229
  %234 = fmul float %106, %230
  %235 = fmul float %107, %231
  %236 = fmul float %12, %22
  %237 = fmul float %92, %23
  %238 = bitcast float %236 to i32
  %239 = bitcast float %237 to i32
  %240 = insertelement <2 x i32> undef, i32 %238, i32 0
  %241 = insertelement <2 x i32> %240, i32 %239, i32 1
  %242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %65, <16 x i8> %67, i32 2)
  %243 = extractelement <4 x float> %242, i32 0
  %244 = extractelement <4 x float> %242, i32 1
  %245 = extractelement <4 x float> %242, i32 2
  %246 = extractelement <4 x float> %242, i32 3
  %247 = fmul float %104, %31
  %248 = fmul float %105, %32
  %249 = fadd float %247, %248
  %250 = fmul float %106, %33
  %251 = fadd float %249, %250
  %252 = fmul float %107, %34
  %253 = fadd float %251, %252
  %254 = fadd float %253, %28
  %255 = call float @llvm.AMDIL.clamp.(float %254, float 0.000000e+00, float 1.000000e+00)
  %256 = fmul float %255, %246
  %257 = fmul float %243, 0x3FCB367A00000000
  %258 = fmul float %244, 0x3FE6E2EB20000000
  %259 = fadd float %258, %257
  %260 = fmul float %245, 0x3FB27BB300000000
  %261 = fadd float %259, %260
  %262 = fcmp uge float %261, 0x3F50624DE0000000
  %263 = select i1 %262, float %261, float 0x3F50624DE0000000
  %264 = fdiv float 1.000000e+00, %263
  %265 = fmul float %243, %264
  %266 = fmul float %244, %264
  %267 = fmul float %245, %264
  %268 = fmul float %232, %243
  %269 = fmul float %233, %244
  %270 = fmul float %234, %245
  %271 = fmul float %256, %265
  %272 = fadd float %271, %268
  %273 = fmul float %256, %266
  %274 = fadd float %273, %269
  %275 = fmul float %256, %267
  %276 = fadd float %275, %270
  %277 = fmul float %272, 4.000000e+00
  %278 = fmul float %274, 4.000000e+00
  %279 = fmul float %276, 4.000000e+00
  %280 = fmul float %215, %218
  %281 = fmul float %216, %218
  %282 = fmul float %217, %218
  %283 = fmul float %104, %39
  %284 = fmul float %105, %40
  %285 = fadd float %283, %284
  %286 = fmul float %106, %41
  %287 = fadd float %285, %286
  %288 = fmul float %107, %42
  %289 = fadd float %287, %288
  %290 = fadd float %289, %30
  %291 = call float @llvm.AMDIL.clamp.(float %290, float 0.000000e+00, float 1.000000e+00)
  %292 = fmul float %280, %291
  %293 = fadd float %292, %277
  %294 = fmul float %281, %291
  %295 = fadd float %294, %278
  %296 = fmul float %282, %291
  %297 = fadd float %296, %279
  %298 = bitcast float %74 to i32
  %299 = bitcast float %75 to i32
  %300 = insertelement <2 x i32> undef, i32 %298, i32 0
  %301 = insertelement <2 x i32> %300, i32 %299, i32 1
  %302 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %301, <32 x i8> %61, <16 x i8> %63, i32 2)
  %303 = extractelement <4 x float> %302, i32 0
  %304 = extractelement <4 x float> %302, i32 1
  %305 = extractelement <4 x float> %302, i32 2
  %306 = fmul float %43, %303
  %307 = fadd float %306, %293
  %308 = fmul float %44, %304
  %309 = fadd float %308, %295
  %310 = fmul float %45, %305
  %311 = fadd float %310, %297
  %312 = fcmp uge float %79, %27
  %313 = select i1 %312, float %79, float %27
  %314 = call float @llvm.AMDIL.clamp.(float %313, float 0.000000e+00, float 1.000000e+00)
  %315 = call float @llvm.AMDGPU.lrp(float %314, float %307, float %24)
  %316 = call float @llvm.AMDGPU.lrp(float %314, float %309, float %25)
  %317 = call float @llvm.AMDGPU.lrp(float %314, float %311, float %26)
  %318 = call i32 @llvm.SI.packf16(float %315, float %316)
  %319 = bitcast i32 %318 to float
  %320 = call i32 @llvm.SI.packf16(float %317, float %235)
  %321 = bitcast i32 %320 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %319, float %321, float %319, float %321)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430a08
bf8c0770
10241b0b
c0840300
c0c60500
bf8c007f
f0800f00
00430405
c0840100
bf8c0070
c2000929
bf8c007f
10100a00
c2000928
bf8c007f
d2820008
04200104
c200092a
bf8c007f
d2820008
04200106
c200092b
bf8c007f
d2820008
04200107
c2000921
bf8c007f
06101000
d2060808
02010108
081210f2
c8380100
c8390101
d282000e
04261d08
10261d05
c2000934
c2008935
bf8c007f
7e1c0201
d2820003
04380103
c2000915
bf8c007f
101e0600
c2000914
bf8c007f
101c0400
c0860310
c0c80520
bf8c007f
f0800f00
00640e0e
bf8c0770
10261f13
10041cff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070f
7e0602ff
3d93dd98
d2820002
040a0710
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
1028050f
c2000925
bf8c007f
10060a00
c2000924
bf8c007f
d2820003
040c0104
c2000926
bf8c007f
d2820003
040c0106
c2000927
bf8c007f
d2820003
040c0107
c2000920
bf8c007f
06060600
d2060803
02010103
10062303
d2820013
044e2903
102826f6
c200092d
bf8c007f
10260a00
c200092c
bf8c007f
d2820013
044c0104
c200092e
bf8c007f
d2820013
044c0106
c200092f
bf8c007f
d2820013
044c0107
c2000922
bf8c007f
06262600
d2060813
02010113
d2820012
04522712
c8540700
c8550701
c8500600
c8510601
c080030c
c0c60518
bf8c007f
f0800700
00031414
c2000931
bf8c0070
d2820018
044a2a00
c8480b00
c8490b01
c200091b
bf8c007f
d00c0002
02000112
7e2e0200
d2000012
000a2517
d2060812
02010112
082e24f2
c2000919
bf8c007f
10322e00
d2820018
04663112
10321b0a
c8680000
c8690001
d282001a
04263508
10343504
10341d1a
1036050e
d282001a
046a3703
103434f6
d2820019
046a2719
c2000930
bf8c007f
d2820019
04662800
c2000918
bf8c007f
10342e00
d2820019
046a3312
5e303119
10141b0c
c82c0200
c82d0201
d282000b
04261708
10161706
1016210b
10040510
d2820002
042e0503
100404f6
d2820002
040a270a
c2000932
bf8c007f
d2820002
040a2c00
c200091a
bf8c007f
10062e00
d2820002
040e0512
c80c0300
c80d0301
d2820000
04260708
10000107
5e000102
f8001c0f
00180018
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..14]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3].xyz, IN[1].xyzz, CONST[10].wwww
  8: MUL TEMP[4], CONST[0], TEMP[3].xxxx
  9: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 10: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 11: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 14: MUL TEMP[4].xyz, IN[4].xyzz, CONST[10].wwww
 15: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 16: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 17: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 18: MUL TEMP[5], CONST[0], TEMP[0].xxxx
 19: MAD TEMP[5], CONST[1], TEMP[0].yyyy, TEMP[5]
 20: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[5]
 21: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 22: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[14].xyzz
 23: MAD TEMP[5].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 24: MOV TEMP[0].w, TEMP[5].xxxx
 25: MAD TEMP[5].xy, IN[2].xyyy, CONST[9].xyyy, CONST[9].zwww
 26: MOV TEMP[5].zw, TEMP[3].yyxy
 27: MOV TEMP[6].x, TEMP[3].zzzz
 28: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 29: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 30: MOV TEMP[6].yzw, TEMP[3].yxyz
 31: MOV TEMP[3].xyz, TEMP[4].xyzx
 32: MOV OUT[2], IN[3]
 33: MOV OUT[1], TEMP[2]
 34: MOV OUT[4], TEMP[5]
 35: MOV OUT[6], TEMP[3]
 36: MOV OUT[5], TEMP[6]
 37: MOV OUT[3], TEMP[0]
 38: MOV OUT[0], TEMP[1]
 39: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = extractelement <4 x float> %77, i32 3
  %82 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %83 = load <16 x i8> addrspace(2)* %82, !tbaa !0
  %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %5)
  %85 = extractelement <4 x float> %84, i32 0
  %86 = extractelement <4 x float> %84, i32 1
  %87 = extractelement <4 x float> %84, i32 2
  %88 = fmul float %61, %50
  %89 = fadd float %88, %47
  %90 = fmul float %62, %51
  %91 = fadd float %90, %48
  %92 = fmul float %63, %52
  %93 = fadd float %92, %49
  %94 = fmul float %23, %89
  %95 = fmul float %24, %89
  %96 = fmul float %25, %89
  %97 = fmul float %26, %89
  %98 = fmul float %27, %91
  %99 = fadd float %98, %94
  %100 = fmul float %28, %91
  %101 = fadd float %100, %95
  %102 = fmul float %29, %91
  %103 = fadd float %102, %96
  %104 = fmul float %30, %91
  %105 = fadd float %104, %97
  %106 = fmul float %31, %93
  %107 = fadd float %106, %99
  %108 = fmul float %32, %93
  %109 = fadd float %108, %101
  %110 = fmul float %33, %93
  %111 = fadd float %110, %103
  %112 = fmul float %34, %93
  %113 = fadd float %112, %105
  %114 = fadd float %107, %35
  %115 = fadd float %109, %36
  %116 = fadd float %111, %37
  %117 = fadd float %113, %38
  %118 = fmul float %67, %46
  %119 = fmul float %68, %46
  %120 = fmul float %69, %46
  %121 = fmul float %11, %118
  %122 = fmul float %12, %118
  %123 = fmul float %13, %118
  %124 = fmul float %14, %119
  %125 = fadd float %124, %121
  %126 = fmul float %15, %119
  %127 = fadd float %126, %122
  %128 = fmul float %16, %119
  %129 = fadd float %128, %123
  %130 = fmul float %17, %120
  %131 = fadd float %130, %125
  %132 = fmul float %18, %120
  %133 = fadd float %132, %127
  %134 = fmul float %19, %120
  %135 = fadd float %134, %129
  %136 = fmul float %131, %131
  %137 = fmul float %133, %133
  %138 = fadd float %137, %136
  %139 = fmul float %135, %135
  %140 = fadd float %138, %139
  %141 = call float @llvm.AMDGPU.rsq(float %140)
  %142 = fmul float %131, %141
  %143 = fmul float %133, %141
  %144 = fmul float %135, %141
  %145 = fmul float %85, %46
  %146 = fmul float %86, %46
  %147 = fmul float %87, %46
  %148 = fmul float %11, %145
  %149 = fmul float %12, %145
  %150 = fmul float %13, %145
  %151 = fmul float %14, %146
  %152 = fadd float %151, %148
  %153 = fmul float %15, %146
  %154 = fadd float %153, %149
  %155 = fmul float %16, %146
  %156 = fadd float %155, %150
  %157 = fmul float %17, %147
  %158 = fadd float %157, %152
  %159 = fmul float %18, %147
  %160 = fadd float %159, %154
  %161 = fmul float %19, %147
  %162 = fadd float %161, %156
  %163 = fmul float %11, %89
  %164 = fmul float %12, %89
  %165 = fmul float %13, %89
  %166 = fmul float %14, %91
  %167 = fadd float %166, %163
  %168 = fmul float %15, %91
  %169 = fadd float %168, %164
  %170 = fmul float %16, %91
  %171 = fadd float %170, %165
  %172 = fmul float %17, %93
  %173 = fadd float %172, %167
  %174 = fmul float %18, %93
  %175 = fadd float %174, %169
  %176 = fmul float %19, %93
  %177 = fadd float %176, %171
  %178 = fadd float %173, %20
  %179 = fadd float %175, %21
  %180 = fadd float %177, %22
  %181 = fsub float -0.000000e+00, %55
  %182 = fadd float %178, %181
  %183 = fsub float -0.000000e+00, %56
  %184 = fadd float %179, %183
  %185 = fsub float -0.000000e+00, %57
  %186 = fadd float %180, %185
  %187 = fmul float %116, %53
  %188 = fadd float %187, %54
  %189 = fmul float %73, %42
  %190 = fadd float %189, %44
  %191 = fmul float %74, %43
  %192 = fadd float %191, %45
  %193 = fmul float %162, %143
  %194 = fmul float %158, %144
  %195 = fmul float %160, %142
  %196 = fsub float -0.000000e+00, %193
  %197 = fmul float %160, %144
  %198 = fadd float %197, %196
  %199 = fsub float -0.000000e+00, %194
  %200 = fmul float %162, %142
  %201 = fadd float %200, %199
  %202 = fsub float -0.000000e+00, %195
  %203 = fmul float %158, %143
  %204 = fadd float %203, %202
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %78, float %79, float %80, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %182, float %184, float %186, float %188)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %190, float %192, float %142, float %143)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %144, float %198, float %201, float %204)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %158, float %160, float %162, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c084070c
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840700
bf8c000f
e00c2000
80020300
c2020131
c202812d
bf8c0070
7e020205
d2820001
04040904
c2020130
c202812c
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c2020132
c202812e
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c204813a
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060139
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078138
bf8c007f
0a10100f
f800022f
05060708
c0880704
bf8c000f
e00c2000
80040700
c207812b
bf8c0070
100a100f
100c0e0f
10160c09
d282000b
042e0a0b
100e120f
d2820009
042e0e0a
10100c0c
d2820008
04220a0d
d282000a
04220e0e
1010150a
d2820008
04221309
100c0c04
d2820005
041a0a05
d2820007
04160e08
d2820005
04220f07
7e105b05
100a1109
100c110a
c0880708
bf8c007f
e00c2000
80040900
c2080125
c2088127
bf8c0070
7e1a0211
d282000d
0434210a
c2080124
c2088126
bf8c007f
7e1c0211
d2820009
04382109
f800023f
05060d09
c0880710
bf8c000f
e00c2000
80040c00
bf8c0770
10121a0f
1016180f
10001609
d2820000
0402120b
10181c0f
d2820000
0402180a
101a0d00
1014160c
d282000a
042a120d
d282000a
042a180e
101c0b0a
081a1b0e
100e1107
101c0f0a
10101604
d2820008
04221205
d2820008
04221808
100c0d08
080c1d06
100a0b08
10120f00
080a0b09
f800024f
0d060507
bf8c070f
7e0a0280
f800025f
0508000a
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[12..13]
DCL CONST[5..11]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[13].xxxx, CONST[13].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: MOV TEMP[6].xy, IN[5].xyyy
 14: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 15: MAD TEMP[6].yw, IMM[0].zzzz, TEMP[6], IMM[0].xxxx
 16: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz
 17: RSQ TEMP[7].x, TEMP[7].xxxx
 18: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 19: DP2 TEMP[7].x, TEMP[6].ywww, TEMP[6].ywww
 20: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx
 21: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
 22: RSQ TEMP[8].x, TEMP[7].xxxx
 23: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx
 24: CMP TEMP[8].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].wwww
 25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 26: DP3 TEMP[7].x, IN[6].yzww, IN[6].yzww
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[7].xyz, IN[6].yzww, TEMP[7].xxxx
 29: DP3 TEMP[8].x, IN[7].xyzz, IN[7].xyzz
 30: RSQ TEMP[8].x, TEMP[8].xxxx
 31: MUL TEMP[8].xyz, IN[7].xyzz, TEMP[8].xxxx
 32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].wwww
 33: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[6].yyyy, TEMP[8].xyzz
 34: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
 35: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 36: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 37: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 38: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 39: MOV TEMP[2].xyz, TEMP[2].xyzz
 40: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 41: DP4 TEMP[3].x, TEMP[4], CONST[11]
 42: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[8].zzzz
 43: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 44: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[4].xyzz
 45: DP4 TEMP[2].x, TEMP[4], CONST[10]
 46: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy
 47: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 48: MUL TEMP[2], TEMP[5], TEMP[2]
 49: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 50: MOV TEMP[3].xy, TEMP[3].xyyy
 51: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 52: DP4 TEMP[4].x, TEMP[4], CONST[9]
 53: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[8].xxxx
 54: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 55: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 56: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww
 57: RCP TEMP[5].x, TEMP[5].xxxx
 58: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 59: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 60: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 61: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[2].xxxx
 62: MOV TEMP[3].xy, IN[3].zwww
 63: TEX TEMP[3].xyz, TEMP[3], SAMP[3], 2D
 64: MAD TEMP[2].xyz, CONST[12].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
 65: MAX TEMP[3].x, IN[4].wwww, CONST[6].wwww
 66: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 67: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[6].xyzz
 68: MOV OUT[0], TEMP[2]
 69: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %91 = fmul float %13, %46
  %92 = fadd float %91, %47
  %93 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %97 = bitcast float %93 to i32
  %98 = icmp ne i32 %97, 0
  %. = select i1 %98, float -1.000000e+00, float 1.000000e+00
  %99 = bitcast float %80 to i32
  %100 = bitcast float %81 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %49, <16 x i8> %51, i32 2)
  %104 = extractelement <4 x float> %103, i32 0
  %105 = extractelement <4 x float> %103, i32 1
  %106 = extractelement <4 x float> %103, i32 2
  %107 = extractelement <4 x float> %103, i32 3
  %108 = bitcast float %80 to i32
  %109 = bitcast float %81 to i32
  %110 = insertelement <2 x i32> undef, i32 %108, i32 0
  %111 = insertelement <2 x i32> %110, i32 %109, i32 1
  %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %53, <16 x i8> %55, i32 2)
  %113 = extractelement <4 x float> %112, i32 1
  %114 = extractelement <4 x float> %112, i32 3
  %115 = fmul float 2.000000e+00, %113
  %116 = fadd float %115, -1.000000e+00
  %117 = fmul float 2.000000e+00, %114
  %118 = fadd float %117, -1.000000e+00
  %119 = fmul float %82, %82
  %120 = fmul float %83, %83
  %121 = fadd float %120, %119
  %122 = fmul float %84, %84
  %123 = fadd float %121, %122
  %124 = call float @llvm.AMDGPU.rsq(float %123)
  %125 = fmul float %82, %124
  %126 = fmul float %83, %124
  %127 = fmul float %84, %124
  %128 = fmul float %116, %116
  %129 = fmul float %118, %118
  %130 = fadd float %128, %129
  %131 = fsub float -0.000000e+00, %130
  %132 = fadd float 1.000000e+00, %131
  %133 = fcmp uge float 0.000000e+00, %132
  %134 = select i1 %133, float 0.000000e+00, float %132
  %135 = call float @llvm.AMDGPU.rsq(float %134)
  %136 = fmul float %135, %134
  %137 = fsub float -0.000000e+00, %134
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %136, float 0.000000e+00)
  %139 = fmul float %125, %138
  %140 = fmul float %126, %138
  %141 = fmul float %127, %138
  %142 = fmul float %85, %85
  %143 = fmul float %86, %86
  %144 = fadd float %143, %142
  %145 = fmul float %87, %87
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %85, %147
  %149 = fmul float %86, %147
  %150 = fmul float %87, %147
  %151 = fmul float %88, %88
  %152 = fmul float %89, %89
  %153 = fadd float %152, %151
  %154 = fmul float %90, %90
  %155 = fadd float %153, %154
  %156 = call float @llvm.AMDGPU.rsq(float %155)
  %157 = fmul float %88, %156
  %158 = fmul float %89, %156
  %159 = fmul float %90, %156
  %160 = fmul float %157, %118
  %161 = fmul float %158, %118
  %162 = fmul float %159, %118
  %163 = fmul float %148, %116
  %164 = fadd float %163, %160
  %165 = fmul float %149, %116
  %166 = fadd float %165, %161
  %167 = fmul float %150, %116
  %168 = fadd float %167, %162
  %169 = fmul float %139, %.
  %170 = fadd float %169, %164
  %171 = fmul float %140, %.
  %172 = fadd float %171, %166
  %173 = fmul float %141, %.
  %174 = fadd float %173, %168
  %175 = fmul float %170, %76
  %176 = fmul float %172, %77
  %177 = fadd float %176, %175
  %178 = fmul float %174, %78
  %179 = fadd float %177, %178
  %180 = fmul float %179, %170
  %181 = fmul float %179, %172
  %182 = fmul float %179, %174
  %183 = fmul float 2.000000e+00, %180
  %184 = fmul float 2.000000e+00, %181
  %185 = fmul float 2.000000e+00, %182
  %186 = fsub float -0.000000e+00, %183
  %187 = fadd float %76, %186
  %188 = fsub float -0.000000e+00, %184
  %189 = fadd float %77, %188
  %190 = fsub float -0.000000e+00, %185
  %191 = fadd float %78, %190
  %192 = insertelement <4 x float> undef, float %187, i32 0
  %193 = insertelement <4 x float> %192, float %189, i32 1
  %194 = insertelement <4 x float> %193, float %191, i32 2
  %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 3
  %196 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %195)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = extractelement <4 x float> %196, i32 3
  %201 = call float @fabs(float %199)
  %202 = fdiv float 1.000000e+00, %201
  %203 = fmul float %197, %202
  %204 = fadd float %203, 1.500000e+00
  %205 = fmul float %198, %202
  %206 = fadd float %205, 1.500000e+00
  %207 = bitcast float %206 to i32
  %208 = bitcast float %204 to i32
  %209 = bitcast float %200 to i32
  %210 = insertelement <4 x i32> undef, i32 %207, i32 0
  %211 = insertelement <4 x i32> %210, i32 %208, i32 1
  %212 = insertelement <4 x i32> %211, i32 %209, i32 2
  %213 = insertelement <4 x i32> %212, i32 undef, i32 3
  %214 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %213, <32 x i8> %57, <16 x i8> %59, i32 4)
  %215 = extractelement <4 x float> %214, i32 0
  %216 = extractelement <4 x float> %214, i32 1
  %217 = extractelement <4 x float> %214, i32 2
  %218 = extractelement <4 x float> %214, i32 3
  %219 = fmul float %104, %39
  %220 = fmul float %105, %40
  %221 = fadd float %219, %220
  %222 = fmul float %106, %41
  %223 = fadd float %221, %222
  %224 = fmul float %107, %42
  %225 = fadd float %223, %224
  %226 = fadd float %225, %30
  %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00)
  %228 = fmul float %218, %227
  %229 = call float @llvm.AMDGPU.lrp(float %228, float %215, float %104)
  %230 = call float @llvm.AMDGPU.lrp(float %228, float %216, float %105)
  %231 = call float @llvm.AMDGPU.lrp(float %228, float %217, float %106)
  %232 = fmul float %104, %35
  %233 = fmul float %105, %36
  %234 = fadd float %232, %233
  %235 = fmul float %106, %37
  %236 = fadd float %234, %235
  %237 = fmul float %107, %38
  %238 = fadd float %236, %237
  %239 = fadd float %238, %29
  %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00)
  %241 = call float @llvm.AMDGPU.lrp(float %240, float %70, float 1.000000e+00)
  %242 = call float @llvm.AMDGPU.lrp(float %240, float %71, float 1.000000e+00)
  %243 = call float @llvm.AMDGPU.lrp(float %240, float %72, float 1.000000e+00)
  %244 = call float @llvm.AMDGPU.lrp(float %240, float %73, float 1.000000e+00)
  %245 = fmul float %229, %241
  %246 = fmul float %230, %242
  %247 = fmul float %231, %243
  %248 = fmul float %107, %244
  %249 = fmul float %12, %22
  %250 = fmul float %92, %23
  %251 = bitcast float %249 to i32
  %252 = bitcast float %250 to i32
  %253 = insertelement <2 x i32> undef, i32 %251, i32 0
  %254 = insertelement <2 x i32> %253, i32 %252, i32 1
  %255 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %65, <16 x i8> %67, i32 2)
  %256 = extractelement <4 x float> %255, i32 0
  %257 = extractelement <4 x float> %255, i32 1
  %258 = extractelement <4 x float> %255, i32 2
  %259 = extractelement <4 x float> %255, i32 3
  %260 = fmul float %104, %31
  %261 = fmul float %105, %32
  %262 = fadd float %260, %261
  %263 = fmul float %106, %33
  %264 = fadd float %262, %263
  %265 = fmul float %107, %34
  %266 = fadd float %264, %265
  %267 = fadd float %266, %28
  %268 = call float @llvm.AMDIL.clamp.(float %267, float 0.000000e+00, float 1.000000e+00)
  %269 = fmul float %268, %259
  %270 = fmul float %256, 0x3FCB367A00000000
  %271 = fmul float %257, 0x3FE6E2EB20000000
  %272 = fadd float %271, %270
  %273 = fmul float %258, 0x3FB27BB300000000
  %274 = fadd float %272, %273
  %275 = fcmp uge float %274, 0x3F50624DE0000000
  %276 = select i1 %275, float %274, float 0x3F50624DE0000000
  %277 = fdiv float 1.000000e+00, %276
  %278 = fmul float %256, %277
  %279 = fmul float %257, %277
  %280 = fmul float %258, %277
  %281 = fmul float %245, %256
  %282 = fmul float %246, %257
  %283 = fmul float %247, %258
  %284 = fmul float %269, %278
  %285 = fadd float %284, %281
  %286 = fmul float %269, %279
  %287 = fadd float %286, %282
  %288 = fmul float %269, %280
  %289 = fadd float %288, %283
  %290 = fmul float %285, 4.000000e+00
  %291 = fmul float %287, 4.000000e+00
  %292 = fmul float %289, 4.000000e+00
  %293 = bitcast float %74 to i32
  %294 = bitcast float %75 to i32
  %295 = insertelement <2 x i32> undef, i32 %293, i32 0
  %296 = insertelement <2 x i32> %295, i32 %294, i32 1
  %297 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %296, <32 x i8> %61, <16 x i8> %63, i32 2)
  %298 = extractelement <4 x float> %297, i32 0
  %299 = extractelement <4 x float> %297, i32 1
  %300 = extractelement <4 x float> %297, i32 2
  %301 = fmul float %43, %298
  %302 = fadd float %301, %290
  %303 = fmul float %44, %299
  %304 = fadd float %303, %291
  %305 = fmul float %45, %300
  %306 = fadd float %305, %292
  %307 = fcmp uge float %79, %27
  %308 = select i1 %307, float %79, float %27
  %309 = call float @llvm.AMDIL.clamp.(float %308, float 0.000000e+00, float 1.000000e+00)
  %310 = call float @llvm.AMDGPU.lrp(float %309, float %302, float %24)
  %311 = call float @llvm.AMDGPU.lrp(float %309, float %304, float %25)
  %312 = call float @llvm.AMDGPU.lrp(float %309, float %306, float %26)
  %313 = call i32 @llvm.SI.packf16(float %310, float %311)
  %314 = bitcast i32 %313 to float
  %315 = call i32 @llvm.SI.packf16(float %312, float %248)
  %316 = bitcast i32 %315 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %314, float %316, float %314, float %316)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
c0840300
c0c60500
bf8c0070
f0800f00
00430405
c0840100
bf8c0070
c200092d
bf8c007f
10180a00
c200092c
bf8c007f
d282000c
04300104
c200092e
bf8c007f
d282000c
04300106
c200092f
bf8c007f
d282000c
04300107
c2000922
bf8c007f
06181800
d206080c
0201010c
101c190b
081e1cf2
10180b0f
d2820010
0432130e
c2000929
bf8c007f
10180a00
c2000928
bf8c007f
d282000c
04300104
c200092a
bf8c007f
d282000c
04300106
c200092b
bf8c007f
d282000c
04300107
c2000921
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8440100
c8450101
d2820011
0436230c
10282310
c2000934
c2008935
bf8c007f
7e200201
d2820003
04400103
c2000915
bf8c007f
10220600
c2000914
bf8c007f
10200400
c0860310
c0c80520
bf8c007f
f0800f00
00641010
bf8c0770
10282314
100420ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0711
7e0602ff
3d93dd98
d2820002
040a0712
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
102a0511
c2000925
bf8c007f
10060a00
c2000924
bf8c007f
d2820003
040c0104
c2000926
bf8c007f
d2820003
040c0106
c2000927
bf8c007f
d2820003
040c0107
c2000920
bf8c007f
06060600
d2060803
02010103
10062703
d2820014
04522b03
102e28f6
c8540700
c8550701
c8500600
c8510601
c080030c
c0c60518
bf8c007f
f0800700
00031414
c2000931
bf8c0070
d2820019
045e2a00
c85c0b00
c85d0b01
c200091b
bf8c007f
d00c0002
02000117
7e300200
d2000017
000a2f18
d2060817
02010117
08302ef2
c2000919
bf8c007f
10343000
d2820019
046a3317
1034090f
d282001a
046a110e
c86c0000
c86d0001
d282001b
0436370c
1034371a
1034211a
10360510
d282001a
046a3703
103434f6
c2000930
bf8c007f
d282001a
046a2800
c2000918
bf8c007f
10363000
d282001a
046e3517
5e32331a
101e0d0f
d2820008
043e150e
c8240200
c8250201
d2820009
0436130c
10101308
10102508
10040512
d2820002
04220503
100404f6
c2000932
bf8c007f
d2820002
040a2c00
c200091a
bf8c007f
10063000
d2820002
040e0517
c80c0300
c80d0301
d2820000
0436070c
10000107
5e000102
f8001c0f
00190019
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..14]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3].xyz, IN[1].xyzz, CONST[10].wwww
  8: MUL TEMP[4], CONST[0], TEMP[3].xxxx
  9: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 10: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 11: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 14: MUL TEMP[4].xyz, IN[4].xyzz, CONST[10].wwww
 15: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 16: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 17: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 18: MUL TEMP[5], CONST[0], TEMP[0].xxxx
 19: MAD TEMP[5], CONST[1], TEMP[0].yyyy, TEMP[5]
 20: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[5]
 21: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 22: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[14].xyzz
 23: MAD TEMP[5].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 24: MOV TEMP[0].w, TEMP[5].xxxx
 25: MAD TEMP[5].xy, IN[2].xyyy, CONST[9].xyyy, CONST[9].zwww
 26: MOV TEMP[5].zw, TEMP[3].yyxy
 27: MOV TEMP[6].x, TEMP[3].zzzz
 28: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 29: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 30: MOV TEMP[6].yzw, TEMP[3].yxyz
 31: MOV TEMP[3].xyz, TEMP[4].xyzx
 32: MOV OUT[2], IN[3]
 33: MOV OUT[1], TEMP[2]
 34: MOV OUT[4], TEMP[5]
 35: MOV OUT[6], TEMP[3]
 36: MOV OUT[5], TEMP[6]
 37: MOV OUT[3], TEMP[0]
 38: MOV OUT[0], TEMP[1]
 39: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = extractelement <4 x float> %77, i32 3
  %82 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %83 = load <16 x i8> addrspace(2)* %82, !tbaa !0
  %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %5)
  %85 = extractelement <4 x float> %84, i32 0
  %86 = extractelement <4 x float> %84, i32 1
  %87 = extractelement <4 x float> %84, i32 2
  %88 = fmul float %61, %50
  %89 = fadd float %88, %47
  %90 = fmul float %62, %51
  %91 = fadd float %90, %48
  %92 = fmul float %63, %52
  %93 = fadd float %92, %49
  %94 = fmul float %23, %89
  %95 = fmul float %24, %89
  %96 = fmul float %25, %89
  %97 = fmul float %26, %89
  %98 = fmul float %27, %91
  %99 = fadd float %98, %94
  %100 = fmul float %28, %91
  %101 = fadd float %100, %95
  %102 = fmul float %29, %91
  %103 = fadd float %102, %96
  %104 = fmul float %30, %91
  %105 = fadd float %104, %97
  %106 = fmul float %31, %93
  %107 = fadd float %106, %99
  %108 = fmul float %32, %93
  %109 = fadd float %108, %101
  %110 = fmul float %33, %93
  %111 = fadd float %110, %103
  %112 = fmul float %34, %93
  %113 = fadd float %112, %105
  %114 = fadd float %107, %35
  %115 = fadd float %109, %36
  %116 = fadd float %111, %37
  %117 = fadd float %113, %38
  %118 = fmul float %67, %46
  %119 = fmul float %68, %46
  %120 = fmul float %69, %46
  %121 = fmul float %11, %118
  %122 = fmul float %12, %118
  %123 = fmul float %13, %118
  %124 = fmul float %14, %119
  %125 = fadd float %124, %121
  %126 = fmul float %15, %119
  %127 = fadd float %126, %122
  %128 = fmul float %16, %119
  %129 = fadd float %128, %123
  %130 = fmul float %17, %120
  %131 = fadd float %130, %125
  %132 = fmul float %18, %120
  %133 = fadd float %132, %127
  %134 = fmul float %19, %120
  %135 = fadd float %134, %129
  %136 = fmul float %131, %131
  %137 = fmul float %133, %133
  %138 = fadd float %137, %136
  %139 = fmul float %135, %135
  %140 = fadd float %138, %139
  %141 = call float @llvm.AMDGPU.rsq(float %140)
  %142 = fmul float %131, %141
  %143 = fmul float %133, %141
  %144 = fmul float %135, %141
  %145 = fmul float %85, %46
  %146 = fmul float %86, %46
  %147 = fmul float %87, %46
  %148 = fmul float %11, %145
  %149 = fmul float %12, %145
  %150 = fmul float %13, %145
  %151 = fmul float %14, %146
  %152 = fadd float %151, %148
  %153 = fmul float %15, %146
  %154 = fadd float %153, %149
  %155 = fmul float %16, %146
  %156 = fadd float %155, %150
  %157 = fmul float %17, %147
  %158 = fadd float %157, %152
  %159 = fmul float %18, %147
  %160 = fadd float %159, %154
  %161 = fmul float %19, %147
  %162 = fadd float %161, %156
  %163 = fmul float %11, %89
  %164 = fmul float %12, %89
  %165 = fmul float %13, %89
  %166 = fmul float %14, %91
  %167 = fadd float %166, %163
  %168 = fmul float %15, %91
  %169 = fadd float %168, %164
  %170 = fmul float %16, %91
  %171 = fadd float %170, %165
  %172 = fmul float %17, %93
  %173 = fadd float %172, %167
  %174 = fmul float %18, %93
  %175 = fadd float %174, %169
  %176 = fmul float %19, %93
  %177 = fadd float %176, %171
  %178 = fadd float %173, %20
  %179 = fadd float %175, %21
  %180 = fadd float %177, %22
  %181 = fsub float -0.000000e+00, %55
  %182 = fadd float %178, %181
  %183 = fsub float -0.000000e+00, %56
  %184 = fadd float %179, %183
  %185 = fsub float -0.000000e+00, %57
  %186 = fadd float %180, %185
  %187 = fmul float %116, %53
  %188 = fadd float %187, %54
  %189 = fmul float %73, %42
  %190 = fadd float %189, %44
  %191 = fmul float %74, %43
  %192 = fadd float %191, %45
  %193 = fmul float %162, %143
  %194 = fmul float %158, %144
  %195 = fmul float %160, %142
  %196 = fsub float -0.000000e+00, %193
  %197 = fmul float %160, %144
  %198 = fadd float %197, %196
  %199 = fsub float -0.000000e+00, %194
  %200 = fmul float %162, %142
  %201 = fadd float %200, %199
  %202 = fsub float -0.000000e+00, %195
  %203 = fmul float %158, %143
  %204 = fadd float %203, %202
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %78, float %79, float %80, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %182, float %184, float %186, float %188)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %190, float %192, float %142, float %143)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %144, float %198, float %201, float %204)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %158, float %160, float %162, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c084070c
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840700
bf8c000f
e00c2000
80020300
c2020131
c202812d
bf8c0070
7e020205
d2820001
04040904
c2020130
c202812c
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c2020132
c202812e
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c204813a
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060139
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078138
bf8c007f
0a10100f
f800022f
05060708
c0880704
bf8c000f
e00c2000
80040700
c207812b
bf8c0070
100a100f
100c0e0f
10160c09
d282000b
042e0a0b
100e120f
d2820009
042e0e0a
10100c0c
d2820008
04220a0d
d282000a
04220e0e
1010150a
d2820008
04221309
100c0c04
d2820005
041a0a05
d2820007
04160e08
d2820005
04220f07
7e105b05
100a1109
100c110a
c0880708
bf8c007f
e00c2000
80040900
c2080125
c2088127
bf8c0070
7e1a0211
d282000d
0434210a
c2080124
c2088126
bf8c007f
7e1c0211
d2820009
04382109
f800023f
05060d09
c0880710
bf8c000f
e00c2000
80040c00
bf8c0770
10121a0f
1016180f
10001609
d2820000
0402120b
10181c0f
d2820000
0402180a
101a0d00
1014160c
d282000a
042a120d
d282000a
042a180e
101c0b0a
081a1b0e
100e1107
101c0f0a
10101604
d2820008
04221205
d2820008
04221808
100c0d08
080c1d06
100a0b08
10120f00
080a0b09
f800024f
0d060507
bf8c070f
7e0a0280
f800025f
0508000a
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[11..12]
DCL CONST[3..10]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV TEMP[1].xy, IN[4].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[1].xyz, CONST[5].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: MOV TEMP[3].xy, IN[2].xyyy
  8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D
  9: LRP TEMP[3].xyz, CONST[6].wwww, TEMP[3].xyzz, IMM[0].wwww
 10: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz
 11: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 12: DP4 TEMP[1].x, TEMP[2], CONST[9]
 13: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[7].yyyy
 14: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 15: MUL TEMP[1], TEMP[2], TEMP[1]
 16: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[3].xyyy
 17: MOV TEMP[3].xy, TEMP[3].xyyy
 18: TEX TEMP[3], TEMP[3], SAMP[2], 2D
 19: DP4 TEMP[2].x, TEMP[2], CONST[8]
 20: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx
 21: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 22: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 23: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 24: RCP TEMP[4].x, TEMP[4].xxxx
 25: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 26: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 27: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 28: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 29: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 30: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 31: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[4].xyzz
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %42 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %43 = load <32 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %47 = load <32 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %51 = load <32 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %63 = fmul float %13, %40
  %64 = fadd float %63, %41
  %65 = bitcast float %61 to i32
  %66 = bitcast float %62 to i32
  %67 = insertelement <2 x i32> undef, i32 %65, i32 0
  %68 = insertelement <2 x i32> %67, i32 %66, i32 1
  %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %43, <16 x i8> %45, i32 2)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = fmul float %70, 0x3FCB367A00000000
  %75 = fmul float %71, 0x3FE6E2EB20000000
  %76 = fadd float %75, %74
  %77 = fmul float %72, 0x3FB27BB300000000
  %78 = fadd float %76, %77
  %79 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %70)
  %80 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %71)
  %81 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %72)
  %82 = bitcast float %58 to i32
  %83 = bitcast float %59 to i32
  %84 = insertelement <2 x i32> undef, i32 %82, i32 0
  %85 = insertelement <2 x i32> %84, i32 %83, i32 1
  %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %47, <16 x i8> %49, i32 2)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = extractelement <4 x float> %86, i32 2
  %90 = call float @llvm.AMDGPU.lrp(float %29, float %87, float 1.000000e+00)
  %91 = call float @llvm.AMDGPU.lrp(float %29, float %88, float 1.000000e+00)
  %92 = call float @llvm.AMDGPU.lrp(float %29, float %89, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float %90, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float %91, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %92, float 0.000000e+00, float 1.000000e+00)
  %96 = fmul float %79, %93
  %97 = fmul float %80, %94
  %98 = fmul float %81, %95
  %99 = fmul float %96, %36
  %100 = fmul float %97, %37
  %101 = fadd float %99, %100
  %102 = fmul float %98, %38
  %103 = fadd float %101, %102
  %104 = fmul float %73, %39
  %105 = fadd float %103, %104
  %106 = fadd float %105, %31
  %107 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00)
  %108 = call float @llvm.AMDGPU.lrp(float %107, float %54, float 1.000000e+00)
  %109 = call float @llvm.AMDGPU.lrp(float %107, float %55, float 1.000000e+00)
  %110 = call float @llvm.AMDGPU.lrp(float %107, float %56, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %107, float %57, float 1.000000e+00)
  %112 = fmul float %96, %108
  %113 = fmul float %97, %109
  %114 = fmul float %98, %110
  %115 = fmul float %73, %111
  %116 = fmul float %12, %22
  %117 = fmul float %64, %23
  %118 = bitcast float %116 to i32
  %119 = bitcast float %117 to i32
  %120 = insertelement <2 x i32> undef, i32 %118, i32 0
  %121 = insertelement <2 x i32> %120, i32 %119, i32 1
  %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %51, <16 x i8> %53, i32 2)
  %123 = extractelement <4 x float> %122, i32 0
  %124 = extractelement <4 x float> %122, i32 1
  %125 = extractelement <4 x float> %122, i32 2
  %126 = extractelement <4 x float> %122, i32 3
  %127 = fmul float %96, %32
  %128 = fmul float %97, %33
  %129 = fadd float %127, %128
  %130 = fmul float %98, %34
  %131 = fadd float %129, %130
  %132 = fmul float %73, %35
  %133 = fadd float %131, %132
  %134 = fadd float %133, %30
  %135 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00)
  %136 = fmul float %135, %126
  %137 = fmul float %123, 0x3FCB367A00000000
  %138 = fmul float %124, 0x3FE6E2EB20000000
  %139 = fadd float %138, %137
  %140 = fmul float %125, 0x3FB27BB300000000
  %141 = fadd float %139, %140
  %142 = fcmp uge float %141, 0x3F50624DE0000000
  %143 = select i1 %142, float %141, float 0x3F50624DE0000000
  %144 = fdiv float 1.000000e+00, %143
  %145 = fmul float %123, %144
  %146 = fmul float %124, %144
  %147 = fmul float %125, %144
  %148 = fmul float %112, %123
  %149 = fmul float %113, %124
  %150 = fmul float %114, %125
  %151 = fmul float %136, %145
  %152 = fadd float %151, %148
  %153 = fmul float %136, %146
  %154 = fadd float %153, %149
  %155 = fmul float %136, %147
  %156 = fadd float %155, %150
  %157 = fmul float %152, 4.000000e+00
  %158 = fmul float %154, 4.000000e+00
  %159 = fmul float %156, 4.000000e+00
  %160 = fcmp uge float %60, %27
  %161 = select i1 %160, float %60, float %27
  %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %163 = call float @llvm.AMDGPU.lrp(float %162, float %157, float %24)
  %164 = call float @llvm.AMDGPU.lrp(float %162, float %158, float %25)
  %165 = call float @llvm.AMDGPU.lrp(float %162, float %159, float %26)
  %166 = call i32 @llvm.SI.packf16(float %163, float %164)
  %167 = bitcast i32 %166 to float
  %168 = call i32 @llvm.SI.packf16(float %165, float %115)
  %169 = bitcast i32 %168 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %167, float %169, float %167, float %169)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140d00
c8150d01
c8100c00
c8110c01
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1202ff
3f371759
d2820008
04221305
7e1602ff
3d93dd98
d282000c
04221706
c0840100
bf8c007f
c2000914
bf8c007f
d208000d
020000f2
1010090d
d2820008
04221800
c83c0500
c83d0501
c8380400
c8390401
c0860304
c0c80508
bf8c007f
f0800700
00640e0e
c200891b
bf8c0070
d2080011
020002f2
d282000a
04461c01
d206080a
0201010a
10101508
10140b0d
d282000a
042a1800
d2820012
04461e01
d2060812
02010112
1014250a
c2038925
bf8c007f
10241407
c2038924
bf8c007f
d2820013
04480f08
101a0d0d
d282000c
04361800
d282000d
04462001
d206080d
0201010d
10241b0c
c2000926
bf8c007f
d282000c
044c0112
c2000927
bf8c007f
d282000c
04300107
c200091d
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8380200
c8390201
d282000e
04361d0c
10261d12
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c200090d
bf8c007f
101e0600
c200090c
bf8c007f
101c0400
c0800308
c0c60510
bf8c007f
f0800f00
00030e0e
bf8c0770
10262113
c2000921
bf8c007f
10041400
c2000920
bf8c007f
d2820002
04080108
c2000922
bf8c007f
d2820002
04080112
c2000923
bf8c007f
d2820002
04080107
c200091c
bf8c007f
06040400
d2060802
02010102
10042302
10061cff
3e59b3d0
d2820003
040e130f
d2820003
040e1710
7e1202ff
3a83126f
d00c0000
02021303
d2000003
00020709
7e065503
10120710
d2820009
044e1302
102412f6
c8240b00
c8250b01
c2000913
bf8c007f
d00c0002
02000109
7e160200
d2000009
000a130b
d2060809
02010109
081612f2
c2000912
bf8c007f
10261600
d2820012
044e2509
c84c0300
c84d0301
d2820013
0436270c
10082707
5e080912
c8140100
c8150101
d2820005
04360b0c
100a0b0a
100a1f05
100c070f
d2820005
04160d02
100a0af6
c2000911
bf8c007f
100c1600
d2820005
041a0b09
c8180000
c8190001
d2820000
04360d0c
10000108
10001d00
1002070e
d2820000
04020302
100000f6
c2000910
bf8c007f
10021600
d2820000
04060109
5e000b00
f8001c0f
04000400
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[4], TEMP[3]
 16: MOV OUT[2], IN[2]
 17: MOV OUT[1], TEMP[2]
 18: MOV OUT[3], TEMP[0]
 19: MOV OUT[0], TEMP[1]
 20: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = extractelement <4 x float> %72, i32 2
  %76 = extractelement <4 x float> %72, i32 3
  %77 = fmul float %62, %51
  %78 = fadd float %77, %48
  %79 = fmul float %63, %52
  %80 = fadd float %79, %49
  %81 = fmul float %64, %53
  %82 = fadd float %81, %50
  %83 = fmul float %25, %78
  %84 = fmul float %26, %78
  %85 = fmul float %27, %78
  %86 = fmul float %28, %78
  %87 = fmul float %29, %80
  %88 = fadd float %87, %83
  %89 = fmul float %30, %80
  %90 = fadd float %89, %84
  %91 = fmul float %31, %80
  %92 = fadd float %91, %85
  %93 = fmul float %32, %80
  %94 = fadd float %93, %86
  %95 = fmul float %33, %82
  %96 = fadd float %95, %88
  %97 = fmul float %34, %82
  %98 = fadd float %97, %90
  %99 = fmul float %35, %82
  %100 = fadd float %99, %92
  %101 = fmul float %36, %82
  %102 = fadd float %101, %94
  %103 = fadd float %96, %37
  %104 = fadd float %98, %38
  %105 = fadd float %100, %39
  %106 = fadd float %102, %40
  %107 = fmul float %11, %78
  %108 = fmul float %12, %78
  %109 = fmul float %13, %78
  %110 = fmul float %14, %78
  %111 = fmul float %15, %80
  %112 = fadd float %111, %107
  %113 = fmul float %16, %80
  %114 = fadd float %113, %108
  %115 = fmul float %17, %80
  %116 = fadd float %115, %109
  %117 = fmul float %18, %80
  %118 = fadd float %117, %110
  %119 = fmul float %19, %82
  %120 = fadd float %119, %112
  %121 = fmul float %20, %82
  %122 = fadd float %121, %114
  %123 = fmul float %21, %82
  %124 = fadd float %123, %116
  %125 = fadd float %120, %22
  %126 = fadd float %122, %23
  %127 = fadd float %124, %24
  %128 = fsub float -0.000000e+00, %56
  %129 = fadd float %125, %128
  %130 = fsub float -0.000000e+00, %57
  %131 = fadd float %126, %130
  %132 = fsub float -0.000000e+00, %58
  %133 = fadd float %127, %132
  %134 = fmul float %105, %54
  %135 = fadd float %134, %55
  %136 = fmul float %68, %44
  %137 = fadd float %136, %46
  %138 = fmul float %69, %45
  %139 = fadd float %138, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %75, float %76)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %131, float %133, float %135)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %137, float %139, float %116, float %118)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %104, float %105, float %106)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800022f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800023f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: DP4 TEMP[2].x, TEMP[1], CONST[8]
  5: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].yyyy
  6: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].xxxx
  7: MUL TEMP[2], TEMP[1], TEMP[2]
  8: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[3].xy, TEMP[3].xyyy
 10: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 11: DP4 TEMP[1].x, TEMP[1], CONST[7]
 12: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].xxxx
 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].wwww
 14: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].yzww
 15: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 16: RCP TEMP[4].x, TEMP[4].xxxx
 17: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 18: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 19: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 21: MAX TEMP[1].x, IN[2].wwww, CONST[3].wwww
 22: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 23: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 24: MAD TEMP[1].x, TEMP[2].wwww, CONST[4].yyyy, CONST[4].zzzz
 25: SLT TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz
 26: F2I TEMP[1].x, -TEMP[1]
 27: UIF TEMP[1].xxxx :2
 28:   KILL
 29: ENDIF
 30: MOV OUT[0], TEMP[2]
 31: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %42 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %43 = load <32 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %47 = load <32 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %57 = fmul float %13, %40
  %58 = fadd float %57, %41
  %59 = bitcast float %55 to i32
  %60 = bitcast float %56 to i32
  %61 = insertelement <2 x i32> undef, i32 %59, i32 0
  %62 = insertelement <2 x i32> %61, i32 %60, i32 1
  %63 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %43, <16 x i8> %45, i32 2)
  %64 = extractelement <4 x float> %63, i32 0
  %65 = extractelement <4 x float> %63, i32 1
  %66 = extractelement <4 x float> %63, i32 2
  %67 = extractelement <4 x float> %63, i32 3
  %68 = fmul float %64, %36
  %69 = fmul float %65, %37
  %70 = fadd float %68, %69
  %71 = fmul float %66, %38
  %72 = fadd float %70, %71
  %73 = fmul float %67, %39
  %74 = fadd float %72, %73
  %75 = fadd float %74, %31
  %76 = call float @llvm.AMDIL.clamp.(float %75, float 0.000000e+00, float 1.000000e+00)
  %77 = call float @llvm.AMDGPU.lrp(float %76, float %50, float 1.000000e+00)
  %78 = call float @llvm.AMDGPU.lrp(float %76, float %51, float 1.000000e+00)
  %79 = call float @llvm.AMDGPU.lrp(float %76, float %52, float 1.000000e+00)
  %80 = call float @llvm.AMDGPU.lrp(float %76, float %53, float 1.000000e+00)
  %81 = fmul float %64, %77
  %82 = fmul float %65, %78
  %83 = fmul float %66, %79
  %84 = fmul float %67, %80
  %85 = fmul float %12, %22
  %86 = fmul float %58, %23
  %87 = bitcast float %85 to i32
  %88 = bitcast float %86 to i32
  %89 = insertelement <2 x i32> undef, i32 %87, i32 0
  %90 = insertelement <2 x i32> %89, i32 %88, i32 1
  %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %47, <16 x i8> %49, i32 2)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = extractelement <4 x float> %91, i32 1
  %94 = extractelement <4 x float> %91, i32 2
  %95 = extractelement <4 x float> %91, i32 3
  %96 = fmul float %64, %32
  %97 = fmul float %65, %33
  %98 = fadd float %96, %97
  %99 = fmul float %66, %34
  %100 = fadd float %98, %99
  %101 = fmul float %67, %35
  %102 = fadd float %100, %101
  %103 = fadd float %102, %30
  %104 = call float @llvm.AMDIL.clamp.(float %103, float 0.000000e+00, float 1.000000e+00)
  %105 = fmul float %104, %95
  %106 = fmul float %92, 0x3FCB367A00000000
  %107 = fmul float %93, 0x3FE6E2EB20000000
  %108 = fadd float %107, %106
  %109 = fmul float %94, 0x3FB27BB300000000
  %110 = fadd float %108, %109
  %111 = fcmp uge float %110, 0x3F50624DE0000000
  %112 = select i1 %111, float %110, float 0x3F50624DE0000000
  %113 = fdiv float 1.000000e+00, %112
  %114 = fmul float %92, %113
  %115 = fmul float %93, %113
  %116 = fmul float %94, %113
  %117 = fmul float %81, %92
  %118 = fmul float %82, %93
  %119 = fmul float %83, %94
  %120 = fmul float %105, %114
  %121 = fadd float %120, %117
  %122 = fmul float %105, %115
  %123 = fadd float %122, %118
  %124 = fmul float %105, %116
  %125 = fadd float %124, %119
  %126 = fmul float %121, 4.000000e+00
  %127 = fmul float %123, 4.000000e+00
  %128 = fmul float %125, 4.000000e+00
  %129 = fcmp uge float %54, %27
  %130 = select i1 %129, float %54, float %27
  %131 = call float @llvm.AMDIL.clamp.(float %130, float 0.000000e+00, float 1.000000e+00)
  %132 = call float @llvm.AMDGPU.lrp(float %131, float %126, float %24)
  %133 = call float @llvm.AMDGPU.lrp(float %131, float %127, float %25)
  %134 = call float @llvm.AMDGPU.lrp(float %131, float %128, float %26)
  %135 = fmul float %84, %28
  %136 = fadd float %135, %29
  %137 = fcmp ult float %136, 0.000000e+00
  %138 = select i1 %137, float 1.000000e+00, float 0.000000e+00
  %139 = fsub float -0.000000e+00, %138
  %140 = fptosi float %139 to i32
  %141 = bitcast i32 %140 to float
  %142 = bitcast float %141 to i32
  %143 = icmp ne i32 %142, 0
  br i1 %143, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %144 = call i32 @llvm.SI.packf16(float %132, float %133)
  %145 = bitcast i32 %144 to float
  %146 = call i32 @llvm.SI.packf16(float %134, float %84)
  %147 = bitcast i32 %146 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %145, float %147, float %145, float %147)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c0840100
bf8c0070
c2000921
bf8c007f
10100a00
c2000920
bf8c007f
d2820008
04200104
c2000922
bf8c007f
d2820008
04200106
c2000923
bf8c007f
d2820008
04200107
c2000919
bf8c007f
06101000
d2060808
02010108
081210f2
c8280200
c8290201
d282000a
04261508
101c1506
c200092c
c200892d
bf8c007f
7e140201
d2820003
04280103
c2000909
bf8c007f
10160600
c2000908
bf8c007f
10140400
c0800304
c0c60508
bf8c007f
f0800f00
00030a0a
bf8c0770
1006190e
100414ff
3e59b3d0
7e1c02ff
3f371759
d2820002
040a1d0b
7e1c02ff
3d93dd98
d2820002
040a1d0c
7e1c02ff
3a83126f
d00c0000
02021d02
d2000002
0002050e
7e045502
101e050c
c200091d
bf8c007f
101c0a00
c200091c
bf8c007f
d282000e
04380104
c200091e
bf8c007f
d282000e
04380106
c200091f
bf8c007f
d282000e
04380107
c2000918
bf8c007f
061c1c00
d206080e
0201010e
101c1b0e
d2820003
040e1f0e
100606f6
c83c0700
c83d0701
c200090f
bf8c007f
d00c0002
0200010f
7e200200
d200000f
000a1f10
d2060810
0201010f
082220f2
c200090e
bf8c007f
101e2200
d2820003
043e0710
c83c0100
c83d0101
d282000f
04261f08
101e1f05
101e170f
1024050b
d282000f
043e250e
101e1ef6
c200090d
bf8c007f
10242200
d282000f
044a1f10
c8480000
c8490001
d2820012
04262508
10242504
10241512
1004050a
d2820002
044a050e
100404f6
c200090c
bf8c007f
10142200
d2820002
042a0510
c8280300
c8290301
d2820000
04261508
10000107
c2000911
c2008912
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000103
5e021f02
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[13..14]
DCL CONST[5..12]
DCL TEMP[0..1]
DCL TEMP[2..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: DP3 TEMP[6].x, TEMP[4].xyzz, IMM[1].xyzz
 14: LRP TEMP[5].xyz, CONST[7].xxxx, TEMP[6].xxxx, TEMP[4].xyzz
 15: MOV TEMP[4].xy, IN[4].xyyy
 16: TEX TEMP[4], TEMP[4], SAMP[2], 2D
 17: MOV TEMP[6].xy, IN[4].xyyy
 18: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 19: MAD TEMP[6].yw, IMM[0].zzzz, TEMP[6], IMM[0].xxxx
 20: DP3 TEMP[7].x, TEMP[2].xyzz, TEMP[2].xyzz
 21: RSQ TEMP[7].x, TEMP[7].xxxx
 22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 23: DP2 TEMP[7].x, TEMP[6].ywww, TEMP[6].ywww
 24: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx
 25: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
 26: RSQ TEMP[8].x, TEMP[7].xxxx
 27: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[7].xxxx
 28: CMP TEMP[8].x, -TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].wwww
 29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 30: DP3 TEMP[7].x, IN[5].yzww, IN[5].yzww
 31: RSQ TEMP[7].x, TEMP[7].xxxx
 32: MUL TEMP[7].xyz, IN[5].yzww, TEMP[7].xxxx
 33: DP3 TEMP[8].x, IN[6].xyzz, IN[6].xyzz
 34: RSQ TEMP[8].x, TEMP[8].xxxx
 35: MUL TEMP[8].xyz, IN[6].xyzz, TEMP[8].xxxx
 36: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].wwww
 37: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[6].yyyy, TEMP[8].xyzz
 38: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
 39: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 40: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 41: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 42: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 43: MOV TEMP[2].xyz, TEMP[2].xyzz
 44: TEX TEMP[2], TEMP[2], SAMP[3], CUBE
 45: DP4 TEMP[3].x, TEMP[4], CONST[12]
 46: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[9].zzzz
 47: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 48: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[5].xyzz
 49: DP4 TEMP[2].x, TEMP[4], CONST[11]
 50: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[9].yyyy
 51: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 52: MUL TEMP[2], TEMP[5], TEMP[2]
 53: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 54: MOV TEMP[3].xy, TEMP[3].xyyy
 55: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 56: DP4 TEMP[4].x, TEMP[4], CONST[10]
 57: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[9].xxxx
 58: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 59: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 60: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww
 61: RCP TEMP[5].x, TEMP[5].xxxx
 62: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 63: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 64: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 65: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[2].xxxx
 66: MAX TEMP[3].x, IN[3].wwww, CONST[6].wwww
 67: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 68: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[6].xyzz
 69: MOV OUT[0], TEMP[2]
 70: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %46 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %47 = load <32 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %51 = load <32 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %55 = load <32 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %59 = load <32 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %63 = load <32 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = fcmp ugt float %16, 0.000000e+00
  %67 = select i1 %66, float 1.000000e+00, float 0.000000e+00
  %68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %87 = fmul float %13, %44
  %88 = fadd float %87, %45
  %89 = call float @llvm.AMDIL.clamp.(float %67, float 0.000000e+00, float 1.000000e+00)
  %90 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %91 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %92 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %93 = bitcast float %89 to i32
  %94 = icmp ne i32 %93, 0
  %. = select i1 %94, float -1.000000e+00, float 1.000000e+00
  %95 = bitcast float %76 to i32
  %96 = bitcast float %77 to i32
  %97 = insertelement <2 x i32> undef, i32 %95, i32 0
  %98 = insertelement <2 x i32> %97, i32 %96, i32 1
  %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %98, <32 x i8> %47, <16 x i8> %49, i32 2)
  %100 = extractelement <4 x float> %99, i32 0
  %101 = extractelement <4 x float> %99, i32 1
  %102 = extractelement <4 x float> %99, i32 2
  %103 = extractelement <4 x float> %99, i32 3
  %104 = fmul float %100, 0x3FCB367A00000000
  %105 = fmul float %101, 0x3FE6E2EB20000000
  %106 = fadd float %105, %104
  %107 = fmul float %102, 0x3FB27BB300000000
  %108 = fadd float %106, %107
  %109 = call float @llvm.AMDGPU.lrp(float %28, float %108, float %100)
  %110 = call float @llvm.AMDGPU.lrp(float %28, float %108, float %101)
  %111 = call float @llvm.AMDGPU.lrp(float %28, float %108, float %102)
  %112 = bitcast float %76 to i32
  %113 = bitcast float %77 to i32
  %114 = insertelement <2 x i32> undef, i32 %112, i32 0
  %115 = insertelement <2 x i32> %114, i32 %113, i32 1
  %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %115, <32 x i8> %55, <16 x i8> %57, i32 2)
  %117 = extractelement <4 x float> %116, i32 0
  %118 = extractelement <4 x float> %116, i32 1
  %119 = extractelement <4 x float> %116, i32 2
  %120 = extractelement <4 x float> %116, i32 3
  %121 = bitcast float %76 to i32
  %122 = bitcast float %77 to i32
  %123 = insertelement <2 x i32> undef, i32 %121, i32 0
  %124 = insertelement <2 x i32> %123, i32 %122, i32 1
  %125 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %124, <32 x i8> %51, <16 x i8> %53, i32 2)
  %126 = extractelement <4 x float> %125, i32 1
  %127 = extractelement <4 x float> %125, i32 3
  %128 = fmul float 2.000000e+00, %126
  %129 = fadd float %128, -1.000000e+00
  %130 = fmul float 2.000000e+00, %127
  %131 = fadd float %130, -1.000000e+00
  %132 = fmul float %78, %78
  %133 = fmul float %79, %79
  %134 = fadd float %133, %132
  %135 = fmul float %80, %80
  %136 = fadd float %134, %135
  %137 = call float @llvm.AMDGPU.rsq(float %136)
  %138 = fmul float %78, %137
  %139 = fmul float %79, %137
  %140 = fmul float %80, %137
  %141 = fmul float %129, %129
  %142 = fmul float %131, %131
  %143 = fadd float %141, %142
  %144 = fsub float -0.000000e+00, %143
  %145 = fadd float 1.000000e+00, %144
  %146 = fcmp uge float 0.000000e+00, %145
  %147 = select i1 %146, float 0.000000e+00, float %145
  %148 = call float @llvm.AMDGPU.rsq(float %147)
  %149 = fmul float %148, %147
  %150 = fsub float -0.000000e+00, %147
  %151 = call float @llvm.AMDGPU.cndlt(float %150, float %149, float 0.000000e+00)
  %152 = fmul float %138, %151
  %153 = fmul float %139, %151
  %154 = fmul float %140, %151
  %155 = fmul float %81, %81
  %156 = fmul float %82, %82
  %157 = fadd float %156, %155
  %158 = fmul float %83, %83
  %159 = fadd float %157, %158
  %160 = call float @llvm.AMDGPU.rsq(float %159)
  %161 = fmul float %81, %160
  %162 = fmul float %82, %160
  %163 = fmul float %83, %160
  %164 = fmul float %84, %84
  %165 = fmul float %85, %85
  %166 = fadd float %165, %164
  %167 = fmul float %86, %86
  %168 = fadd float %166, %167
  %169 = call float @llvm.AMDGPU.rsq(float %168)
  %170 = fmul float %84, %169
  %171 = fmul float %85, %169
  %172 = fmul float %86, %169
  %173 = fmul float %170, %131
  %174 = fmul float %171, %131
  %175 = fmul float %172, %131
  %176 = fmul float %161, %129
  %177 = fadd float %176, %173
  %178 = fmul float %162, %129
  %179 = fadd float %178, %174
  %180 = fmul float %163, %129
  %181 = fadd float %180, %175
  %182 = fmul float %152, %.
  %183 = fadd float %182, %177
  %184 = fmul float %153, %.
  %185 = fadd float %184, %179
  %186 = fmul float %154, %.
  %187 = fadd float %186, %181
  %188 = fmul float %183, %72
  %189 = fmul float %185, %73
  %190 = fadd float %189, %188
  %191 = fmul float %187, %74
  %192 = fadd float %190, %191
  %193 = fmul float %192, %183
  %194 = fmul float %192, %185
  %195 = fmul float %192, %187
  %196 = fmul float 2.000000e+00, %193
  %197 = fmul float 2.000000e+00, %194
  %198 = fmul float 2.000000e+00, %195
  %199 = fsub float -0.000000e+00, %196
  %200 = fadd float %72, %199
  %201 = fsub float -0.000000e+00, %197
  %202 = fadd float %73, %201
  %203 = fsub float -0.000000e+00, %198
  %204 = fadd float %74, %203
  %205 = insertelement <4 x float> undef, float %200, i32 0
  %206 = insertelement <4 x float> %205, float %202, i32 1
  %207 = insertelement <4 x float> %206, float %204, i32 2
  %208 = insertelement <4 x float> %207, float 0.000000e+00, i32 3
  %209 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %208)
  %210 = extractelement <4 x float> %209, i32 0
  %211 = extractelement <4 x float> %209, i32 1
  %212 = extractelement <4 x float> %209, i32 2
  %213 = extractelement <4 x float> %209, i32 3
  %214 = call float @fabs(float %212)
  %215 = fdiv float 1.000000e+00, %214
  %216 = fmul float %210, %215
  %217 = fadd float %216, 1.500000e+00
  %218 = fmul float %211, %215
  %219 = fadd float %218, 1.500000e+00
  %220 = bitcast float %219 to i32
  %221 = bitcast float %217 to i32
  %222 = bitcast float %213 to i32
  %223 = insertelement <4 x i32> undef, i32 %220, i32 0
  %224 = insertelement <4 x i32> %223, i32 %221, i32 1
  %225 = insertelement <4 x i32> %224, i32 %222, i32 2
  %226 = insertelement <4 x i32> %225, i32 undef, i32 3
  %227 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %226, <32 x i8> %59, <16 x i8> %61, i32 4)
  %228 = extractelement <4 x float> %227, i32 0
  %229 = extractelement <4 x float> %227, i32 1
  %230 = extractelement <4 x float> %227, i32 2
  %231 = extractelement <4 x float> %227, i32 3
  %232 = fmul float %117, %40
  %233 = fmul float %118, %41
  %234 = fadd float %232, %233
  %235 = fmul float %119, %42
  %236 = fadd float %234, %235
  %237 = fmul float %120, %43
  %238 = fadd float %236, %237
  %239 = fadd float %238, %31
  %240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00)
  %241 = fmul float %231, %240
  %242 = call float @llvm.AMDGPU.lrp(float %241, float %228, float %109)
  %243 = call float @llvm.AMDGPU.lrp(float %241, float %229, float %110)
  %244 = call float @llvm.AMDGPU.lrp(float %241, float %230, float %111)
  %245 = fmul float %117, %36
  %246 = fmul float %118, %37
  %247 = fadd float %245, %246
  %248 = fmul float %119, %38
  %249 = fadd float %247, %248
  %250 = fmul float %120, %39
  %251 = fadd float %249, %250
  %252 = fadd float %251, %30
  %253 = call float @llvm.AMDIL.clamp.(float %252, float 0.000000e+00, float 1.000000e+00)
  %254 = call float @llvm.AMDGPU.lrp(float %253, float %68, float 1.000000e+00)
  %255 = call float @llvm.AMDGPU.lrp(float %253, float %69, float 1.000000e+00)
  %256 = call float @llvm.AMDGPU.lrp(float %253, float %70, float 1.000000e+00)
  %257 = call float @llvm.AMDGPU.lrp(float %253, float %71, float 1.000000e+00)
  %258 = fmul float %242, %254
  %259 = fmul float %243, %255
  %260 = fmul float %244, %256
  %261 = fmul float %103, %257
  %262 = fmul float %12, %22
  %263 = fmul float %88, %23
  %264 = bitcast float %262 to i32
  %265 = bitcast float %263 to i32
  %266 = insertelement <2 x i32> undef, i32 %264, i32 0
  %267 = insertelement <2 x i32> %266, i32 %265, i32 1
  %268 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %267, <32 x i8> %63, <16 x i8> %65, i32 2)
  %269 = extractelement <4 x float> %268, i32 0
  %270 = extractelement <4 x float> %268, i32 1
  %271 = extractelement <4 x float> %268, i32 2
  %272 = extractelement <4 x float> %268, i32 3
  %273 = fmul float %117, %32
  %274 = fmul float %118, %33
  %275 = fadd float %273, %274
  %276 = fmul float %119, %34
  %277 = fadd float %275, %276
  %278 = fmul float %120, %35
  %279 = fadd float %277, %278
  %280 = fadd float %279, %29
  %281 = call float @llvm.AMDIL.clamp.(float %280, float 0.000000e+00, float 1.000000e+00)
  %282 = fmul float %281, %272
  %283 = fmul float %269, 0x3FCB367A00000000
  %284 = fmul float %270, 0x3FE6E2EB20000000
  %285 = fadd float %284, %283
  %286 = fmul float %271, 0x3FB27BB300000000
  %287 = fadd float %285, %286
  %288 = fcmp uge float %287, 0x3F50624DE0000000
  %289 = select i1 %288, float %287, float 0x3F50624DE0000000
  %290 = fdiv float 1.000000e+00, %289
  %291 = fmul float %269, %290
  %292 = fmul float %270, %290
  %293 = fmul float %271, %290
  %294 = fmul float %258, %269
  %295 = fmul float %259, %270
  %296 = fmul float %260, %271
  %297 = fmul float %282, %291
  %298 = fadd float %297, %294
  %299 = fmul float %282, %292
  %300 = fadd float %299, %295
  %301 = fmul float %282, %293
  %302 = fadd float %301, %296
  %303 = fmul float %298, 4.000000e+00
  %304 = fmul float %300, 4.000000e+00
  %305 = fmul float %302, 4.000000e+00
  %306 = fcmp uge float %75, %27
  %307 = select i1 %306, float %75, float %27
  %308 = call float @llvm.AMDIL.clamp.(float %307, float 0.000000e+00, float 1.000000e+00)
  %309 = call float @llvm.AMDGPU.lrp(float %308, float %303, float %24)
  %310 = call float @llvm.AMDGPU.lrp(float %308, float %304, float %25)
  %311 = call float @llvm.AMDGPU.lrp(float %308, float %305, float %26)
  %312 = call i32 @llvm.SI.packf16(float %309, float %310)
  %313 = bitcast i32 %312 to float
  %314 = call i32 @llvm.SI.packf16(float %311, float %261)
  %315 = bitcast i32 %314 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %313, float %315, float %313, float %315)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340b00
c8350b01
c8300a00
c8310a01
1014190c
d282000b
042a1b0d
c8280c00
c8290c01
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441100
c8451101
c8401000
c8411001
101a2110
d282000e
04362311
c8341200
c8351201
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8540e00
c8550e01
c8500d00
c8510d01
10222914
d2820012
04462b15
c8440f00
c8450f01
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0400
c83d0401
10281f0c
c8400500
c8410501
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200600
c8210601
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c084030c
c0c60518
bf8c007f
f0800f00
00430708
c0840308
c0c60510
bf8c0070
f0800f00
00431205
c0840100
bf8c0070
c2000931
bf8c007f
10082600
c2000930
bf8c007f
d2820004
04100112
c2000932
bf8c007f
d2820004
04100114
c2000933
bf8c007f
d2820004
04100115
c2000926
bf8c007f
06080800
d2060804
02010104
1008090a
081e08f2
c0860300
c0c80500
bf8c007f
f0800f00
00640b05
bf8c0770
100a16ff
3e59b3d0
7e3402ff
3f371759
d2820005
0416350c
7e3602ff
3d93dd98
d2820010
0416370d
c200091c
bf8c007f
d2080011
020000f2
100a1911
d2820005
04162000
100a0b0f
d2820016
04161104
c200892d
bf8c007f
100a2601
c200892c
bf8c007f
d2820005
04140312
c200892e
bf8c007f
d2820005
04140314
c200892f
bf8c007f
d2820005
04140315
c2008925
bf8c007f
060a0a01
d2060805
02010105
080c0af2
c85c0100
c85d0101
d2820017
041a2f05
10382f16
c2008938
c2038939
bf8c007f
7e2c0207
d2820003
04580303
c2008915
bf8c007f
102e0601
c2008914
bf8c007f
102c0401
c0860310
c0c80520
bf8c007f
f0800f00
00641616
bf8c0770
10382f1c
10042cff
3e59b3d0
d2820002
040a3517
d2820002
040a3718
7e0602ff
3a83126f
d00c0002
02020702
d2000002
000a0503
7e045502
10340517
c2008929
bf8c007f
10062601
c2008928
bf8c007f
d2820003
040c0312
c200892a
bf8c007f
d2820003
040c0314
c200892b
bf8c007f
d2820003
040c0315
c2008924
bf8c007f
06060601
d2060803
02010103
10063303
d2820012
04723503
102824f6
c8480700
c8490701
c200891b
bf8c007f
d00c0002
02000312
7e260201
d2000012
000a2513
d2060812
02010112
082624f2
c2008919
bf8c007f
102a2601
d2820014
04562912
102a1711
d2820015
04562000
102a2b0f
d2820015
04560f04
c8680000
c8690001
d282001a
041a3505
102a3515
102a2d15
10340516
d2820015
04563503
102a2af6
c2008918
bf8c007f
10342601
d2820015
046a2b12
5e282915
10221b11
d2820010
04462000
101e210f
d2820004
043e1304
c81c0200
c81d0201
d2820007
041a0f05
10080f04
10083104
10040518
d2820002
04120503
100404f6
c200091a
bf8c007f
10062600
d2820002
040e0512
c80c0300
c80d0301
d2820000
041a0705
1000010e
5e000102
f8001c0f
00140014
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..14]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3].xyz, IN[1].xyzz, CONST[10].wwww
  8: MUL TEMP[4], CONST[0], TEMP[3].xxxx
  9: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 10: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 11: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 14: MUL TEMP[4].xyz, IN[3].xyzz, CONST[10].wwww
 15: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 16: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 17: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 18: MUL TEMP[5], CONST[0], TEMP[0].xxxx
 19: MAD TEMP[5], CONST[1], TEMP[0].yyyy, TEMP[5]
 20: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[5]
 21: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 22: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[14].xyzz
 23: MAD TEMP[5].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 24: MOV TEMP[0].w, TEMP[5].xxxx
 25: MAD TEMP[5].xy, IN[2].xyyy, CONST[9].xyyy, CONST[9].zwww
 26: MOV TEMP[5].zw, TEMP[3].yyxy
 27: MOV TEMP[6].x, TEMP[3].zzzz
 28: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 29: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 30: MOV TEMP[6].yzw, TEMP[3].yxyz
 31: MOV TEMP[3].xyz, TEMP[4].xyzx
 32: MOV OUT[1], TEMP[2]
 33: MOV OUT[3], TEMP[5]
 34: MOV OUT[5], TEMP[3]
 35: MOV OUT[4], TEMP[6]
 36: MOV OUT[2], TEMP[0]
 37: MOV OUT[0], TEMP[1]
 38: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = fmul float %61, %50
  %82 = fadd float %81, %47
  %83 = fmul float %62, %51
  %84 = fadd float %83, %48
  %85 = fmul float %63, %52
  %86 = fadd float %85, %49
  %87 = fmul float %23, %82
  %88 = fmul float %24, %82
  %89 = fmul float %25, %82
  %90 = fmul float %26, %82
  %91 = fmul float %27, %84
  %92 = fadd float %91, %87
  %93 = fmul float %28, %84
  %94 = fadd float %93, %88
  %95 = fmul float %29, %84
  %96 = fadd float %95, %89
  %97 = fmul float %30, %84
  %98 = fadd float %97, %90
  %99 = fmul float %31, %86
  %100 = fadd float %99, %92
  %101 = fmul float %32, %86
  %102 = fadd float %101, %94
  %103 = fmul float %33, %86
  %104 = fadd float %103, %96
  %105 = fmul float %34, %86
  %106 = fadd float %105, %98
  %107 = fadd float %100, %35
  %108 = fadd float %102, %36
  %109 = fadd float %104, %37
  %110 = fadd float %106, %38
  %111 = fmul float %67, %46
  %112 = fmul float %68, %46
  %113 = fmul float %69, %46
  %114 = fmul float %11, %111
  %115 = fmul float %12, %111
  %116 = fmul float %13, %111
  %117 = fmul float %14, %112
  %118 = fadd float %117, %114
  %119 = fmul float %15, %112
  %120 = fadd float %119, %115
  %121 = fmul float %16, %112
  %122 = fadd float %121, %116
  %123 = fmul float %17, %113
  %124 = fadd float %123, %118
  %125 = fmul float %18, %113
  %126 = fadd float %125, %120
  %127 = fmul float %19, %113
  %128 = fadd float %127, %122
  %129 = fmul float %124, %124
  %130 = fmul float %126, %126
  %131 = fadd float %130, %129
  %132 = fmul float %128, %128
  %133 = fadd float %131, %132
  %134 = call float @llvm.AMDGPU.rsq(float %133)
  %135 = fmul float %124, %134
  %136 = fmul float %126, %134
  %137 = fmul float %128, %134
  %138 = fmul float %78, %46
  %139 = fmul float %79, %46
  %140 = fmul float %80, %46
  %141 = fmul float %11, %138
  %142 = fmul float %12, %138
  %143 = fmul float %13, %138
  %144 = fmul float %14, %139
  %145 = fadd float %144, %141
  %146 = fmul float %15, %139
  %147 = fadd float %146, %142
  %148 = fmul float %16, %139
  %149 = fadd float %148, %143
  %150 = fmul float %17, %140
  %151 = fadd float %150, %145
  %152 = fmul float %18, %140
  %153 = fadd float %152, %147
  %154 = fmul float %19, %140
  %155 = fadd float %154, %149
  %156 = fmul float %11, %82
  %157 = fmul float %12, %82
  %158 = fmul float %13, %82
  %159 = fmul float %14, %84
  %160 = fadd float %159, %156
  %161 = fmul float %15, %84
  %162 = fadd float %161, %157
  %163 = fmul float %16, %84
  %164 = fadd float %163, %158
  %165 = fmul float %17, %86
  %166 = fadd float %165, %160
  %167 = fmul float %18, %86
  %168 = fadd float %167, %162
  %169 = fmul float %19, %86
  %170 = fadd float %169, %164
  %171 = fadd float %166, %20
  %172 = fadd float %168, %21
  %173 = fadd float %170, %22
  %174 = fsub float -0.000000e+00, %55
  %175 = fadd float %171, %174
  %176 = fsub float -0.000000e+00, %56
  %177 = fadd float %172, %176
  %178 = fsub float -0.000000e+00, %57
  %179 = fadd float %173, %178
  %180 = fmul float %109, %53
  %181 = fadd float %180, %54
  %182 = fmul float %73, %42
  %183 = fadd float %182, %44
  %184 = fmul float %74, %43
  %185 = fadd float %184, %45
  %186 = fmul float %155, %136
  %187 = fmul float %151, %137
  %188 = fmul float %153, %135
  %189 = fsub float -0.000000e+00, %186
  %190 = fmul float %153, %137
  %191 = fadd float %190, %189
  %192 = fsub float -0.000000e+00, %187
  %193 = fmul float %155, %135
  %194 = fadd float %193, %192
  %195 = fsub float -0.000000e+00, %188
  %196 = fmul float %151, %136
  %197 = fadd float %196, %195
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %175, float %177, float %179, float %181)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %183, float %185, float %135, float %136)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %137, float %191, float %194, float %197)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %151, float %153, float %155, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %107, float %108, float %109, float %110)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c2020131
c202812d
bf8c0070
7e020205
d2820001
04040904
c2020130
c202812c
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c2020132
c202812e
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c204813a
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060139
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078138
bf8c007f
0a10100f
f800021f
05060708
c0880704
bf8c000f
e00c2000
80040700
c207812b
bf8c0070
100a100f
100c0e0f
10160c09
d282000b
042e0a0b
100e120f
d2820009
042e0e0a
10100c0c
d2820008
04220a0d
d282000a
04220e0e
1010150a
d2820008
04221309
100c0c04
d2820005
041a0a05
d2820007
04160e08
d2820005
04220f07
7e105b05
100a1109
100c110a
c0880708
bf8c007f
e00c2000
80040900
c2080125
c2088127
bf8c0070
7e1a0211
d282000d
0434210a
c2080124
c2088126
bf8c007f
7e1c0211
d2820009
04382109
f800022f
05060d09
c088070c
bf8c000f
e00c2000
80040c00
bf8c0770
10121a0f
1016180f
10001609
d2820000
0402120b
10181c0f
d2820000
0402180a
101a0d00
1014160c
d282000a
042a120d
d282000a
042a180e
101c0b0a
081a1b0e
100e1107
101c0f0a
10101604
d2820008
04221205
d2820008
04221808
100c0d08
080c1d06
100a0b08
10120f00
080a0b09
f800023f
0d060507
bf8c070f
7e0a0280
f800024f
0508000a
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MOV OUT[0], TEMP[1]
 28: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %42 = load <32 x i8> addrspace(2)* %41, !tbaa !0
  %43 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %56 = fmul float %13, %39
  %57 = fadd float %56, %40
  %58 = bitcast float %54 to i32
  %59 = bitcast float %55 to i32
  %60 = insertelement <2 x i32> undef, i32 %58, i32 0
  %61 = insertelement <2 x i32> %60, i32 %59, i32 1
  %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %42, <16 x i8> %44, i32 2)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = fmul float %63, 0x3FCB367A00000000
  %68 = fmul float %64, 0x3FE6E2EB20000000
  %69 = fadd float %68, %67
  %70 = fmul float %65, 0x3FB27BB300000000
  %71 = fadd float %69, %70
  %72 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %63)
  %73 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %64)
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %65)
  %75 = fmul float %72, %35
  %76 = fmul float %73, %36
  %77 = fadd float %75, %76
  %78 = fmul float %74, %37
  %79 = fadd float %77, %78
  %80 = fmul float %66, %38
  %81 = fadd float %79, %80
  %82 = fadd float %81, %30
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDGPU.lrp(float %83, float %49, float 1.000000e+00)
  %85 = call float @llvm.AMDGPU.lrp(float %83, float %50, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %83, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %83, float %52, float 1.000000e+00)
  %88 = fmul float %72, %84
  %89 = fmul float %73, %85
  %90 = fmul float %74, %86
  %91 = fmul float %66, %87
  %92 = fmul float %12, %22
  %93 = fmul float %57, %23
  %94 = bitcast float %92 to i32
  %95 = bitcast float %93 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %46, <16 x i8> %48, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %72, %31
  %104 = fmul float %73, %32
  %105 = fadd float %103, %104
  %106 = fmul float %74, %33
  %107 = fadd float %105, %106
  %108 = fmul float %66, %34
  %109 = fadd float %107, %108
  %110 = fadd float %109, %29
  %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00)
  %112 = fmul float %111, %102
  %113 = fmul float %99, 0x3FCB367A00000000
  %114 = fmul float %100, 0x3FE6E2EB20000000
  %115 = fadd float %114, %113
  %116 = fmul float %101, 0x3FB27BB300000000
  %117 = fadd float %115, %116
  %118 = fcmp uge float %117, 0x3F50624DE0000000
  %119 = select i1 %118, float %117, float 0x3F50624DE0000000
  %120 = fdiv float 1.000000e+00, %119
  %121 = fmul float %99, %120
  %122 = fmul float %100, %120
  %123 = fmul float %101, %120
  %124 = fmul float %88, %99
  %125 = fmul float %89, %100
  %126 = fmul float %90, %101
  %127 = fmul float %112, %121
  %128 = fadd float %127, %124
  %129 = fmul float %112, %122
  %130 = fadd float %129, %125
  %131 = fmul float %112, %123
  %132 = fadd float %131, %126
  %133 = fmul float %128, 4.000000e+00
  %134 = fmul float %130, 4.000000e+00
  %135 = fmul float %132, 4.000000e+00
  %136 = fcmp uge float %53, %27
  %137 = select i1 %136, float %53, float %27
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %138, float %133, float %24)
  %140 = call float @llvm.AMDGPU.lrp(float %138, float %134, float %25)
  %141 = call float @llvm.AMDGPU.lrp(float %138, float %135, float %26)
  %142 = call i32 @llvm.SI.packf16(float %139, float %140)
  %143 = bitcast i32 %142 to float
  %144 = call i32 @llvm.SI.packf16(float %141, float %91)
  %145 = bitcast i32 %144 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %143, float %145, float %143, float %145)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430504
bf8c0770
10080aff
3e59b3d0
7e2002ff
3f371759
d2820004
04122106
7e2202ff
3d93dd98
d282000a
04122307
c0840100
bf8c007f
c2000910
bf8c007f
d208000b
020000f2
10080b0b
d2820004
04121400
10120d0b
d2820009
04261400
c2008921
bf8c007f
10181201
c2008920
bf8c007f
d282000c
04300304
10160f0b
d2820012
042e1400
c2000922
bf8c007f
d282000a
04300112
c2000923
bf8c007f
d282000a
04280108
c2000919
bf8c007f
06141400
d206080a
0201010a
081614f2
c8300200
c8310201
d282000c
042e190a
10261912
c200092c
c200892d
bf8c007f
7e180201
d2820003
04300103
c2000909
bf8c007f
101a0600
c2000908
bf8c007f
10180400
c0800304
c0c60508
bf8c007f
f0800f00
00030c0c
bf8c0770
10261d13
c200091d
bf8c007f
10041200
c200091c
bf8c007f
d2820002
04080104
c200091e
bf8c007f
d2820002
04080112
c200091f
bf8c007f
d2820002
04080108
c2000918
bf8c007f
06040400
d2060802
02010102
10041f02
100618ff
3e59b3d0
d2820003
040e210d
d2820003
040e230e
7e2002ff
3a83126f
d00c0000
02022103
d2000003
00020710
7e065503
1020070e
d2820010
044e2102
102420f6
c8400700
c8410701
c200090f
bf8c007f
d00c0002
02000110
7e220200
d2000010
000a2111
d2060810
02010110
082220f2
c200090e
bf8c007f
10262200
d2820012
044e2510
c84c0300
c84d0301
d2820013
042e270a
100a2708
5e0a0b12
c8180100
c8190101
d2820006
042e0d0a
100c0d09
100c1b06
100e070d
d2820006
041a0f02
100c0cf6
c200090d
bf8c007f
100e2200
d2820006
041e0d10
c81c0000
c81d0001
d2820000
042e0f0a
10000104
10001900
1002070c
d2820000
04020302
100000f6
c200090c
bf8c007f
10022200
d2820000
04060110
5e000d00
f8001c0f
05000500
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MAD TEMP[2].x, TEMP[1].wwww, CONST[4].yyyy, CONST[4].zzzz
 28: SLT TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz
 29: F2I TEMP[2].x, -TEMP[2]
 30: UIF TEMP[2].xxxx :2
 31:   KILL
 32: ENDIF
 33: MOV OUT[0], TEMP[1]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %44 = load <32 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %48 = load <32 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %58 = fmul float %13, %41
  %59 = fadd float %58, %42
  %60 = bitcast float %56 to i32
  %61 = bitcast float %57 to i32
  %62 = insertelement <2 x i32> undef, i32 %60, i32 0
  %63 = insertelement <2 x i32> %62, i32 %61, i32 1
  %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %44, <16 x i8> %46, i32 2)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = extractelement <4 x float> %64, i32 1
  %67 = extractelement <4 x float> %64, i32 2
  %68 = extractelement <4 x float> %64, i32 3
  %69 = fmul float %65, 0x3FCB367A00000000
  %70 = fmul float %66, 0x3FE6E2EB20000000
  %71 = fadd float %70, %69
  %72 = fmul float %67, 0x3FB27BB300000000
  %73 = fadd float %71, %72
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %65)
  %75 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %66)
  %76 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %67)
  %77 = fmul float %74, %37
  %78 = fmul float %75, %38
  %79 = fadd float %77, %78
  %80 = fmul float %76, %39
  %81 = fadd float %79, %80
  %82 = fmul float %68, %40
  %83 = fadd float %81, %82
  %84 = fadd float %83, %32
  %85 = call float @llvm.AMDIL.clamp.(float %84, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %85, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %85, float %52, float 1.000000e+00)
  %88 = call float @llvm.AMDGPU.lrp(float %85, float %53, float 1.000000e+00)
  %89 = call float @llvm.AMDGPU.lrp(float %85, float %54, float 1.000000e+00)
  %90 = fmul float %74, %86
  %91 = fmul float %75, %87
  %92 = fmul float %76, %88
  %93 = fmul float %68, %89
  %94 = fmul float %12, %22
  %95 = fmul float %59, %23
  %96 = bitcast float %94 to i32
  %97 = bitcast float %95 to i32
  %98 = insertelement <2 x i32> undef, i32 %96, i32 0
  %99 = insertelement <2 x i32> %98, i32 %97, i32 1
  %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %48, <16 x i8> %50, i32 2)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = extractelement <4 x float> %100, i32 3
  %105 = fmul float %74, %33
  %106 = fmul float %75, %34
  %107 = fadd float %105, %106
  %108 = fmul float %76, %35
  %109 = fadd float %107, %108
  %110 = fmul float %68, %36
  %111 = fadd float %109, %110
  %112 = fadd float %111, %31
  %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00)
  %114 = fmul float %113, %104
  %115 = fmul float %101, 0x3FCB367A00000000
  %116 = fmul float %102, 0x3FE6E2EB20000000
  %117 = fadd float %116, %115
  %118 = fmul float %103, 0x3FB27BB300000000
  %119 = fadd float %117, %118
  %120 = fcmp uge float %119, 0x3F50624DE0000000
  %121 = select i1 %120, float %119, float 0x3F50624DE0000000
  %122 = fdiv float 1.000000e+00, %121
  %123 = fmul float %101, %122
  %124 = fmul float %102, %122
  %125 = fmul float %103, %122
  %126 = fmul float %90, %101
  %127 = fmul float %91, %102
  %128 = fmul float %92, %103
  %129 = fmul float %114, %123
  %130 = fadd float %129, %126
  %131 = fmul float %114, %124
  %132 = fadd float %131, %127
  %133 = fmul float %114, %125
  %134 = fadd float %133, %128
  %135 = fmul float %130, 4.000000e+00
  %136 = fmul float %132, 4.000000e+00
  %137 = fmul float %134, 4.000000e+00
  %138 = fcmp uge float %55, %27
  %139 = select i1 %138, float %55, float %27
  %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
  %141 = call float @llvm.AMDGPU.lrp(float %140, float %135, float %24)
  %142 = call float @llvm.AMDGPU.lrp(float %140, float %136, float %25)
  %143 = call float @llvm.AMDGPU.lrp(float %140, float %137, float %26)
  %144 = fmul float %93, %29
  %145 = fadd float %144, %30
  %146 = fcmp ult float %145, 0.000000e+00
  %147 = select i1 %146, float 1.000000e+00, float 0.000000e+00
  %148 = fsub float -0.000000e+00, %147
  %149 = fptosi float %148 to i32
  %150 = bitcast i32 %149 to float
  %151 = bitcast float %150 to i32
  %152 = icmp ne i32 %151, 0
  br i1 %152, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %153 = call i32 @llvm.SI.packf16(float %141, float %142)
  %154 = bitcast i32 %153 to float
  %155 = call i32 @llvm.SI.packf16(float %143, float %93)
  %156 = bitcast i32 %155 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %154, float %156, float %154, float %156)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1802ff
3f371759
d2820008
04221905
7e1a02ff
3d93dd98
d2820009
04221b06
c0840100
bf8c007f
c2000910
bf8c007f
d208000a
020000f2
1010090a
d2820008
04221200
10160b0a
d282000b
042e1200
c2008921
bf8c007f
101c1601
c2008920
bf8c007f
d282000e
04380308
10140d0a
d2820012
042a1200
c2000922
bf8c007f
d2820009
04380112
c2000923
bf8c007f
d2820009
04240107
c2000919
bf8c007f
06121200
d2060809
02010109
081412f2
c8380200
c8390201
d282000e
042a1d09
10261d12
c200092c
c200892d
bf8c007f
7e1c0201
d2820003
04380103
c2000909
bf8c007f
101e0600
c2000908
bf8c007f
101c0400
c0800304
c0c60508
bf8c007f
f0800f00
00030e0e
bf8c0770
10042113
c200091d
bf8c007f
10061600
c200091c
bf8c007f
d2820003
040c0108
c200091e
bf8c007f
d2820003
040c0112
c200091f
bf8c007f
d2820003
040c0107
c2000918
bf8c007f
06060600
d2060803
02010103
10062303
10241cff
3e59b3d0
d282000c
044a190f
d282000c
04321b10
7e1a02ff
3a83126f
d00c0000
02021b0c
d200000c
0002190d
7e18550c
101a1910
d2820002
040a1b03
100404f6
c8340700
c8350701
c200090f
bf8c007f
d00c0002
0200010d
7e240200
d200000d
000a1b12
d206080d
0201010d
08241af2
c200090e
bf8c007f
10262400
d2820002
044e050d
c84c0100
c84d0101
d2820013
042a2709
1016270b
10161f0b
1026190f
d282000b
042e2703
101616f6
c200090d
bf8c007f
10262400
d282000b
044e170d
c84c0000
c84d0001
d2820013
042a2709
10102708
10101d08
1018190e
d2820003
04221903
100606f6
c200090c
bf8c007f
10102400
d2820003
0422070d
c8200300
c8210301
d2820000
042a1109
10000107
c2000911
c2008912
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000102
5e021703
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[0]
DCL CONST[2]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[0].xyzz
  4: LRP TEMP[1].xyz, CONST[0].wwww, TEMP[2].xxxx, TEMP[0].xyzz
  5: MUL TEMP[0], IN[0], TEMP[1]
  6: MAX TEMP[1].x, IN[2].xxxx, CONST[2].wwww
  7: MOV_SAT TEMP[1].x, TEMP[1].xxxx
  8: MUL TEMP[0], TEMP[0], TEMP[1].xxxx
  9: MOV OUT[0], TEMP[0]
 10: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %24 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !0
  %26 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %35 = bitcast float %32 to i32
  %36 = bitcast float %33 to i32
  %37 = insertelement <2 x i32> undef, i32 %35, i32 0
  %38 = insertelement <2 x i32> %37, i32 %36, i32 1
  %39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %25, <16 x i8> %27, i32 2)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = extractelement <4 x float> %39, i32 3
  %44 = fmul float %40, 0x3FCB367A00000000
  %45 = fmul float %41, 0x3FE6E2EB20000000
  %46 = fadd float %45, %44
  %47 = fmul float %42, 0x3FB27BB300000000
  %48 = fadd float %46, %47
  %49 = call float @llvm.AMDGPU.lrp(float %22, float %48, float %40)
  %50 = call float @llvm.AMDGPU.lrp(float %22, float %48, float %41)
  %51 = call float @llvm.AMDGPU.lrp(float %22, float %48, float %42)
  %52 = fmul float %28, %49
  %53 = fmul float %29, %50
  %54 = fmul float %30, %51
  %55 = fmul float %31, %43
  %56 = fcmp uge float %34, %23
  %57 = select i1 %56, float %34, float %23
  %58 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00)
  %59 = fmul float %52, %58
  %60 = fmul float %53, %58
  %61 = fmul float %54, %58
  %62 = fmul float %55, %58
  %63 = fcmp ugt float %62, 0x3F80101020000000
  %64 = sext i1 %63 to i32
  %65 = trunc i32 %64 to i1
  %66 = select i1 %65, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %66)
  %67 = call i32 @llvm.SI.packf16(float %59, float %60)
  %68 = bitcast i32 %67 to float
  %69 = call i32 @llvm.SI.packf16(float %61, float %62)
  %70 = bitcast i32 %69 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %68, float %70, float %68, float %70)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0840300
c0c60500
bf8c007f
f0800f00
00430202
c8180300
c8190301
bf8c0770
100e0b06
c8180800
c8190801
c0800100
bf8c007f
c202010b
bf8c007f
d00c0008
02000906
7e100204
d2000006
00220d08
d2060806
02010106
100e0d07
7e1002ff
3c008081
d0080004
02021107
d2000008
0011e4f3
7c261080
101004ff
3e59b3d0
7e1202ff
3f371759
d2820008
04221303
7e1202ff
3d93dd98
d2820008
04221304
c2000103
bf8c007f
d2080009
020000f2
10140909
d282000a
042a1000
c82c0200
c82d0201
1014150b
10140d0a
5e0e0f0a
10140709
d282000a
042a1000
c82c0100
c82d0101
1014150b
10140d0a
10040509
d2820002
040a1000
c80c0000
c80d0001
10000503
10000d00
5e001500
f8001c0f
07000700
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..19]
DCL TEMP[0..5], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     6.2820,    -1.0000,     0.0000}
IMM[1] INT32 {0, 1, 2, 3}
  0: SGE TEMP[0].x, IN[0].wwww, CONST[6].yyyy
  1: F2I TEMP[0].x, -TEMP[0]
  2: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
  3: SGE TEMP[1].x, IN[0].wwww, CONST[6].zzzz
  4: F2I TEMP[1].x, -TEMP[1]
  5: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx
  6: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
  7: F2I TEMP[0].x, TEMP[0].xxxx
  8: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
  9: I2F TEMP[2].y, TEMP[1].yyyy
 10: CMP TEMP[2].x, TEMP[2].yyyy, CONST[15].yyyy, CONST[15].xxxx
 11: I2F TEMP[3].z, TEMP[1].zzzz
 12: CMP TEMP[2].x, TEMP[3].zzzz, CONST[15].zzzz, TEMP[2].xxxx
 13: I2F TEMP[1].w, TEMP[1].wwww
 14: CMP TEMP[2].x, TEMP[1].wwww, CONST[15].wwww, TEMP[2].xxxx
 15: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
 16: I2F TEMP[4].y, TEMP[1].yyyy
 17: CMP TEMP[3].x, TEMP[4].yyyy, CONST[6].yyyy, CONST[6].xxxx
 18: I2F TEMP[4].z, TEMP[1].zzzz
 19: CMP TEMP[3].x, TEMP[4].zzzz, CONST[6].zzzz, TEMP[3].xxxx
 20: I2F TEMP[1].w, TEMP[1].wwww
 21: CMP TEMP[3].x, TEMP[1].wwww, CONST[6].wwww, TEMP[3].xxxx
 22: ADD TEMP[1].x, IN[0].wwww, -TEMP[3].xxxx
 23: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 24: USEQ TEMP[2].yzw, TEMP[0].xxxx, IMM[1]
 25: I2F TEMP[4].y, TEMP[2].yyyy
 26: CMP TEMP[3].x, TEMP[4].yyyy, CONST[11].yyyy, CONST[11].xxxx
 27: I2F TEMP[4].z, TEMP[2].zzzz
 28: CMP TEMP[3].x, TEMP[4].zzzz, CONST[11].zzzz, TEMP[3].xxxx
 29: I2F TEMP[2].w, TEMP[2].wwww
 30: CMP TEMP[3].x, TEMP[2].wwww, CONST[11].wwww, TEMP[3].xxxx
 31: UADD TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy
 32: USEQ TEMP[2].yzw, TEMP[2].xxxx, IMM[1]
 33: I2F TEMP[5].y, TEMP[2].yyyy
 34: CMP TEMP[4].x, TEMP[5].yyyy, CONST[11].yyyy, CONST[11].xxxx
 35: I2F TEMP[5].z, TEMP[2].zzzz
 36: CMP TEMP[4].x, TEMP[5].zzzz, CONST[11].zzzz, TEMP[4].xxxx
 37: I2F TEMP[2].w, TEMP[2].wwww
 38: CMP TEMP[4].x, TEMP[2].wwww, CONST[11].wwww, TEMP[4].xxxx
 39: ADD TEMP[2].xy, IN[1].xyyy, -CONST[12].xyyy
 40: LRP TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx
 41: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xxxx
 42: ADD TEMP[3].x, IMM[0].xxxx, -IN[2].xxxx
 43: MUL TEMP[3].xy, CONST[13].xyyy, TEMP[3].xxxx
 44: MAD TEMP[3].xy, CONST[13].zwww, IN[2].xxxx, TEMP[3].xyyy
 45: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy
 46: LRP TEMP[3].x, IN[2].zzzz, CONST[12].wwww, CONST[12].zzzz
 47: MUL TEMP[4].x, IMM[0].yyyy, IN[2].yyyy
 48: MAD TEMP[3].x, TEMP[3].xxxx, IN[0].wwww, TEMP[4].xxxx
 49: SIN TEMP[4].x, TEMP[3].xxxx
 50: COS TEMP[3].x, TEMP[3].xxxx
 51: MOV TEMP[4].y, TEMP[3].xxxx
 52: MUL TEMP[5].xy, TEMP[4].xyyy, IMM[0].zzzz
 53: MOV TEMP[4].z, TEMP[5].yyxy
 54: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xxxx
 55: MAD TEMP[2].xy, TEMP[2].yxxx, TEMP[4].xzzz, TEMP[3].xyyy
 56: MUL TEMP[3].xyz, CONST[16].xyzz, TEMP[2].xxxx
 57: MAD TEMP[3].xyz, CONST[17].xyzz, TEMP[2].yyyy, TEMP[3].xyzz
 58: ADD TEMP[2].xyz, IN[0].xyzz, TEMP[3].xyzz
 59: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 60: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 61: MAD TEMP[2], CONST[2], TEMP[2].zzzz, TEMP[3]
 62: ADD TEMP[2], TEMP[2], CONST[3]
 63: UADD TEMP[3].x, TEMP[0].xxxx, IMM[1].yyyy
 64: UADD TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz
 65: UARL ADDR[0].x, TEMP[3].xxxx
 66: UARL ADDR[0].x, TEMP[3].xxxx
 67: MOV TEMP[3], CONST[ADDR[0].x+6]
 68: UARL ADDR[0].x, TEMP[0].xxxx
 69: LRP TEMP[0], TEMP[1].xxxx, CONST[ADDR[0].x+6], TEMP[3]
 70: MUL TEMP[0], TEMP[0], CONST[4]
 71: MUL TEMP[1].x, IN[2].wwww, CONST[14].zzzz
 72: FLR TEMP[1].xy, TEMP[1].xxxx
 73: MUL TEMP[1].xy, CONST[14].xxxx, TEMP[1].xyyy
 74: FLR TEMP[3].xy, TEMP[1].xyyy
 75: MOV TEMP[4].yw, TEMP[3].yxyy
 76: ADD TEMP[1].xy, TEMP[1].xyyy, -TEMP[3].xyyy
 77: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[14].wwww
 78: FLR TEMP[1].xy, TEMP[1].xyyy
 79: MOV TEMP[4].xz, TEMP[1].xxyx
 80: ADD TEMP[1], IN[1].xyxy, TEMP[4]
 81: MUL TEMP[1], TEMP[1], CONST[14].xyxy
 82: MAD TEMP[3].x, TEMP[2].zzzz, CONST[5].xxxx, CONST[5].yyyy
 83: MOV OUT[2], TEMP[1]
 84: MOV OUT[3], TEMP[3]
 85: MOV OUT[1], TEMP[0]
 86: MOV OUT[0], TEMP[2]
 87: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 280)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = extractelement <4 x float> %65, i32 3
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = extractelement <4 x float> %77, i32 3
  %82 = fcmp uge float %69, %34
  %83 = select i1 %82, float 1.000000e+00, float 0.000000e+00
  %84 = fsub float -0.000000e+00, %83
  %85 = fptosi float %84 to i32
  %86 = bitcast i32 %85 to float
  %87 = bitcast float %86 to i32
  %88 = and i32 %87, 1065353216
  %89 = bitcast i32 %88 to float
  %90 = fcmp uge float %69, %35
  %91 = select i1 %90, float 1.000000e+00, float 0.000000e+00
  %92 = fsub float -0.000000e+00, %91
  %93 = fptosi float %92 to i32
  %94 = bitcast i32 %93 to float
  %95 = bitcast float %94 to i32
  %96 = and i32 %95, 1065353216
  %97 = bitcast i32 %96 to float
  %98 = fadd float %89, %97
  %99 = fptosi float %98 to i32
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %100 to i32
  %102 = icmp eq i32 %101, 1
  %103 = sext i1 %102 to i32
  %104 = bitcast float %100 to i32
  %105 = icmp eq i32 %104, 2
  %106 = sext i1 %105 to i32
  %107 = bitcast float %100 to i32
  %108 = icmp eq i32 %107, 3
  %109 = sext i1 %108 to i32
  %110 = bitcast i32 %103 to float
  %111 = bitcast i32 %106 to float
  %112 = bitcast i32 %109 to float
  %113 = bitcast float %110 to i32
  %114 = sitofp i32 %113 to float
  %115 = call float @llvm.AMDGPU.cndlt(float %114, float %54, float %53)
  %116 = bitcast float %111 to i32
  %117 = sitofp i32 %116 to float
  %118 = call float @llvm.AMDGPU.cndlt(float %117, float %55, float %115)
  %119 = bitcast float %112 to i32
  %120 = sitofp i32 %119 to float
  %121 = call float @llvm.AMDGPU.cndlt(float %120, float %56, float %118)
  %122 = bitcast float %100 to i32
  %123 = icmp eq i32 %122, 1
  %124 = sext i1 %123 to i32
  %125 = bitcast float %100 to i32
  %126 = icmp eq i32 %125, 2
  %127 = sext i1 %126 to i32
  %128 = bitcast float %100 to i32
  %129 = icmp eq i32 %128, 3
  %130 = sext i1 %129 to i32
  %131 = bitcast i32 %124 to float
  %132 = bitcast i32 %127 to float
  %133 = bitcast i32 %130 to float
  %134 = bitcast float %131 to i32
  %135 = sitofp i32 %134 to float
  %136 = call float @llvm.AMDGPU.cndlt(float %135, float %34, float %33)
  %137 = bitcast float %132 to i32
  %138 = sitofp i32 %137 to float
  %139 = call float @llvm.AMDGPU.cndlt(float %138, float %35, float %136)
  %140 = bitcast float %133 to i32
  %141 = sitofp i32 %140 to float
  %142 = call float @llvm.AMDGPU.cndlt(float %141, float %36, float %139)
  %143 = fsub float -0.000000e+00, %142
  %144 = fadd float %69, %143
  %145 = fmul float %144, %121
  %146 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00)
  %147 = bitcast float %100 to i32
  %148 = icmp eq i32 %147, 1
  %149 = sext i1 %148 to i32
  %150 = bitcast float %100 to i32
  %151 = icmp eq i32 %150, 2
  %152 = sext i1 %151 to i32
  %153 = bitcast float %100 to i32
  %154 = icmp eq i32 %153, 3
  %155 = sext i1 %154 to i32
  %156 = bitcast i32 %149 to float
  %157 = bitcast i32 %152 to float
  %158 = bitcast i32 %155 to float
  %159 = bitcast float %156 to i32
  %160 = sitofp i32 %159 to float
  %161 = call float @llvm.AMDGPU.cndlt(float %160, float %38, float %37)
  %162 = bitcast float %157 to i32
  %163 = sitofp i32 %162 to float
  %164 = call float @llvm.AMDGPU.cndlt(float %163, float %39, float %161)
  %165 = bitcast float %158 to i32
  %166 = sitofp i32 %165 to float
  %167 = call float @llvm.AMDGPU.cndlt(float %166, float %40, float %164)
  %168 = bitcast float %100 to i32
  %169 = add i32 %168, 1
  %170 = bitcast i32 %169 to float
  %171 = bitcast float %170 to i32
  %172 = icmp eq i32 %171, 1
  %173 = sext i1 %172 to i32
  %174 = bitcast float %170 to i32
  %175 = icmp eq i32 %174, 2
  %176 = sext i1 %175 to i32
  %177 = bitcast float %170 to i32
  %178 = icmp eq i32 %177, 3
  %179 = sext i1 %178 to i32
  %180 = bitcast i32 %173 to float
  %181 = bitcast i32 %176 to float
  %182 = bitcast i32 %179 to float
  %183 = bitcast float %180 to i32
  %184 = sitofp i32 %183 to float
  %185 = call float @llvm.AMDGPU.cndlt(float %184, float %38, float %37)
  %186 = bitcast float %181 to i32
  %187 = sitofp i32 %186 to float
  %188 = call float @llvm.AMDGPU.cndlt(float %187, float %39, float %185)
  %189 = bitcast float %182 to i32
  %190 = sitofp i32 %189 to float
  %191 = call float @llvm.AMDGPU.cndlt(float %190, float %40, float %188)
  %192 = fsub float -0.000000e+00, %41
  %193 = fadd float %73, %192
  %194 = fsub float -0.000000e+00, %42
  %195 = fadd float %74, %194
  %196 = call float @llvm.AMDGPU.lrp(float %146, float %191, float %167)
  %197 = fmul float %193, %196
  %198 = fmul float %195, %196
  %199 = fsub float -0.000000e+00, %78
  %200 = fadd float 1.000000e+00, %199
  %201 = fmul float %45, %200
  %202 = fmul float %46, %200
  %203 = fmul float %47, %78
  %204 = fadd float %203, %201
  %205 = fmul float %48, %78
  %206 = fadd float %205, %202
  %207 = fmul float %197, %204
  %208 = fmul float %198, %206
  %209 = call float @llvm.AMDGPU.lrp(float %80, float %44, float %43)
  %210 = fmul float 0x401920C4A0000000, %79
  %211 = fmul float %209, %69
  %212 = fadd float %211, %210
  %213 = call float @llvm.sin.f32(float %212)
  %214 = call float @llvm.cos.f32(float %212)
  %215 = fmul float %213, -1.000000e+00
  %216 = fmul float %207, %214
  %217 = fmul float %208, %214
  %218 = fmul float %208, %213
  %219 = fadd float %218, %216
  %220 = fmul float %207, %215
  %221 = fadd float %220, %217
  %222 = fmul float %57, %219
  %223 = fmul float %58, %219
  %224 = fmul float %59, %219
  %225 = fmul float %60, %221
  %226 = fadd float %225, %222
  %227 = fmul float %61, %221
  %228 = fadd float %227, %223
  %229 = fmul float %62, %221
  %230 = fadd float %229, %224
  %231 = fadd float %66, %226
  %232 = fadd float %67, %228
  %233 = fadd float %68, %230
  %234 = fmul float %11, %231
  %235 = fmul float %12, %231
  %236 = fmul float %13, %231
  %237 = fmul float %14, %231
  %238 = fmul float %15, %232
  %239 = fadd float %238, %234
  %240 = fmul float %16, %232
  %241 = fadd float %240, %235
  %242 = fmul float %17, %232
  %243 = fadd float %242, %236
  %244 = fmul float %18, %232
  %245 = fadd float %244, %237
  %246 = fmul float %19, %233
  %247 = fadd float %246, %239
  %248 = fmul float %20, %233
  %249 = fadd float %248, %241
  %250 = fmul float %21, %233
  %251 = fadd float %250, %243
  %252 = fmul float %22, %233
  %253 = fadd float %252, %245
  %254 = fadd float %247, %23
  %255 = fadd float %249, %24
  %256 = fadd float %251, %25
  %257 = fadd float %253, %26
  %258 = bitcast float %100 to i32
  %259 = add i32 %258, 1
  %260 = bitcast i32 %259 to float
  %261 = bitcast float %100 to i32
  %262 = add i32 %261, 2
  %263 = bitcast i32 %262 to float
  %264 = bitcast float %260 to i32
  %265 = shl i32 %264, 4
  %266 = add i32 %265, 96
  %267 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %266)
  %268 = shl i32 %264, 4
  %269 = add i32 %268, 100
  %270 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %269)
  %271 = shl i32 %264, 4
  %272 = add i32 %271, 104
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = shl i32 %264, 4
  %275 = add i32 %274, 108
  %276 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %275)
  %277 = bitcast float %263 to i32
  %278 = shl i32 %277, 4
  %279 = add i32 %278, 96
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = call float @llvm.AMDGPU.lrp(float %146, float %280, float %267)
  %282 = shl i32 %277, 4
  %283 = add i32 %282, 100
  %284 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %283)
  %285 = call float @llvm.AMDGPU.lrp(float %146, float %284, float %270)
  %286 = shl i32 %277, 4
  %287 = add i32 %286, 104
  %288 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %287)
  %289 = call float @llvm.AMDGPU.lrp(float %146, float %288, float %273)
  %290 = shl i32 %277, 4
  %291 = add i32 %290, 108
  %292 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %291)
  %293 = call float @llvm.AMDGPU.lrp(float %146, float %292, float %276)
  %294 = fmul float %281, %27
  %295 = fmul float %285, %28
  %296 = fmul float %289, %29
  %297 = fmul float %293, %30
  %298 = fmul float %81, %51
  %299 = call float @floor(float %298)
  %300 = call float @floor(float %298)
  %301 = fmul float %49, %299
  %302 = fmul float %49, %300
  %303 = call float @floor(float %301)
  %304 = call float @floor(float %302)
  %305 = fsub float -0.000000e+00, %303
  %306 = fadd float %301, %305
  %307 = fsub float -0.000000e+00, %304
  %308 = fadd float %302, %307
  %309 = fmul float %306, %52
  %310 = fmul float %308, %52
  %311 = call float @floor(float %309)
  %312 = call float @floor(float %310)
  %313 = fadd float %73, %311
  %314 = fadd float %74, %303
  %315 = fadd float %73, %312
  %316 = fadd float %74, %304
  %317 = fmul float %313, %49
  %318 = fmul float %314, %50
  %319 = fmul float %315, %49
  %320 = fmul float %316, %50
  %321 = fmul float %256, %31
  %322 = fadd float %321, %32
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %294, float %295, float %296, float %297)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %317, float %318, float %319, float %320)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %322, float %304, float %273, float %276)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %254, float %255, float %256, float %257)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.sin.f32(float) #3

; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #3

; Function Attrs: readonly
declare float @floor(float) #4

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }
attributes #4 = { readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c206011a
bf8c007f
d00c0004
02001904
d2000005
0011e480
d2060005
22010105
7e0a1105
360a0af2
c2068119
bf8c007f
d00c0004
02001b04
d2000006
0011e480
d2060006
22010106
7e0c1106
360c0cf2
060a0b06
7e0c1105
d1040004
02010306
d2000005
00118280
7e0a0b05
d0080004
02020a80
c204013d
bf8c007f
7e0a0208
c204013c
bf8c007f
7e0e0208
d2000005
00120b07
d1040008
02010506
d2000007
00218280
7e0e0b07
d0080008
02020e80
c205013e
bf8c007f
7e0e020a
d2000005
00220f05
d104000a
02010706
d2000007
00298280
7e0e0b07
d008000a
02020e80
c207013f
bf8c007f
7e0e020e
d2000005
002a0f05
7e0e020d
c2068118
bf8c007f
7e10020d
d2000007
00120f08
7e10020c
d2000007
00221107
c206011b
bf8c007f
7e10020c
d2000007
002a1107
080e0f04
100a0b07
d2060807
02010105
08220ef2
4a100c81
34121084
4a0a12ff
0000006c
e0301000
80000505
bf8c0770
10160b11
4a0c0c82
34140c84
4a0c14ff
0000006c
e0301000
80000606
bf8c0770
d2820006
042e0d07
c2060113
bf8c007f
10160c0c
4a0c12ff
00000068
e0301000
80000606
bf8c0770
10180d11
4a1a14ff
00000068
e0301000
80000d0d
bf8c0770
d282000c
04321b07
c2060112
bf8c007f
1018180c
4a1a12ff
00000064
e0301000
80000d0d
bf8c0770
101a1b11
4a1c14ff
00000064
e0301000
80000e0e
bf8c0770
d282000d
04361d07
c2060111
bf8c007f
101a1a0c
4a1212ff
00000060
e0301000
80000909
bf8c0770
10121311
4a1414ff
00000060
e0301000
80000a0a
bf8c0770
d2820009
04261507
c2060110
bf8c007f
1012120c
f800020f
0b0c0d09
c0860704
bf8c000f
e00c2000
80030d00
c0860708
bf8c0070
e00c2000
80030900
c203013a
bf8c0070
10001806
7e004900
c2030138
bf8c007f
10260006
7e004913
0624010e
c2038139
bf8c007f
10242407
08260113
c203813b
bf8c007f
10262607
7e264913
0626270d
10262606
f800021f
12131213
c203012d
bf8c000f
7e260206
c203012c
bf8c007f
7e2a0206
d2000012
00122715
c202012e
bf8c007f
7e280204
d2000016
00222912
c202012f
bf8c007f
7e240204
d2000016
002a2516
10222d11
d1040004
02010308
d2000016
00118280
7e2c0b16
d0080004
02022c80
d2000013
00122715
d1040004
02010508
d2000015
00118280
7e2a0b15
d0080004
02022a80
d2000013
00122913
d1040004
02010708
d2000008
00118280
7e100b08
d0080004
02021080
d2000008
00122513
d2820007
04461107
c2020130
bf8c007f
0a101a04
10100f08
082212f2
c2020134
bf8c007f
10242204
c2020136
bf8c007f
d2820012
044a1204
10102508
082416f2
c2020132
bf8c007f
10242404
c2020133
bf8c007f
d2820012
0448090b
102614ff
40c90625
d2820012
044e0912
102624ff
3e22f983
7e246b13
10282508
c2020131
bf8c007f
0a1a1c04
100e0f0d
c2020135
bf8c007f
101a2204
c2020137
bf8c007f
d2820009
04361204
100e1307
7e146d13
10121507
08122909
10101508
d282000a
04222507
c2020141
bf8c007f
100e1404
c2020145
bf8c007f
d2820007
041e1204
060e0f02
c2020140
bf8c007f
10101404
c2020144
bf8c007f
d2820008
04221204
06101101
c2020102
bf8c007f
10161004
c2020106
bf8c007f
d282000b
042e0e04
c2020142
bf8c007f
10141404
c2020146
bf8c007f
d2820009
042a1204
06021303
c202010a
bf8c007f
d2820002
042e0204
c202010e
bf8c007f
06040404
c2020114
c2028115
bf8c007f
7e060205
d2820003
040c0902
f800022f
05060003
c2020103
bf8c000f
10001004
c2020107
bf8c007f
d2820000
04020e04
c202010b
bf8c007f
d2820000
04020204
c202010f
bf8c007f
06000004
c2020101
bf8c007f
10061004
c2020105
bf8c007f
d2820003
040e0e04
c2020109
bf8c007f
d2820003
040e0204
c202010d
bf8c007f
06060604
c2020100
bf8c007f
10081004
c2020104
bf8c007f
d2820004
04120e04
c2020108
bf8c007f
d2820001
04120204
c200010c
bf8c007f
06020200
f80008cf
00020301
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[0]
DCL CONST[2]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[0].xyzz
  4: LRP TEMP[1].xyz, CONST[0].wwww, TEMP[2].xxxx, TEMP[0].xyzz
  5: MUL TEMP[0], IN[0], TEMP[1]
  6: MAX TEMP[1].x, IN[2].xxxx, CONST[2].wwww
  7: MOV_SAT TEMP[1].x, TEMP[1].xxxx
  8: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
  9: MOV OUT[0], TEMP[0]
 10: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %27 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %28 = load <32 x i8> addrspace(2)* %27, !tbaa !0
  %29 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = bitcast float %35 to i32
  %39 = bitcast float %36 to i32
  %40 = insertelement <2 x i32> undef, i32 %38, i32 0
  %41 = insertelement <2 x i32> %40, i32 %39, i32 1
  %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %28, <16 x i8> %30, i32 2)
  %43 = extractelement <4 x float> %42, i32 0
  %44 = extractelement <4 x float> %42, i32 1
  %45 = extractelement <4 x float> %42, i32 2
  %46 = extractelement <4 x float> %42, i32 3
  %47 = fmul float %43, 0x3FCB367A00000000
  %48 = fmul float %44, 0x3FE6E2EB20000000
  %49 = fadd float %48, %47
  %50 = fmul float %45, 0x3FB27BB300000000
  %51 = fadd float %49, %50
  %52 = call float @llvm.AMDGPU.lrp(float %22, float %51, float %43)
  %53 = call float @llvm.AMDGPU.lrp(float %22, float %51, float %44)
  %54 = call float @llvm.AMDGPU.lrp(float %22, float %51, float %45)
  %55 = fmul float %31, %52
  %56 = fmul float %32, %53
  %57 = fmul float %33, %54
  %58 = fmul float %34, %46
  %59 = fcmp uge float %37, %26
  %60 = select i1 %59, float %37, float %26
  %61 = call float @llvm.AMDIL.clamp.(float %60, float 0.000000e+00, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %61, float %55, float %23)
  %63 = call float @llvm.AMDGPU.lrp(float %61, float %56, float %24)
  %64 = call float @llvm.AMDGPU.lrp(float %61, float %57, float %25)
  %65 = fcmp ugt float %58, 0x3F80101020000000
  %66 = sext i1 %65 to i32
  %67 = trunc i32 %66 to i1
  %68 = select i1 %67, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %68)
  %69 = call i32 @llvm.SI.packf16(float %62, float %63)
  %70 = bitcast i32 %69 to float
  %71 = call i32 @llvm.SI.packf16(float %64, float %58)
  %72 = bitcast i32 %71 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %70, float %72, float %70, float %72)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0840300
c0c60500
bf8c007f
f0800f00
00430302
c8080300
c8090301
bf8c0770
10040d02
7e0e02ff
3c008081
d0080002
02020f02
d2000007
0009e4f3
7c260e80
100e06ff
3e59b3d0
7e1002ff
3f371759
d2820007
041e1104
7e1002ff
3d93dd98
d2820007
041e1105
c0800100
bf8c007f
c2020103
bf8c007f
d2080008
020008f2
10120908
d2820009
04260e04
c8280100
c8290101
1016130a
c8240800
c8250801
c202810b
bf8c007f
d00c0008
02000b09
7e140205
d2000009
0022130a
d2060809
02010109
081412f2
c2028109
bf8c007f
10181405
d282000b
04321709
10180708
d282000c
04320e04
c8340000
c8350001
1018190d
c2028108
bf8c007f
101a1405
d282000c
04361909
5e16170c
10060b08
d2820003
040e0e04
c8100200
c8110201
10000704
c200010a
bf8c007f
10021400
d2820000
04060109
5e000500
f8001c0f
000b000b
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..19]
DCL TEMP[0..5], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     6.2820,    -1.0000,     0.0000}
IMM[1] INT32 {0, 1, 2, 3}
  0: SGE TEMP[0].x, IN[0].wwww, CONST[6].yyyy
  1: F2I TEMP[0].x, -TEMP[0]
  2: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
  3: SGE TEMP[1].x, IN[0].wwww, CONST[6].zzzz
  4: F2I TEMP[1].x, -TEMP[1]
  5: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx
  6: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
  7: F2I TEMP[0].x, TEMP[0].xxxx
  8: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
  9: I2F TEMP[2].y, TEMP[1].yyyy
 10: CMP TEMP[2].x, TEMP[2].yyyy, CONST[15].yyyy, CONST[15].xxxx
 11: I2F TEMP[3].z, TEMP[1].zzzz
 12: CMP TEMP[2].x, TEMP[3].zzzz, CONST[15].zzzz, TEMP[2].xxxx
 13: I2F TEMP[1].w, TEMP[1].wwww
 14: CMP TEMP[2].x, TEMP[1].wwww, CONST[15].wwww, TEMP[2].xxxx
 15: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
 16: I2F TEMP[4].y, TEMP[1].yyyy
 17: CMP TEMP[3].x, TEMP[4].yyyy, CONST[6].yyyy, CONST[6].xxxx
 18: I2F TEMP[4].z, TEMP[1].zzzz
 19: CMP TEMP[3].x, TEMP[4].zzzz, CONST[6].zzzz, TEMP[3].xxxx
 20: I2F TEMP[1].w, TEMP[1].wwww
 21: CMP TEMP[3].x, TEMP[1].wwww, CONST[6].wwww, TEMP[3].xxxx
 22: ADD TEMP[1].x, IN[0].wwww, -TEMP[3].xxxx
 23: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 24: USEQ TEMP[2].yzw, TEMP[0].xxxx, IMM[1]
 25: I2F TEMP[4].y, TEMP[2].yyyy
 26: CMP TEMP[3].x, TEMP[4].yyyy, CONST[11].yyyy, CONST[11].xxxx
 27: I2F TEMP[4].z, TEMP[2].zzzz
 28: CMP TEMP[3].x, TEMP[4].zzzz, CONST[11].zzzz, TEMP[3].xxxx
 29: I2F TEMP[2].w, TEMP[2].wwww
 30: CMP TEMP[3].x, TEMP[2].wwww, CONST[11].wwww, TEMP[3].xxxx
 31: UADD TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy
 32: USEQ TEMP[2].yzw, TEMP[2].xxxx, IMM[1]
 33: I2F TEMP[5].y, TEMP[2].yyyy
 34: CMP TEMP[4].x, TEMP[5].yyyy, CONST[11].yyyy, CONST[11].xxxx
 35: I2F TEMP[5].z, TEMP[2].zzzz
 36: CMP TEMP[4].x, TEMP[5].zzzz, CONST[11].zzzz, TEMP[4].xxxx
 37: I2F TEMP[2].w, TEMP[2].wwww
 38: CMP TEMP[4].x, TEMP[2].wwww, CONST[11].wwww, TEMP[4].xxxx
 39: ADD TEMP[2].xy, IN[1].xyyy, -CONST[12].xyyy
 40: LRP TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx
 41: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xxxx
 42: ADD TEMP[3].x, IMM[0].xxxx, -IN[2].xxxx
 43: MUL TEMP[3].xy, CONST[13].xyyy, TEMP[3].xxxx
 44: MAD TEMP[3].xy, CONST[13].zwww, IN[2].xxxx, TEMP[3].xyyy
 45: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy
 46: LRP TEMP[3].x, IN[2].zzzz, CONST[12].wwww, CONST[12].zzzz
 47: MUL TEMP[4].x, IMM[0].yyyy, IN[2].yyyy
 48: MAD TEMP[3].x, TEMP[3].xxxx, IN[0].wwww, TEMP[4].xxxx
 49: SIN TEMP[4].x, TEMP[3].xxxx
 50: COS TEMP[3].x, TEMP[3].xxxx
 51: MOV TEMP[4].y, TEMP[3].xxxx
 52: MUL TEMP[5].xy, TEMP[4].xyyy, IMM[0].zzzz
 53: MOV TEMP[4].z, TEMP[5].yyxy
 54: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xxxx
 55: MAD TEMP[2].xy, TEMP[2].yxxx, TEMP[4].xzzz, TEMP[3].xyyy
 56: MUL TEMP[3].xyz, CONST[16].xyzz, TEMP[2].xxxx
 57: MAD TEMP[3].xyz, CONST[17].xyzz, TEMP[2].yyyy, TEMP[3].xyzz
 58: ADD TEMP[2].xyz, IN[0].xyzz, TEMP[3].xyzz
 59: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 60: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 61: MAD TEMP[2], CONST[2], TEMP[2].zzzz, TEMP[3]
 62: ADD TEMP[2], TEMP[2], CONST[3]
 63: UADD TEMP[3].x, TEMP[0].xxxx, IMM[1].yyyy
 64: UADD TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz
 65: UARL ADDR[0].x, TEMP[3].xxxx
 66: UARL ADDR[0].x, TEMP[3].xxxx
 67: MOV TEMP[3], CONST[ADDR[0].x+6]
 68: UARL ADDR[0].x, TEMP[0].xxxx
 69: LRP TEMP[0], TEMP[1].xxxx, CONST[ADDR[0].x+6], TEMP[3]
 70: MUL TEMP[0], TEMP[0], CONST[4]
 71: MUL TEMP[1].x, IN[2].wwww, CONST[14].zzzz
 72: FLR TEMP[1].xy, TEMP[1].xxxx
 73: MUL TEMP[1].xy, CONST[14].xxxx, TEMP[1].xyyy
 74: FLR TEMP[3].xy, TEMP[1].xyyy
 75: MOV TEMP[4].yw, TEMP[3].yxyy
 76: ADD TEMP[1].xy, TEMP[1].xyyy, -TEMP[3].xyyy
 77: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[14].wwww
 78: FLR TEMP[1].xy, TEMP[1].xyyy
 79: MOV TEMP[4].xz, TEMP[1].xxyx
 80: ADD TEMP[1], IN[1].xyxy, TEMP[4]
 81: MUL TEMP[1], TEMP[1], CONST[14].xyxy
 82: MAD TEMP[3].x, TEMP[2].zzzz, CONST[5].xxxx, CONST[5].yyyy
 83: MOV OUT[2], TEMP[1]
 84: MOV OUT[3], TEMP[3]
 85: MOV OUT[1], TEMP[0]
 86: MOV OUT[0], TEMP[2]
 87: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 280)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = extractelement <4 x float> %65, i32 3
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  %81 = extractelement <4 x float> %77, i32 3
  %82 = fcmp uge float %69, %34
  %83 = select i1 %82, float 1.000000e+00, float 0.000000e+00
  %84 = fsub float -0.000000e+00, %83
  %85 = fptosi float %84 to i32
  %86 = bitcast i32 %85 to float
  %87 = bitcast float %86 to i32
  %88 = and i32 %87, 1065353216
  %89 = bitcast i32 %88 to float
  %90 = fcmp uge float %69, %35
  %91 = select i1 %90, float 1.000000e+00, float 0.000000e+00
  %92 = fsub float -0.000000e+00, %91
  %93 = fptosi float %92 to i32
  %94 = bitcast i32 %93 to float
  %95 = bitcast float %94 to i32
  %96 = and i32 %95, 1065353216
  %97 = bitcast i32 %96 to float
  %98 = fadd float %89, %97
  %99 = fptosi float %98 to i32
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %100 to i32
  %102 = icmp eq i32 %101, 1
  %103 = sext i1 %102 to i32
  %104 = bitcast float %100 to i32
  %105 = icmp eq i32 %104, 2
  %106 = sext i1 %105 to i32
  %107 = bitcast float %100 to i32
  %108 = icmp eq i32 %107, 3
  %109 = sext i1 %108 to i32
  %110 = bitcast i32 %103 to float
  %111 = bitcast i32 %106 to float
  %112 = bitcast i32 %109 to float
  %113 = bitcast float %110 to i32
  %114 = sitofp i32 %113 to float
  %115 = call float @llvm.AMDGPU.cndlt(float %114, float %54, float %53)
  %116 = bitcast float %111 to i32
  %117 = sitofp i32 %116 to float
  %118 = call float @llvm.AMDGPU.cndlt(float %117, float %55, float %115)
  %119 = bitcast float %112 to i32
  %120 = sitofp i32 %119 to float
  %121 = call float @llvm.AMDGPU.cndlt(float %120, float %56, float %118)
  %122 = bitcast float %100 to i32
  %123 = icmp eq i32 %122, 1
  %124 = sext i1 %123 to i32
  %125 = bitcast float %100 to i32
  %126 = icmp eq i32 %125, 2
  %127 = sext i1 %126 to i32
  %128 = bitcast float %100 to i32
  %129 = icmp eq i32 %128, 3
  %130 = sext i1 %129 to i32
  %131 = bitcast i32 %124 to float
  %132 = bitcast i32 %127 to float
  %133 = bitcast i32 %130 to float
  %134 = bitcast float %131 to i32
  %135 = sitofp i32 %134 to float
  %136 = call float @llvm.AMDGPU.cndlt(float %135, float %34, float %33)
  %137 = bitcast float %132 to i32
  %138 = sitofp i32 %137 to float
  %139 = call float @llvm.AMDGPU.cndlt(float %138, float %35, float %136)
  %140 = bitcast float %133 to i32
  %141 = sitofp i32 %140 to float
  %142 = call float @llvm.AMDGPU.cndlt(float %141, float %36, float %139)
  %143 = fsub float -0.000000e+00, %142
  %144 = fadd float %69, %143
  %145 = fmul float %144, %121
  %146 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00)
  %147 = bitcast float %100 to i32
  %148 = icmp eq i32 %147, 1
  %149 = sext i1 %148 to i32
  %150 = bitcast float %100 to i32
  %151 = icmp eq i32 %150, 2
  %152 = sext i1 %151 to i32
  %153 = bitcast float %100 to i32
  %154 = icmp eq i32 %153, 3
  %155 = sext i1 %154 to i32
  %156 = bitcast i32 %149 to float
  %157 = bitcast i32 %152 to float
  %158 = bitcast i32 %155 to float
  %159 = bitcast float %156 to i32
  %160 = sitofp i32 %159 to float
  %161 = call float @llvm.AMDGPU.cndlt(float %160, float %38, float %37)
  %162 = bitcast float %157 to i32
  %163 = sitofp i32 %162 to float
  %164 = call float @llvm.AMDGPU.cndlt(float %163, float %39, float %161)
  %165 = bitcast float %158 to i32
  %166 = sitofp i32 %165 to float
  %167 = call float @llvm.AMDGPU.cndlt(float %166, float %40, float %164)
  %168 = bitcast float %100 to i32
  %169 = add i32 %168, 1
  %170 = bitcast i32 %169 to float
  %171 = bitcast float %170 to i32
  %172 = icmp eq i32 %171, 1
  %173 = sext i1 %172 to i32
  %174 = bitcast float %170 to i32
  %175 = icmp eq i32 %174, 2
  %176 = sext i1 %175 to i32
  %177 = bitcast float %170 to i32
  %178 = icmp eq i32 %177, 3
  %179 = sext i1 %178 to i32
  %180 = bitcast i32 %173 to float
  %181 = bitcast i32 %176 to float
  %182 = bitcast i32 %179 to float
  %183 = bitcast float %180 to i32
  %184 = sitofp i32 %183 to float
  %185 = call float @llvm.AMDGPU.cndlt(float %184, float %38, float %37)
  %186 = bitcast float %181 to i32
  %187 = sitofp i32 %186 to float
  %188 = call float @llvm.AMDGPU.cndlt(float %187, float %39, float %185)
  %189 = bitcast float %182 to i32
  %190 = sitofp i32 %189 to float
  %191 = call float @llvm.AMDGPU.cndlt(float %190, float %40, float %188)
  %192 = fsub float -0.000000e+00, %41
  %193 = fadd float %73, %192
  %194 = fsub float -0.000000e+00, %42
  %195 = fadd float %74, %194
  %196 = call float @llvm.AMDGPU.lrp(float %146, float %191, float %167)
  %197 = fmul float %193, %196
  %198 = fmul float %195, %196
  %199 = fsub float -0.000000e+00, %78
  %200 = fadd float 1.000000e+00, %199
  %201 = fmul float %45, %200
  %202 = fmul float %46, %200
  %203 = fmul float %47, %78
  %204 = fadd float %203, %201
  %205 = fmul float %48, %78
  %206 = fadd float %205, %202
  %207 = fmul float %197, %204
  %208 = fmul float %198, %206
  %209 = call float @llvm.AMDGPU.lrp(float %80, float %44, float %43)
  %210 = fmul float 0x401920C4A0000000, %79
  %211 = fmul float %209, %69
  %212 = fadd float %211, %210
  %213 = call float @llvm.sin.f32(float %212)
  %214 = call float @llvm.cos.f32(float %212)
  %215 = fmul float %213, -1.000000e+00
  %216 = fmul float %207, %214
  %217 = fmul float %208, %214
  %218 = fmul float %208, %213
  %219 = fadd float %218, %216
  %220 = fmul float %207, %215
  %221 = fadd float %220, %217
  %222 = fmul float %57, %219
  %223 = fmul float %58, %219
  %224 = fmul float %59, %219
  %225 = fmul float %60, %221
  %226 = fadd float %225, %222
  %227 = fmul float %61, %221
  %228 = fadd float %227, %223
  %229 = fmul float %62, %221
  %230 = fadd float %229, %224
  %231 = fadd float %66, %226
  %232 = fadd float %67, %228
  %233 = fadd float %68, %230
  %234 = fmul float %11, %231
  %235 = fmul float %12, %231
  %236 = fmul float %13, %231
  %237 = fmul float %14, %231
  %238 = fmul float %15, %232
  %239 = fadd float %238, %234
  %240 = fmul float %16, %232
  %241 = fadd float %240, %235
  %242 = fmul float %17, %232
  %243 = fadd float %242, %236
  %244 = fmul float %18, %232
  %245 = fadd float %244, %237
  %246 = fmul float %19, %233
  %247 = fadd float %246, %239
  %248 = fmul float %20, %233
  %249 = fadd float %248, %241
  %250 = fmul float %21, %233
  %251 = fadd float %250, %243
  %252 = fmul float %22, %233
  %253 = fadd float %252, %245
  %254 = fadd float %247, %23
  %255 = fadd float %249, %24
  %256 = fadd float %251, %25
  %257 = fadd float %253, %26
  %258 = bitcast float %100 to i32
  %259 = add i32 %258, 1
  %260 = bitcast i32 %259 to float
  %261 = bitcast float %100 to i32
  %262 = add i32 %261, 2
  %263 = bitcast i32 %262 to float
  %264 = bitcast float %260 to i32
  %265 = shl i32 %264, 4
  %266 = add i32 %265, 96
  %267 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %266)
  %268 = shl i32 %264, 4
  %269 = add i32 %268, 100
  %270 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %269)
  %271 = shl i32 %264, 4
  %272 = add i32 %271, 104
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = shl i32 %264, 4
  %275 = add i32 %274, 108
  %276 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %275)
  %277 = bitcast float %263 to i32
  %278 = shl i32 %277, 4
  %279 = add i32 %278, 96
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = call float @llvm.AMDGPU.lrp(float %146, float %280, float %267)
  %282 = shl i32 %277, 4
  %283 = add i32 %282, 100
  %284 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %283)
  %285 = call float @llvm.AMDGPU.lrp(float %146, float %284, float %270)
  %286 = shl i32 %277, 4
  %287 = add i32 %286, 104
  %288 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %287)
  %289 = call float @llvm.AMDGPU.lrp(float %146, float %288, float %273)
  %290 = shl i32 %277, 4
  %291 = add i32 %290, 108
  %292 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %291)
  %293 = call float @llvm.AMDGPU.lrp(float %146, float %292, float %276)
  %294 = fmul float %281, %27
  %295 = fmul float %285, %28
  %296 = fmul float %289, %29
  %297 = fmul float %293, %30
  %298 = fmul float %81, %51
  %299 = call float @floor(float %298)
  %300 = call float @floor(float %298)
  %301 = fmul float %49, %299
  %302 = fmul float %49, %300
  %303 = call float @floor(float %301)
  %304 = call float @floor(float %302)
  %305 = fsub float -0.000000e+00, %303
  %306 = fadd float %301, %305
  %307 = fsub float -0.000000e+00, %304
  %308 = fadd float %302, %307
  %309 = fmul float %306, %52
  %310 = fmul float %308, %52
  %311 = call float @floor(float %309)
  %312 = call float @floor(float %310)
  %313 = fadd float %73, %311
  %314 = fadd float %74, %303
  %315 = fadd float %73, %312
  %316 = fadd float %74, %304
  %317 = fmul float %313, %49
  %318 = fmul float %314, %50
  %319 = fmul float %315, %49
  %320 = fmul float %316, %50
  %321 = fmul float %256, %31
  %322 = fadd float %321, %32
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %294, float %295, float %296, float %297)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %317, float %318, float %319, float %320)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %322, float %304, float %273, float %276)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %254, float %255, float %256, float %257)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.sin.f32(float) #3

; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #3

; Function Attrs: readonly
declare float @floor(float) #4

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }
attributes #4 = { readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c206011a
bf8c007f
d00c0004
02001904
d2000005
0011e480
d2060005
22010105
7e0a1105
360a0af2
c2068119
bf8c007f
d00c0004
02001b04
d2000006
0011e480
d2060006
22010106
7e0c1106
360c0cf2
060a0b06
7e0c1105
d1040004
02010306
d2000005
00118280
7e0a0b05
d0080004
02020a80
c204013d
bf8c007f
7e0a0208
c204013c
bf8c007f
7e0e0208
d2000005
00120b07
d1040008
02010506
d2000007
00218280
7e0e0b07
d0080008
02020e80
c205013e
bf8c007f
7e0e020a
d2000005
00220f05
d104000a
02010706
d2000007
00298280
7e0e0b07
d008000a
02020e80
c207013f
bf8c007f
7e0e020e
d2000005
002a0f05
7e0e020d
c2068118
bf8c007f
7e10020d
d2000007
00120f08
7e10020c
d2000007
00221107
c206011b
bf8c007f
7e10020c
d2000007
002a1107
080e0f04
100a0b07
d2060807
02010105
08220ef2
4a100c81
34121084
4a0a12ff
0000006c
e0301000
80000505
bf8c0770
10160b11
4a0c0c82
34140c84
4a0c14ff
0000006c
e0301000
80000606
bf8c0770
d2820006
042e0d07
c2060113
bf8c007f
10160c0c
4a0c12ff
00000068
e0301000
80000606
bf8c0770
10180d11
4a1a14ff
00000068
e0301000
80000d0d
bf8c0770
d282000c
04321b07
c2060112
bf8c007f
1018180c
4a1a12ff
00000064
e0301000
80000d0d
bf8c0770
101a1b11
4a1c14ff
00000064
e0301000
80000e0e
bf8c0770
d282000d
04361d07
c2060111
bf8c007f
101a1a0c
4a1212ff
00000060
e0301000
80000909
bf8c0770
10121311
4a1414ff
00000060
e0301000
80000a0a
bf8c0770
d2820009
04261507
c2060110
bf8c007f
1012120c
f800020f
0b0c0d09
c0860704
bf8c000f
e00c2000
80030d00
c0860708
bf8c0070
e00c2000
80030900
c203013a
bf8c0070
10001806
7e004900
c2030138
bf8c007f
10260006
7e004913
0624010e
c2038139
bf8c007f
10242407
08260113
c203813b
bf8c007f
10262607
7e264913
0626270d
10262606
f800021f
12131213
c203012d
bf8c000f
7e260206
c203012c
bf8c007f
7e2a0206
d2000012
00122715
c202012e
bf8c007f
7e280204
d2000016
00222912
c202012f
bf8c007f
7e240204
d2000016
002a2516
10222d11
d1040004
02010308
d2000016
00118280
7e2c0b16
d0080004
02022c80
d2000013
00122715
d1040004
02010508
d2000015
00118280
7e2a0b15
d0080004
02022a80
d2000013
00122913
d1040004
02010708
d2000008
00118280
7e100b08
d0080004
02021080
d2000008
00122513
d2820007
04461107
c2020130
bf8c007f
0a101a04
10100f08
082212f2
c2020134
bf8c007f
10242204
c2020136
bf8c007f
d2820012
044a1204
10102508
082416f2
c2020132
bf8c007f
10242404
c2020133
bf8c007f
d2820012
0448090b
102614ff
40c90625
d2820012
044e0912
102624ff
3e22f983
7e246b13
10282508
c2020131
bf8c007f
0a1a1c04
100e0f0d
c2020135
bf8c007f
101a2204
c2020137
bf8c007f
d2820009
04361204
100e1307
7e146d13
10121507
08122909
10101508
d282000a
04222507
c2020141
bf8c007f
100e1404
c2020145
bf8c007f
d2820007
041e1204
060e0f02
c2020140
bf8c007f
10101404
c2020144
bf8c007f
d2820008
04221204
06101101
c2020102
bf8c007f
10161004
c2020106
bf8c007f
d282000b
042e0e04
c2020142
bf8c007f
10141404
c2020146
bf8c007f
d2820009
042a1204
06021303
c202010a
bf8c007f
d2820002
042e0204
c202010e
bf8c007f
06040404
c2020114
c2028115
bf8c007f
7e060205
d2820003
040c0902
f800022f
05060003
c2020103
bf8c000f
10001004
c2020107
bf8c007f
d2820000
04020e04
c202010b
bf8c007f
d2820000
04020204
c202010f
bf8c007f
06000004
c2020101
bf8c007f
10061004
c2020105
bf8c007f
d2820003
040e0e04
c2020109
bf8c007f
d2820003
040e0204
c202010d
bf8c007f
06060604
c2020100
bf8c007f
10081004
c2020104
bf8c007f
d2820004
04120e04
c2020108
bf8c007f
d2820001
04120204
c200010c
bf8c007f
06020200
f80008cf
00020301
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IN[2].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: DP4 TEMP[1].x, TEMP[0], CONST[5]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[3].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[0], IMM[0].xxxx
  5: MUL TEMP[0], TEMP[0], TEMP[1]
  6: MAX TEMP[1].x, IN[1].wwww, CONST[1].wwww
  7: MOV_SAT TEMP[1].x, TEMP[1].xxxx
  8: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz
  9: MOV OUT[0], TEMP[0]
 10: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %31 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %32 = load <32 x i8> addrspace(2)* %31, !tbaa !0
  %33 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %42 = bitcast float %40 to i32
  %43 = bitcast float %41 to i32
  %44 = insertelement <2 x i32> undef, i32 %42, i32 0
  %45 = insertelement <2 x i32> %44, i32 %43, i32 1
  %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %32, <16 x i8> %34, i32 2)
  %47 = extractelement <4 x float> %46, i32 0
  %48 = extractelement <4 x float> %46, i32 1
  %49 = extractelement <4 x float> %46, i32 2
  %50 = extractelement <4 x float> %46, i32 3
  %51 = fmul float %47, %27
  %52 = fmul float %48, %28
  %53 = fadd float %51, %52
  %54 = fmul float %49, %29
  %55 = fadd float %53, %54
  %56 = fmul float %50, %30
  %57 = fadd float %55, %56
  %58 = fadd float %57, %26
  %59 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %59, float %35, float 1.000000e+00)
  %61 = call float @llvm.AMDGPU.lrp(float %59, float %36, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %59, float %37, float 1.000000e+00)
  %63 = call float @llvm.AMDGPU.lrp(float %59, float %38, float 1.000000e+00)
  %64 = fmul float %47, %60
  %65 = fmul float %48, %61
  %66 = fmul float %49, %62
  %67 = fmul float %50, %63
  %68 = fcmp uge float %39, %25
  %69 = select i1 %68, float %39, float %25
  %70 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %71 = call float @llvm.AMDGPU.lrp(float %70, float %64, float %22)
  %72 = call float @llvm.AMDGPU.lrp(float %70, float %65, float %23)
  %73 = call float @llvm.AMDGPU.lrp(float %70, float %66, float %24)
  %74 = call i32 @llvm.SI.packf16(float %71, float %72)
  %75 = bitcast i32 %74 to float
  %76 = call i32 @llvm.SI.packf16(float %73, float %67)
  %77 = bitcast i32 %76 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %75, float %77, float %75, float %77)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0900
c80d0901
c8080800
c8090801
c0840300
c0c60500
bf8c007f
f0800f00
00430202
c0800100
bf8c0070
c2020115
bf8c007f
100c0604
c2020114
bf8c007f
d2820006
04180902
c2020116
bf8c007f
d2820006
04180904
c2020117
bf8c007f
d2820006
04180905
c202010d
bf8c007f
060c0c04
d2060806
02010106
080e0cf2
c8200200
c8210201
d2820008
041e1106
10141104
c8200700
c8210701
c2020107
bf8c007f
d00c0008
02000908
7e120204
d2000008
00221109
d2060808
02010108
081210f2
c2020106
bf8c007f
10161204
d282000a
042e1508
c82c0300
c82d0301
d282000b
041e1706
10161705
5e14170a
c82c0100
c82d0101
d282000b
041e1706
10161703
c2020105
bf8c007f
10181204
d282000b
04321708
c8300000
c8310001
d2820000
041e1906
10000102
c2000104
bf8c007f
10021200
d2820000
04060108
5e001700
f8001c0f
0a000a00
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..2], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[0].xyz, TEMP[0], CONST[3]
  9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 10: MAD TEMP[2].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 11: MOV TEMP[0].w, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 13: MOV OUT[3], TEMP[2]
 14: MOV OUT[1], CONST[8]
 15: MOV OUT[2], TEMP[0]
 16: MOV OUT[0], TEMP[1]
 17: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = fmul float %63, %52
  %72 = fadd float %71, %49
  %73 = fmul float %64, %53
  %74 = fadd float %73, %50
  %75 = fmul float %65, %54
  %76 = fadd float %75, %51
  %77 = fmul float %25, %72
  %78 = fmul float %26, %72
  %79 = fmul float %27, %72
  %80 = fmul float %28, %72
  %81 = fmul float %29, %74
  %82 = fadd float %81, %77
  %83 = fmul float %30, %74
  %84 = fadd float %83, %78
  %85 = fmul float %31, %74
  %86 = fadd float %85, %79
  %87 = fmul float %32, %74
  %88 = fadd float %87, %80
  %89 = fmul float %33, %76
  %90 = fadd float %89, %82
  %91 = fmul float %34, %76
  %92 = fadd float %91, %84
  %93 = fmul float %35, %76
  %94 = fadd float %93, %86
  %95 = fmul float %36, %76
  %96 = fadd float %95, %88
  %97 = fadd float %90, %37
  %98 = fadd float %92, %38
  %99 = fadd float %94, %39
  %100 = fadd float %96, %40
  %101 = fmul float %11, %72
  %102 = fmul float %12, %72
  %103 = fmul float %13, %72
  %104 = fmul float %14, %72
  %105 = fmul float %15, %74
  %106 = fadd float %105, %101
  %107 = fmul float %16, %74
  %108 = fadd float %107, %102
  %109 = fmul float %17, %74
  %110 = fadd float %109, %103
  %111 = fmul float %18, %74
  %112 = fadd float %111, %104
  %113 = fmul float %19, %76
  %114 = fadd float %113, %106
  %115 = fmul float %20, %76
  %116 = fadd float %115, %108
  %117 = fmul float %21, %76
  %118 = fadd float %117, %110
  %119 = fadd float %114, %22
  %120 = fadd float %116, %23
  %121 = fadd float %118, %24
  %122 = fsub float -0.000000e+00, %55
  %123 = fadd float %119, %122
  %124 = fsub float -0.000000e+00, %56
  %125 = fadd float %120, %124
  %126 = fsub float -0.000000e+00, %57
  %127 = fadd float %121, %126
  %128 = fmul float %99, %58
  %129 = fadd float %128, %59
  %130 = fmul float %69, %45
  %131 = fadd float %130, %47
  %132 = fmul float %70, %46
  %133 = fadd float %132, %48
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %125, float %127, float %129)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %131, float %133, float %110, float %112)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %98, float %99, float %100)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020132
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020131
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020130
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %21 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %22 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %23 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %24 = call i32 @llvm.SI.packf16(float %20, float %21)
  %25 = bitcast i32 %24 to float
  %26 = call i32 @llvm.SI.packf16(float %22, float %23)
  %27 = bitcast i32 %26 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %25, float %27, float %25, float %27)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8080300
c8090301
c80c0200
c80d0201
5e040503
c80c0100
c80d0101
c8100000
c8110001
5e000704
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..69]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    4.0000,     1.0000,     2.0000,     3.0000}
IMM[1] FLT32 {   -1.0000,     5.0000,     0.5000,     1.0000}
IMM[2] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MUL TEMP[0].x, IMM[0].xxxx, IN[0].zzzz
  1: F2I TEMP[0].x, TEMP[0].xxxx
  2: UARL ADDR[0].x, TEMP[0].xxxx
  3: MOV TEMP[0].xyz, CONST[ADDR[0].x+6]
  4: MAD TEMP[1].x, IMM[0].xxxx, IN[0].zzzz, IMM[0].yyyy
  5: F2I TEMP[1].x, TEMP[1].xxxx
  6: UARL ADDR[0].x, TEMP[1].xxxx
  7: MOV TEMP[1].xyz, CONST[ADDR[0].x+6]
  8: MAD TEMP[2].x, IMM[0].xxxx, IN[0].zzzz, IMM[0].zzzz
  9: F2I TEMP[2].x, TEMP[2].xxxx
 10: UARL ADDR[0].x, TEMP[2].xxxx
 11: MOV TEMP[2].xyz, CONST[ADDR[0].x+6]
 12: MAD TEMP[3].x, IMM[0].xxxx, IN[0].zzzz, IMM[0].wwww
 13: F2I TEMP[3].x, TEMP[3].xxxx
 14: UARL ADDR[0].x, TEMP[3].xxxx
 15: MOV TEMP[3].xyz, CONST[ADDR[0].x+6]
 16: MUL TEMP[4].x, IN[0].xxxx, IN[0].xxxx
 17: MUL TEMP[5].xyz, TEMP[0].xyzz, IMM[1].xxxx
 18: MAD TEMP[5].xyz, TEMP[1].xyzz, IMM[0].wwww, TEMP[5].xyzz
 19: MUL TEMP[6].xyz, TEMP[2].xyzz, IMM[0].wwww
 20: ADD TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xyzz
 21: ADD TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].xyzz
 22: MUL TEMP[5].xyz, IMM[0].wwww, TEMP[5].xyzz
 23: MUL TEMP[6].x, IN[0].xxxx, IN[0].xxxx
 24: MUL TEMP[7].xyz, TEMP[1].xyzz, IMM[1].yyyy
 25: MAD TEMP[7].xyz, TEMP[0].xyzz, IMM[0].zzzz, -TEMP[7].xyzz
 26: MAD TEMP[7].xyz, TEMP[2].xyzz, IMM[0].xxxx, TEMP[7].xyzz
 27: ADD TEMP[7].xyz, TEMP[7].xyzz, -TEMP[3].xyzz
 28: MUL TEMP[7].xyz, IMM[0].zzzz, TEMP[7].xyzz
 29: ADD TEMP[8].xyz, TEMP[2].xyzz, -TEMP[0].xyzz
 30: MAD TEMP[7].xyz, TEMP[7].xyzz, IN[0].xxxx, TEMP[8].xyzz
 31: MAD TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[7].xyzz
 32: MUL TEMP[5].xyz, TEMP[5].xyzz, IMM[1].zzzz
 33: MUL TEMP[6].xyz, TEMP[5].zxyy, IMM[2].yxxx
 34: MAD TEMP[5].xyz, TEMP[5].yzxx, IMM[2].xxyy, -TEMP[6].xyzz
 35: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz
 36: RSQ TEMP[6].x, TEMP[6].xxxx
 37: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
 38: MUL TEMP[6].x, CONST[0].xxxx, IN[0].yyyy
 39: MAD TEMP[7].xyz, TEMP[1].xyzz, IMM[0].wwww, -TEMP[0].xyzz
 40: MUL TEMP[8].xyz, TEMP[2].xyzz, IMM[0].wwww
 41: ADD TEMP[7].xyz, TEMP[7].xyzz, -TEMP[8].xyzz
 42: ADD TEMP[7].xyz, TEMP[7].xyzz, TEMP[3].xyzz
 43: MUL TEMP[8].x, TEMP[4].xxxx, IN[0].xxxx
 44: MUL TEMP[9].xyz, TEMP[1].xyzz, IMM[1].yyyy
 45: MAD TEMP[9].xyz, TEMP[0].xyzz, IMM[0].zzzz, -TEMP[9].xyzz
 46: MAD TEMP[9].xyz, TEMP[2].xyzz, IMM[0].xxxx, TEMP[9].xyzz
 47: ADD TEMP[3].xyz, TEMP[9].xyzz, -TEMP[3].xyzz
 48: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[0].xyzz
 49: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz
 50: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[0].xxxx, TEMP[1].xyzz
 51: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[4].xxxx, TEMP[1].xyzz
 52: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[8].xxxx, TEMP[1].xyzz
 53: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[1].zzzz
 54: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[1].xyzz
 55: MUL TEMP[1], CONST[2], TEMP[0].xxxx
 56: MAD TEMP[1], CONST[3], TEMP[0].yyyy, TEMP[1]
 57: MAD TEMP[0], CONST[4], TEMP[0].zzzz, TEMP[1]
 58: ADD TEMP[0], TEMP[0], CONST[5]
 59: MOV OUT[1], CONST[1]
 60: MOV OUT[0], TEMP[0]
 61: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %32 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0
  %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %5)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = fmul float 4.000000e+00, %37
  %39 = fptosi float %38 to i32
  %40 = bitcast i32 %39 to float
  %41 = bitcast float %40 to i32
  %42 = shl i32 %41, 4
  %43 = add i32 %42, 96
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %43)
  %45 = shl i32 %41, 4
  %46 = add i32 %45, 100
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %46)
  %48 = shl i32 %41, 4
  %49 = add i32 %48, 104
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %49)
  %51 = fmul float 4.000000e+00, %37
  %52 = fadd float %51, 1.000000e+00
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = shl i32 %55, 4
  %57 = add i32 %56, 96
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %57)
  %59 = shl i32 %55, 4
  %60 = add i32 %59, 100
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %60)
  %62 = shl i32 %55, 4
  %63 = add i32 %62, 104
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %63)
  %65 = fmul float 4.000000e+00, %37
  %66 = fadd float %65, 2.000000e+00
  %67 = fptosi float %66 to i32
  %68 = bitcast i32 %67 to float
  %69 = bitcast float %68 to i32
  %70 = shl i32 %69, 4
  %71 = add i32 %70, 96
  %72 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %71)
  %73 = shl i32 %69, 4
  %74 = add i32 %73, 100
  %75 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %74)
  %76 = shl i32 %69, 4
  %77 = add i32 %76, 104
  %78 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %77)
  %79 = fmul float 4.000000e+00, %37
  %80 = fadd float %79, 3.000000e+00
  %81 = fptosi float %80 to i32
  %82 = bitcast i32 %81 to float
  %83 = bitcast float %82 to i32
  %84 = shl i32 %83, 4
  %85 = add i32 %84, 96
  %86 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %85)
  %87 = shl i32 %83, 4
  %88 = add i32 %87, 100
  %89 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %88)
  %90 = shl i32 %83, 4
  %91 = add i32 %90, 104
  %92 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %91)
  %93 = fmul float %35, %35
  %94 = fmul float %44, -1.000000e+00
  %95 = fmul float %47, -1.000000e+00
  %96 = fmul float %50, -1.000000e+00
  %97 = fmul float %58, 3.000000e+00
  %98 = fadd float %97, %94
  %99 = fmul float %61, 3.000000e+00
  %100 = fadd float %99, %95
  %101 = fmul float %64, 3.000000e+00
  %102 = fadd float %101, %96
  %103 = fmul float %72, 3.000000e+00
  %104 = fmul float %75, 3.000000e+00
  %105 = fmul float %78, 3.000000e+00
  %106 = fsub float -0.000000e+00, %103
  %107 = fadd float %98, %106
  %108 = fsub float -0.000000e+00, %104
  %109 = fadd float %100, %108
  %110 = fsub float -0.000000e+00, %105
  %111 = fadd float %102, %110
  %112 = fadd float %107, %86
  %113 = fadd float %109, %89
  %114 = fadd float %111, %92
  %115 = fmul float 3.000000e+00, %112
  %116 = fmul float 3.000000e+00, %113
  %117 = fmul float 3.000000e+00, %114
  %118 = fmul float %35, %35
  %119 = fmul float %58, 5.000000e+00
  %120 = fmul float %61, 5.000000e+00
  %121 = fmul float %64, 5.000000e+00
  %122 = fsub float -0.000000e+00, %119
  %123 = fmul float %44, 2.000000e+00
  %124 = fadd float %123, %122
  %125 = fsub float -0.000000e+00, %120
  %126 = fmul float %47, 2.000000e+00
  %127 = fadd float %126, %125
  %128 = fsub float -0.000000e+00, %121
  %129 = fmul float %50, 2.000000e+00
  %130 = fadd float %129, %128
  %131 = fmul float %72, 4.000000e+00
  %132 = fadd float %131, %124
  %133 = fmul float %75, 4.000000e+00
  %134 = fadd float %133, %127
  %135 = fmul float %78, 4.000000e+00
  %136 = fadd float %135, %130
  %137 = fsub float -0.000000e+00, %86
  %138 = fadd float %132, %137
  %139 = fsub float -0.000000e+00, %89
  %140 = fadd float %134, %139
  %141 = fsub float -0.000000e+00, %92
  %142 = fadd float %136, %141
  %143 = fmul float 2.000000e+00, %138
  %144 = fmul float 2.000000e+00, %140
  %145 = fmul float 2.000000e+00, %142
  %146 = fsub float -0.000000e+00, %44
  %147 = fadd float %72, %146
  %148 = fsub float -0.000000e+00, %47
  %149 = fadd float %75, %148
  %150 = fsub float -0.000000e+00, %50
  %151 = fadd float %78, %150
  %152 = fmul float %143, %35
  %153 = fadd float %152, %147
  %154 = fmul float %144, %35
  %155 = fadd float %154, %149
  %156 = fmul float %145, %35
  %157 = fadd float %156, %151
  %158 = fmul float %115, %118
  %159 = fadd float %158, %153
  %160 = fmul float %116, %118
  %161 = fadd float %160, %155
  %162 = fmul float %117, %118
  %163 = fadd float %162, %157
  %164 = fmul float %159, 5.000000e-01
  %165 = fmul float %161, 5.000000e-01
  %166 = fmul float %163, 5.000000e-01
  %167 = fmul float %166, 1.000000e+00
  %168 = fmul float %164, 0.000000e+00
  %169 = fmul float %165, 0.000000e+00
  %170 = fsub float -0.000000e+00, %167
  %171 = fmul float %165, 0.000000e+00
  %172 = fadd float %171, %170
  %173 = fsub float -0.000000e+00, %168
  %174 = fmul float %166, 0.000000e+00
  %175 = fadd float %174, %173
  %176 = fsub float -0.000000e+00, %169
  %177 = fmul float %164, 1.000000e+00
  %178 = fadd float %177, %176
  %179 = fmul float %172, %172
  %180 = fmul float %175, %175
  %181 = fadd float %180, %179
  %182 = fmul float %178, %178
  %183 = fadd float %181, %182
  %184 = call float @llvm.AMDGPU.rsq(float %183)
  %185 = fmul float %172, %184
  %186 = fmul float %175, %184
  %187 = fmul float %178, %184
  %188 = fmul float %11, %36
  %189 = fsub float -0.000000e+00, %44
  %190 = fmul float %58, 3.000000e+00
  %191 = fadd float %190, %189
  %192 = fsub float -0.000000e+00, %47
  %193 = fmul float %61, 3.000000e+00
  %194 = fadd float %193, %192
  %195 = fsub float -0.000000e+00, %50
  %196 = fmul float %64, 3.000000e+00
  %197 = fadd float %196, %195
  %198 = fmul float %72, 3.000000e+00
  %199 = fmul float %75, 3.000000e+00
  %200 = fmul float %78, 3.000000e+00
  %201 = fsub float -0.000000e+00, %198
  %202 = fadd float %191, %201
  %203 = fsub float -0.000000e+00, %199
  %204 = fadd float %194, %203
  %205 = fsub float -0.000000e+00, %200
  %206 = fadd float %197, %205
  %207 = fadd float %202, %86
  %208 = fadd float %204, %89
  %209 = fadd float %206, %92
  %210 = fmul float %93, %35
  %211 = fmul float %58, 5.000000e+00
  %212 = fmul float %61, 5.000000e+00
  %213 = fmul float %64, 5.000000e+00
  %214 = fsub float -0.000000e+00, %211
  %215 = fmul float %44, 2.000000e+00
  %216 = fadd float %215, %214
  %217 = fsub float -0.000000e+00, %212
  %218 = fmul float %47, 2.000000e+00
  %219 = fadd float %218, %217
  %220 = fsub float -0.000000e+00, %213
  %221 = fmul float %50, 2.000000e+00
  %222 = fadd float %221, %220
  %223 = fmul float %72, 4.000000e+00
  %224 = fadd float %223, %216
  %225 = fmul float %75, 4.000000e+00
  %226 = fadd float %225, %219
  %227 = fmul float %78, 4.000000e+00
  %228 = fadd float %227, %222
  %229 = fsub float -0.000000e+00, %86
  %230 = fadd float %224, %229
  %231 = fsub float -0.000000e+00, %89
  %232 = fadd float %226, %231
  %233 = fsub float -0.000000e+00, %92
  %234 = fadd float %228, %233
  %235 = fsub float -0.000000e+00, %44
  %236 = fadd float %72, %235
  %237 = fsub float -0.000000e+00, %47
  %238 = fadd float %75, %237
  %239 = fsub float -0.000000e+00, %50
  %240 = fadd float %78, %239
  %241 = fmul float %58, 2.000000e+00
  %242 = fmul float %61, 2.000000e+00
  %243 = fmul float %64, 2.000000e+00
  %244 = fmul float %236, %35
  %245 = fadd float %244, %241
  %246 = fmul float %238, %35
  %247 = fadd float %246, %242
  %248 = fmul float %240, %35
  %249 = fadd float %248, %243
  %250 = fmul float %230, %93
  %251 = fadd float %250, %245
  %252 = fmul float %232, %93
  %253 = fadd float %252, %247
  %254 = fmul float %234, %93
  %255 = fadd float %254, %249
  %256 = fmul float %207, %210
  %257 = fadd float %256, %251
  %258 = fmul float %208, %210
  %259 = fadd float %258, %253
  %260 = fmul float %209, %210
  %261 = fadd float %260, %255
  %262 = fmul float %257, 5.000000e-01
  %263 = fmul float %259, 5.000000e-01
  %264 = fmul float %261, 5.000000e-01
  %265 = fmul float %185, %188
  %266 = fadd float %265, %262
  %267 = fmul float %186, %188
  %268 = fadd float %267, %263
  %269 = fmul float %187, %188
  %270 = fadd float %269, %264
  %271 = fmul float %16, %266
  %272 = fmul float %17, %266
  %273 = fmul float %18, %266
  %274 = fmul float %19, %266
  %275 = fmul float %20, %268
  %276 = fadd float %275, %271
  %277 = fmul float %21, %268
  %278 = fadd float %277, %272
  %279 = fmul float %22, %268
  %280 = fadd float %279, %273
  %281 = fmul float %23, %268
  %282 = fadd float %281, %274
  %283 = fmul float %24, %270
  %284 = fadd float %283, %276
  %285 = fmul float %25, %270
  %286 = fadd float %285, %278
  %287 = fmul float %26, %270
  %288 = fadd float %287, %280
  %289 = fmul float %27, %270
  %290 = fadd float %289, %282
  %291 = fadd float %284, %28
  %292 = fadd float %286, %29
  %293 = fadd float %288, %30
  %294 = fadd float %290, %31
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %12, float %13, float %14, float %15)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %291, float %292, float %293, float %294)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020107
c2028106
c2040105
c2048104
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010000
bf8c0770
100804f6
7e081104
34220884
4a0822ff
00000060
e0301000
80000804
d2820004
03d1ed02
7e081104
34240884
4a0824ff
00000060
e0301000
80000404
bf8c0770
080a1104
d2820006
03c9ed02
7e0c1106
34260c84
4a0c26ff
00000060
e0301000
80000606
bf8c0770
100e0cff
40a00000
06121108
080e0f09
d2820009
041ded04
7e0e02ff
40400000
d2820007
041ded02
7e0e1107
342a0e84
4a0e2aff
00000060
e0301000
80000707
bf8c0770
08120f09
06141309
d282000a
0416010a
10160cff
40400000
0816110b
7e2c02ff
c0400000
d2820008
042e2d04
06100f08
101810ff
40400000
10100100
d282000a
042a110c
102814f0
102e2880
4a1422ff
00000068
e0301000
8000100a
4a1424ff
00000068
e0301000
80000a0a
bf8c0770
0818210a
4a1a26ff
00000068
e0301000
80000d0d
bf8c0770
101c1aff
40a00000
061e2110
081c1d0f
d282000f
0439ed0a
4a1c2aff
00000068
e0301000
80000e0e
bf8c0770
081e1d0f
06301f0f
d2820018
04320118
10321aff
40400000
08202119
d2820019
04422d0a
06321d19
103232ff
40400000
d2820018
04621119
103230f0
10303280
082e2f18
4a2222ff
00000064
e0301000
80001b11
4a2224ff
00000064
e0301000
80001111
bf8c0770
08303711
4a2426ff
00000064
e0301000
80001312
bf8c0770
102426ff
40a00000
0634371b
0824251a
d2820012
0449ed11
4a2a2aff
00000064
e0301000
80001515
bf8c0770
08342b12
0624351a
d2820012
04620112
103826ff
40400000
0836371c
d2820016
046e2d11
062c2b16
102c2cff
40400000
d2820012
044a1116
102424f0
10242480
082c3312
10322d16
d2820019
04662f17
08242514
d2820014
04662512
7e285b14
102e2917
06262713
d2820013
044e0118
d2820018
044e111a
10260108
102222ff
40400000
0822231b
06222b11
d2820011
04622711
102222f0
c2020100
bf8c007f
102a0204
d2820011
04462b17
102c2916
060c0d06
d2820005
041a0105
d2820005
04161109
100808ff
40400000
0808090b
06080f04
d2820004
04162704
100808f0
d2820004
04122b16
c202010b
bf8c007f
100a0804
c202010f
bf8c007f
d2820005
04162204
100c2912
060e1b0d
d2820000
041e010c
d2820000
0402110f
100214ff
40400000
08020310
06021d01
d2820000
04022701
100000f0
d2820000
04022b06
c2020113
bf8c007f
d2820001
04160004
c2020117
bf8c007f
06020204
c202010a
bf8c007f
10040804
c202010e
bf8c007f
d2820002
040a2204
c2020112
bf8c007f
d2820002
040a0004
c2020116
bf8c007f
06040404
c2020109
bf8c007f
10060804
c202010d
bf8c007f
d2820003
040e2204
c2020111
bf8c007f
d2820003
040e0004
c2020115
bf8c007f
06060604
c2020108
bf8c007f
10080804
c202010c
bf8c007f
d2820004
04122204
c2020110
bf8c007f
d2820000
04120004
c2000114
bf8c007f
06000000
f80008cf
01020300
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %21 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %22 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %23 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %24 = fcmp ugt float %23, 0x3F80101020000000
  %25 = sext i1 %24 to i32
  %26 = trunc i32 %25 to i1
  %27 = select i1 %26, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %27)
  %28 = call i32 @llvm.SI.packf16(float %20, float %21)
  %29 = bitcast i32 %28 to float
  %30 = call i32 @llvm.SI.packf16(float %22, float %23)
  %31 = bitcast i32 %30 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
SI CODE:
befe0a7e
befc0306
c8080300
c8090301
7e0602ff
3c008081
d0080000
02020702
d2000003
0001e4f3
7c260680
c80c0200
c80d0201
5e040503
c80c0100
c80d0101
c8100000
c8110001
5e000704
f8001c0f
02000200
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..10]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[8].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MOV OUT[1], TEMP[1]
 13: MOV OUT[0], TEMP[0]
 14: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fmul float %46, %40
  %56 = fadd float %55, %37
  %57 = fmul float %47, %41
  %58 = fadd float %57, %38
  %59 = fmul float %48, %42
  %60 = fadd float %59, %39
  %61 = fmul float %20, %56
  %62 = fmul float %21, %56
  %63 = fmul float %22, %56
  %64 = fmul float %23, %56
  %65 = fmul float %24, %58
  %66 = fadd float %65, %61
  %67 = fmul float %25, %58
  %68 = fadd float %67, %62
  %69 = fmul float %26, %58
  %70 = fadd float %69, %63
  %71 = fmul float %27, %58
  %72 = fadd float %71, %64
  %73 = fmul float %28, %60
  %74 = fadd float %73, %66
  %75 = fmul float %29, %60
  %76 = fadd float %75, %68
  %77 = fmul float %30, %60
  %78 = fadd float %77, %70
  %79 = fmul float %31, %60
  %80 = fadd float %79, %72
  %81 = fadd float %74, %32
  %82 = fadd float %76, %33
  %83 = fadd float %78, %34
  %84 = fadd float %80, %35
  %85 = fmul float %52, %36
  %86 = fmul float %53, %36
  %87 = fmul float %54, %36
  %88 = fmul float %11, %85
  %89 = fmul float %12, %85
  %90 = fmul float %13, %85
  %91 = fmul float %14, %86
  %92 = fadd float %91, %88
  %93 = fmul float %15, %86
  %94 = fadd float %93, %89
  %95 = fmul float %16, %86
  %96 = fadd float %95, %90
  %97 = fmul float %17, %87
  %98 = fadd float %97, %92
  %99 = fmul float %18, %87
  %100 = fadd float %99, %94
  %101 = fmul float %19, %87
  %102 = fadd float %101, %96
  %103 = fmul float %98, %98
  %104 = fmul float %100, %100
  %105 = fadd float %104, %103
  %106 = fmul float %102, %102
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %98, %108
  %110 = fmul float %100, %108
  %111 = fmul float %102, %108
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %111, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020123
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820001
04060804
d2820002
041a0301
7e045b02
100c0501
100e0503
10100505
c0820700
bf8c007f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020113
bf8c007f
10120204
c2020117
bf8c007f
d2820009
04260004
f800020f
09060708
c202012a
c2028126
bf8c000f
7e0c0205
d2820002
04180904
c202011b
bf8c007f
d2820003
04260404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: DP4 TEMP[2].x, TEMP[1], CONST[7]
  5: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy
  6: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].xxxx
  7: MUL TEMP[2], TEMP[1], TEMP[2]
  8: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[3].xy, TEMP[3].xyyy
 10: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 11: DP4 TEMP[1].x, TEMP[1], CONST[6]
 12: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[5].xxxx
 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].wwww
 14: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].yzww
 15: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 16: RCP TEMP[4].x, TEMP[4].xxxx
 17: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 18: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 19: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 20: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 21: MAX TEMP[1].x, IN[2].wwww, CONST[3].wwww
 22: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 23: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[3].xyzz
 24: MOV OUT[0], TEMP[2]
 25: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %45 = load <32 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %55 = fmul float %13, %38
  %56 = fadd float %55, %39
  %57 = bitcast float %53 to i32
  %58 = bitcast float %54 to i32
  %59 = insertelement <2 x i32> undef, i32 %57, i32 0
  %60 = insertelement <2 x i32> %59, i32 %58, i32 1
  %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %41, <16 x i8> %43, i32 2)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = fmul float %62, %34
  %67 = fmul float %63, %35
  %68 = fadd float %66, %67
  %69 = fmul float %64, %36
  %70 = fadd float %68, %69
  %71 = fmul float %65, %37
  %72 = fadd float %70, %71
  %73 = fadd float %72, %29
  %74 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00)
  %75 = call float @llvm.AMDGPU.lrp(float %74, float %48, float 1.000000e+00)
  %76 = call float @llvm.AMDGPU.lrp(float %74, float %49, float 1.000000e+00)
  %77 = call float @llvm.AMDGPU.lrp(float %74, float %50, float 1.000000e+00)
  %78 = call float @llvm.AMDGPU.lrp(float %74, float %51, float 1.000000e+00)
  %79 = fmul float %62, %75
  %80 = fmul float %63, %76
  %81 = fmul float %64, %77
  %82 = fmul float %65, %78
  %83 = fmul float %12, %22
  %84 = fmul float %56, %23
  %85 = bitcast float %83 to i32
  %86 = bitcast float %84 to i32
  %87 = insertelement <2 x i32> undef, i32 %85, i32 0
  %88 = insertelement <2 x i32> %87, i32 %86, i32 1
  %89 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %45, <16 x i8> %47, i32 2)
  %90 = extractelement <4 x float> %89, i32 0
  %91 = extractelement <4 x float> %89, i32 1
  %92 = extractelement <4 x float> %89, i32 2
  %93 = extractelement <4 x float> %89, i32 3
  %94 = fmul float %62, %30
  %95 = fmul float %63, %31
  %96 = fadd float %94, %95
  %97 = fmul float %64, %32
  %98 = fadd float %96, %97
  %99 = fmul float %65, %33
  %100 = fadd float %98, %99
  %101 = fadd float %100, %28
  %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00)
  %103 = fmul float %102, %93
  %104 = fmul float %90, 0x3FCB367A00000000
  %105 = fmul float %91, 0x3FE6E2EB20000000
  %106 = fadd float %105, %104
  %107 = fmul float %92, 0x3FB27BB300000000
  %108 = fadd float %106, %107
  %109 = fcmp uge float %108, 0x3F50624DE0000000
  %110 = select i1 %109, float %108, float 0x3F50624DE0000000
  %111 = fdiv float 1.000000e+00, %110
  %112 = fmul float %90, %111
  %113 = fmul float %91, %111
  %114 = fmul float %92, %111
  %115 = fmul float %79, %90
  %116 = fmul float %80, %91
  %117 = fmul float %81, %92
  %118 = fmul float %103, %112
  %119 = fadd float %118, %115
  %120 = fmul float %103, %113
  %121 = fadd float %120, %116
  %122 = fmul float %103, %114
  %123 = fadd float %122, %117
  %124 = fmul float %119, 4.000000e+00
  %125 = fmul float %121, 4.000000e+00
  %126 = fmul float %123, 4.000000e+00
  %127 = fcmp uge float %52, %27
  %128 = select i1 %127, float %52, float %27
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %124, float %24)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %125, float %25)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %126, float %26)
  %133 = call i32 @llvm.SI.packf16(float %130, float %131)
  %134 = bitcast i32 %133 to float
  %135 = call i32 @llvm.SI.packf16(float %132, float %82)
  %136 = bitcast i32 %135 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %134, float %136, float %134, float %136)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c0840100
bf8c0070
c200091d
bf8c007f
10100a00
c200091c
bf8c007f
d2820008
04200104
c200091e
bf8c007f
d2820008
04200106
c200091f
bf8c007f
d2820008
04200107
c2000915
bf8c007f
06101000
d2060808
02010108
081210f2
c8280200
c8290201
d282000a
04261508
101c1506
c2000928
c2008929
bf8c007f
7e140201
d2820003
04280103
c2000909
bf8c007f
10160600
c2000908
bf8c007f
10140400
c0800304
c0c60508
bf8c007f
f0800f00
00030a0a
bf8c0770
101c190e
100414ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070b
7e0602ff
3d93dd98
d2820002
040a070c
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
101e050c
c2000919
bf8c007f
10060a00
c2000918
bf8c007f
d2820003
040c0104
c200091a
bf8c007f
d2820003
040c0106
c200091b
bf8c007f
d2820003
040c0107
c2000914
bf8c007f
06060600
d2060803
02010103
10061b03
d282000e
043a1f03
10201cf6
c8380700
c8390701
c200090f
bf8c007f
d00c0002
0200010e
7e1e0200
d200000e
000a1d0f
d206080e
0201010e
081e1cf2
c200090e
bf8c007f
10221e00
d2820010
0446210e
c8440300
c8450301
d2820011
04262308
10222307
5e202310
c8440100
c8450101
d2820011
04262308
10222305
10221711
1024050b
d2820011
04462503
102222f6
c200090d
bf8c007f
10241e00
d2820011
044a230e
c8480000
c8490001
d2820000
04262508
10000104
10001500
1002050a
d2820000
04020303
100000f6
c200090c
bf8c007f
10021e00
d2820000
0406010e
5e002300
f8001c0f
10001000
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xy, IN[2].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: DP4 TEMP[1].x, TEMP[0], CONST[5]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[3].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[0], IMM[0].xxxx
  5: MUL TEMP[0], TEMP[0], TEMP[1]
  6: MAX TEMP[1].x, IN[1].wwww, CONST[1].wwww
  7: MOV_SAT TEMP[1].x, TEMP[1].xxxx
  8: MUL TEMP[0], TEMP[0], TEMP[1].xxxx
  9: MOV OUT[0], TEMP[0]
 10: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 28)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %28 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0
  %30 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = bitcast float %37 to i32
  %40 = bitcast float %38 to i32
  %41 = insertelement <2 x i32> undef, i32 %39, i32 0
  %42 = insertelement <2 x i32> %41, i32 %40, i32 1
  %43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %29, <16 x i8> %31, i32 2)
  %44 = extractelement <4 x float> %43, i32 0
  %45 = extractelement <4 x float> %43, i32 1
  %46 = extractelement <4 x float> %43, i32 2
  %47 = extractelement <4 x float> %43, i32 3
  %48 = fmul float %44, %24
  %49 = fmul float %45, %25
  %50 = fadd float %48, %49
  %51 = fmul float %46, %26
  %52 = fadd float %50, %51
  %53 = fmul float %47, %27
  %54 = fadd float %52, %53
  %55 = fadd float %54, %23
  %56 = call float @llvm.AMDIL.clamp.(float %55, float 0.000000e+00, float 1.000000e+00)
  %57 = call float @llvm.AMDGPU.lrp(float %56, float %32, float 1.000000e+00)
  %58 = call float @llvm.AMDGPU.lrp(float %56, float %33, float 1.000000e+00)
  %59 = call float @llvm.AMDGPU.lrp(float %56, float %34, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %56, float %35, float 1.000000e+00)
  %61 = fmul float %44, %57
  %62 = fmul float %45, %58
  %63 = fmul float %46, %59
  %64 = fmul float %47, %60
  %65 = fcmp uge float %36, %22
  %66 = select i1 %65, float %36, float %22
  %67 = call float @llvm.AMDIL.clamp.(float %66, float 0.000000e+00, float 1.000000e+00)
  %68 = fmul float %61, %67
  %69 = fmul float %62, %67
  %70 = fmul float %63, %67
  %71 = fmul float %64, %67
  %72 = call i32 @llvm.SI.packf16(float %68, float %69)
  %73 = bitcast i32 %72 to float
  %74 = call i32 @llvm.SI.packf16(float %70, float %71)
  %75 = bitcast i32 %74 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %73, float %75, float %73, float %75)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0900
c80d0901
c8080800
c8090801
c0840300
c0c60500
bf8c007f
f0800f00
00430202
c0800100
bf8c0070
c2020115
bf8c007f
100c0604
c2020114
bf8c007f
d2820006
04180902
c2020116
bf8c007f
d2820006
04180904
c2020117
bf8c007f
d2820006
04180905
c202010d
bf8c007f
060c0c04
d2060806
02010106
080e0cf2
c8200300
c8210301
d2820008
041e1106
10121105
c8200700
c8210701
c2000107
bf8c007f
d00c0002
02000108
7e140200
d2000008
000a110a
d2060808
02010108
10121109
c8280200
c8290201
d282000a
041e1506
10141504
1014110a
5e12130a
c8280100
c8290101
d282000a
041e1506
10141503
1014110a
c82c0000
c82d0001
d2820000
041e1706
10000102
10001100
5e001500
f8001c0f
09000900
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..2], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[0].xyz, TEMP[0], CONST[3]
  9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 10: MAD TEMP[2].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 11: MOV TEMP[0].w, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 13: MOV OUT[3], TEMP[2]
 14: MOV OUT[1], CONST[8]
 15: MOV OUT[2], TEMP[0]
 16: MOV OUT[0], TEMP[1]
 17: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = fmul float %63, %52
  %72 = fadd float %71, %49
  %73 = fmul float %64, %53
  %74 = fadd float %73, %50
  %75 = fmul float %65, %54
  %76 = fadd float %75, %51
  %77 = fmul float %25, %72
  %78 = fmul float %26, %72
  %79 = fmul float %27, %72
  %80 = fmul float %28, %72
  %81 = fmul float %29, %74
  %82 = fadd float %81, %77
  %83 = fmul float %30, %74
  %84 = fadd float %83, %78
  %85 = fmul float %31, %74
  %86 = fadd float %85, %79
  %87 = fmul float %32, %74
  %88 = fadd float %87, %80
  %89 = fmul float %33, %76
  %90 = fadd float %89, %82
  %91 = fmul float %34, %76
  %92 = fadd float %91, %84
  %93 = fmul float %35, %76
  %94 = fadd float %93, %86
  %95 = fmul float %36, %76
  %96 = fadd float %95, %88
  %97 = fadd float %90, %37
  %98 = fadd float %92, %38
  %99 = fadd float %94, %39
  %100 = fadd float %96, %40
  %101 = fmul float %11, %72
  %102 = fmul float %12, %72
  %103 = fmul float %13, %72
  %104 = fmul float %14, %72
  %105 = fmul float %15, %74
  %106 = fadd float %105, %101
  %107 = fmul float %16, %74
  %108 = fadd float %107, %102
  %109 = fmul float %17, %74
  %110 = fadd float %109, %103
  %111 = fmul float %18, %74
  %112 = fadd float %111, %104
  %113 = fmul float %19, %76
  %114 = fadd float %113, %106
  %115 = fmul float %20, %76
  %116 = fadd float %115, %108
  %117 = fmul float %21, %76
  %118 = fadd float %117, %110
  %119 = fadd float %114, %22
  %120 = fadd float %116, %23
  %121 = fadd float %118, %24
  %122 = fsub float -0.000000e+00, %55
  %123 = fadd float %119, %122
  %124 = fsub float -0.000000e+00, %56
  %125 = fadd float %120, %124
  %126 = fsub float -0.000000e+00, %57
  %127 = fadd float %121, %126
  %128 = fmul float %99, %58
  %129 = fadd float %128, %59
  %130 = fmul float %69, %45
  %131 = fadd float %130, %47
  %132 = fmul float %70, %46
  %133 = fadd float %132, %48
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %125, float %127, float %129)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %131, float %133, float %110, float %112)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %98, float %99, float %100)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020132
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020131
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020130
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], IN[0], TEMP[0]
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %24, %35
  %40 = fmul float %25, %36
  %41 = fmul float %26, %37
  %42 = fmul float %27, %38
  %43 = call i32 @llvm.SI.packf16(float %39, float %40)
  %44 = bitcast i32 %43 to float
  %45 = call i32 @llvm.SI.packf16(float %41, float %42)
  %46 = bitcast i32 %45 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0b06
c81c0200
c81d0201
100e0907
5e0c0d07
c81c0100
c81d0101
100e0707
c8200000
c8210001
10000508
5e000f00
f8001c0f
06000600
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..6]
DCL TEMP[0..1], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MAD TEMP[1], IN[1].xyxy, CONST[5], CONST[6]
  5: MOV OUT[2], TEMP[1]
  6: MOV OUT[1], CONST[4]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %39 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0
  %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %5)
  %42 = extractelement <4 x float> %41, i32 0
  %43 = extractelement <4 x float> %41, i32 1
  %44 = extractelement <4 x float> %41, i32 2
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = fmul float %11, %42
  %51 = fmul float %12, %42
  %52 = fmul float %13, %42
  %53 = fmul float %14, %42
  %54 = fmul float %15, %43
  %55 = fadd float %54, %50
  %56 = fmul float %16, %43
  %57 = fadd float %56, %51
  %58 = fmul float %17, %43
  %59 = fadd float %58, %52
  %60 = fmul float %18, %43
  %61 = fadd float %60, %53
  %62 = fmul float %19, %44
  %63 = fadd float %62, %55
  %64 = fmul float %20, %44
  %65 = fadd float %64, %57
  %66 = fmul float %21, %44
  %67 = fadd float %66, %59
  %68 = fmul float %22, %44
  %69 = fadd float %68, %61
  %70 = fadd float %63, %23
  %71 = fadd float %65, %24
  %72 = fadd float %67, %25
  %73 = fadd float %69, %26
  %74 = fmul float %48, %31
  %75 = fadd float %74, %35
  %76 = fmul float %49, %32
  %77 = fadd float %76, %36
  %78 = fmul float %48, %33
  %79 = fadd float %78, %37
  %80 = fmul float %49, %34
  %81 = fadd float %80, %38
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float %28, float %29, float %30)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %77, float %79, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %71, float %72, float %73)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020113
c2028112
c2040111
c2048110
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840704
bf8c000f
e00c2000
80020100
c2020117
c202811b
bf8c0070
7e0a0205
d2820005
04140902
c2020116
c202811a
bf8c007f
7e0c0205
d2820006
04180901
c2020115
c2028119
bf8c007f
7e0e0205
d2820007
041c0902
c2020114
c2028118
bf8c007f
7e100205
d2820001
04200901
f800021f
05060701
c0820700
bf8c000f
e00c2000
80010000
c2020103
bf8c0070
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[4..5]
DCL CONST[2..3]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[5].xxxx, CONST[5].yyyy
  2: MOV TEMP[1].xy, IN[2].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MUL TEMP[1], IN[1], TEMP[1]
  5: MAX TEMP[2].x, IN[3].zzzz, CONST[3].wwww
  6: MOV_SAT TEMP[2].x, TEMP[2].xxxx
  7: MUL TEMP[1], TEMP[1], TEMP[2].xxxx
  8: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[2].xy, TEMP[2].xyyy
 10: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
 11: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].zzzz, CONST[4].wwww
 12: RCP TEMP[2].x, TEMP[2].xxxx
 13: ADD TEMP[2].x, TEMP[2].xxxx, -IN[3].xxxx
 14: MUL_SAT TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy
 15: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx
 16: MOV TEMP[1].w, TEMP[2].xxxx
 17: MOV OUT[0], TEMP[1]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %29 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %30 = load <32 x i8> addrspace(2)* %29, !tbaa !0
  %31 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %34 = load <32 x i8> addrspace(2)* %33, !tbaa !0
  %35 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %46 = fmul float %13, %27
  %47 = fadd float %46, %28
  %48 = bitcast float %41 to i32
  %49 = bitcast float %42 to i32
  %50 = insertelement <2 x i32> undef, i32 %48, i32 0
  %51 = insertelement <2 x i32> %50, i32 %49, i32 1
  %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %30, <16 x i8> %32, i32 2)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = fmul float %37, %53
  %58 = fmul float %38, %54
  %59 = fmul float %39, %55
  %60 = fmul float %40, %56
  %61 = fcmp uge float %45, %24
  %62 = select i1 %61, float %45, float %24
  %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
  %64 = fmul float %57, %63
  %65 = fmul float %58, %63
  %66 = fmul float %59, %63
  %67 = fmul float %60, %63
  %68 = fmul float %12, %22
  %69 = fmul float %47, %23
  %70 = bitcast float %68 to i32
  %71 = bitcast float %69 to i32
  %72 = insertelement <2 x i32> undef, i32 %70, i32 0
  %73 = insertelement <2 x i32> %72, i32 %71, i32 1
  %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %34, <16 x i8> %36, i32 2)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = fmul float %75, %25
  %77 = fadd float %76, %26
  %78 = fdiv float 1.000000e+00, %77
  %79 = fsub float -0.000000e+00, %43
  %80 = fadd float %78, %79
  %81 = fmul float %80, %44
  %82 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00)
  %83 = fmul float %67, %82
  %84 = call i32 @llvm.SI.packf16(float %64, float %65)
  %85 = bitcast i32 %84 to float
  %86 = call i32 @llvm.SI.packf16(float %66, float %83)
  %87 = bitcast i32 %86 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %85, float %87, float %85, float %87)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140500
c8150501
c8100400
c8110401
c0840300
c0c60500
bf8c007f
f0800f00
00430504
c8100100
c8110101
bf8c0770
10080d04
c8240a00
c8250a01
c0840100
bf8c007f
c200090f
bf8c007f
d00c000c
02000109
7e140200
d2000009
0032130a
d206080a
02010109
10081504
c8240000
c8250001
10120b09
10121509
5e080909
c8240200
c8250201
10120f09
10121509
c82c0300
c82d0301
100a110b
100a1505
c8180800
c8190801
c2000914
c2008915
bf8c007f
7e0e0201
d2820003
041c0103
c2000909
bf8c007f
10100600
c2000908
bf8c007f
100e0400
c0800304
c0c60508
bf8c007f
f0800100
00030207
c2000912
c2008913
bf8c0070
7e060201
d2820002
040c0102
7e045502
08040d02
c80c0900
c80d0901
10000702
d2060800
02010100
10000105
5e000109
f8001c0f
00040004
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..24]
DCL TEMP[0..5], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     6.2820,    -1.0000,     0.0000}
IMM[1] INT32 {0, 1, 2, 3}
  0: SGE TEMP[0].x, IN[0].wwww, CONST[11].yyyy
  1: F2I TEMP[0].x, -TEMP[0]
  2: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
  3: SGE TEMP[1].x, IN[0].wwww, CONST[11].zzzz
  4: F2I TEMP[1].x, -TEMP[1]
  5: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx
  6: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
  7: F2I TEMP[0].x, TEMP[0].xxxx
  8: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
  9: I2F TEMP[2].y, TEMP[1].yyyy
 10: CMP TEMP[2].x, TEMP[2].yyyy, CONST[20].yyyy, CONST[20].xxxx
 11: I2F TEMP[3].z, TEMP[1].zzzz
 12: CMP TEMP[2].x, TEMP[3].zzzz, CONST[20].zzzz, TEMP[2].xxxx
 13: I2F TEMP[1].w, TEMP[1].wwww
 14: CMP TEMP[2].x, TEMP[1].wwww, CONST[20].wwww, TEMP[2].xxxx
 15: USEQ TEMP[1].yzw, TEMP[0].xxxx, IMM[1]
 16: I2F TEMP[4].y, TEMP[1].yyyy
 17: CMP TEMP[3].x, TEMP[4].yyyy, CONST[11].yyyy, CONST[11].xxxx
 18: I2F TEMP[4].z, TEMP[1].zzzz
 19: CMP TEMP[3].x, TEMP[4].zzzz, CONST[11].zzzz, TEMP[3].xxxx
 20: I2F TEMP[1].w, TEMP[1].wwww
 21: CMP TEMP[3].x, TEMP[1].wwww, CONST[11].wwww, TEMP[3].xxxx
 22: ADD TEMP[1].x, IN[0].wwww, -TEMP[3].xxxx
 23: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 24: USEQ TEMP[2].yzw, TEMP[0].xxxx, IMM[1]
 25: I2F TEMP[4].y, TEMP[2].yyyy
 26: CMP TEMP[3].x, TEMP[4].yyyy, CONST[16].yyyy, CONST[16].xxxx
 27: I2F TEMP[4].z, TEMP[2].zzzz
 28: CMP TEMP[3].x, TEMP[4].zzzz, CONST[16].zzzz, TEMP[3].xxxx
 29: I2F TEMP[2].w, TEMP[2].wwww
 30: CMP TEMP[3].x, TEMP[2].wwww, CONST[16].wwww, TEMP[3].xxxx
 31: UADD TEMP[2].x, TEMP[0].xxxx, IMM[1].yyyy
 32: USEQ TEMP[2].yzw, TEMP[2].xxxx, IMM[1]
 33: I2F TEMP[5].y, TEMP[2].yyyy
 34: CMP TEMP[4].x, TEMP[5].yyyy, CONST[16].yyyy, CONST[16].xxxx
 35: I2F TEMP[5].z, TEMP[2].zzzz
 36: CMP TEMP[4].x, TEMP[5].zzzz, CONST[16].zzzz, TEMP[4].xxxx
 37: I2F TEMP[2].w, TEMP[2].wwww
 38: CMP TEMP[4].x, TEMP[2].wwww, CONST[16].wwww, TEMP[4].xxxx
 39: ADD TEMP[2].xy, IN[1].xyyy, -CONST[17].xyyy
 40: LRP TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx, TEMP[3].xxxx
 41: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xxxx
 42: ADD TEMP[3].x, IMM[0].xxxx, -IN[2].xxxx
 43: MUL TEMP[3].xy, CONST[18].xyyy, TEMP[3].xxxx
 44: MAD TEMP[3].xy, CONST[18].zwww, IN[2].xxxx, TEMP[3].xyyy
 45: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[3].xyyy
 46: LRP TEMP[3].x, IN[2].zzzz, CONST[17].wwww, CONST[17].zzzz
 47: MUL TEMP[4].x, IMM[0].yyyy, IN[2].yyyy
 48: MAD TEMP[3].x, TEMP[3].xxxx, IN[0].wwww, TEMP[4].xxxx
 49: SIN TEMP[4].x, TEMP[3].xxxx
 50: COS TEMP[3].x, TEMP[3].xxxx
 51: MOV TEMP[4].y, TEMP[3].xxxx
 52: MUL TEMP[5].xy, TEMP[4].xyyy, IMM[0].zzzz
 53: MOV TEMP[4].z, TEMP[5].yyxy
 54: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[3].xxxx
 55: MAD TEMP[2].xy, TEMP[2].yxxx, TEMP[4].xzzz, TEMP[3].xyyy
 56: MUL TEMP[3].xyz, CONST[21].xyzz, TEMP[2].xxxx
 57: MAD TEMP[2].xyz, CONST[22].xyzz, TEMP[2].yyyy, TEMP[3].xyzz
 58: ADD TEMP[2].xyz, IN[0].xyzz, TEMP[2].xyzz
 59: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 60: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 61: MAD TEMP[3], CONST[2], TEMP[2].zzzz, TEMP[3]
 62: ADD TEMP[3], TEMP[3], CONST[3]
 63: UADD TEMP[4].x, TEMP[0].xxxx, IMM[1].yyyy
 64: UADD TEMP[0].x, TEMP[0].xxxx, IMM[1].zzzz
 65: UARL ADDR[0].x, TEMP[4].xxxx
 66: UARL ADDR[0].x, TEMP[4].xxxx
 67: MOV TEMP[4], CONST[ADDR[0].x+11]
 68: UARL ADDR[0].x, TEMP[0].xxxx
 69: LRP TEMP[0], TEMP[1].xxxx, CONST[ADDR[0].x+11], TEMP[4]
 70: MUL TEMP[0], TEMP[0], CONST[9]
 71: MUL TEMP[1].x, IN[2].wwww, CONST[19].zzzz
 72: FLR TEMP[1].xy, TEMP[1].xxxx
 73: MUL TEMP[1].xy, CONST[19].xxxx, TEMP[1].xyyy
 74: FLR TEMP[4].xy, TEMP[1].xyyy
 75: MOV TEMP[5].yw, TEMP[4].yxyy
 76: ADD TEMP[1].xy, TEMP[1].xyyy, -TEMP[4].xyyy
 77: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[19].wwww
 78: FLR TEMP[1].xy, TEMP[1].xyyy
 79: MOV TEMP[5].xz, TEMP[1].xxyx
 80: ADD TEMP[1], IN[1].xyxy, TEMP[5]
 81: MUL TEMP[1], TEMP[1], CONST[19].xyxy
 82: MUL TEMP[4], CONST[4], TEMP[2].xxxx
 83: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4]
 84: MAD TEMP[2], CONST[6], TEMP[2].zzzz, TEMP[4]
 85: ADD TEMP[2].z, TEMP[2], CONST[7]
 86: MAD TEMP[2].x, TEMP[2].zzzz, CONST[8].xxxx, CONST[8].yyyy
 87: MOV TEMP[2].y, CONST[8].zzzz
 88: MAD TEMP[4].x, TEMP[3].zzzz, CONST[10].xxxx, CONST[10].yyyy
 89: MOV TEMP[4].z, TEMP[4].xxxx
 90: MOV TEMP[4].xy, TEMP[2].xyxx
 91: MOV OUT[2], TEMP[1]
 92: MOV OUT[3], TEMP[4]
 93: MOV OUT[1], TEMP[0]
 94: MOV OUT[0], TEMP[3]
 95: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 268)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 280)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 284)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 300)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 304)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 308)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 312)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 316)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 320)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 324)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 328)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 332)
  %66 = call float @llvm.SI.load.const(<16 x i8> %10, i32 336)
  %67 = call float @llvm.SI.load.const(<16 x i8> %10, i32 340)
  %68 = call float @llvm.SI.load.const(<16 x i8> %10, i32 344)
  %69 = call float @llvm.SI.load.const(<16 x i8> %10, i32 352)
  %70 = call float @llvm.SI.load.const(<16 x i8> %10, i32 356)
  %71 = call float @llvm.SI.load.const(<16 x i8> %10, i32 360)
  %72 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %5)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = extractelement <4 x float> %74, i32 1
  %77 = extractelement <4 x float> %74, i32 2
  %78 = extractelement <4 x float> %74, i32 3
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0
  %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %5)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = extractelement <4 x float> %86, i32 2
  %90 = extractelement <4 x float> %86, i32 3
  %91 = fcmp uge float %78, %43
  %92 = select i1 %91, float 1.000000e+00, float 0.000000e+00
  %93 = fsub float -0.000000e+00, %92
  %94 = fptosi float %93 to i32
  %95 = bitcast i32 %94 to float
  %96 = bitcast float %95 to i32
  %97 = and i32 %96, 1065353216
  %98 = bitcast i32 %97 to float
  %99 = fcmp uge float %78, %44
  %100 = select i1 %99, float 1.000000e+00, float 0.000000e+00
  %101 = fsub float -0.000000e+00, %100
  %102 = fptosi float %101 to i32
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %103 to i32
  %105 = and i32 %104, 1065353216
  %106 = bitcast i32 %105 to float
  %107 = fadd float %98, %106
  %108 = fptosi float %107 to i32
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = icmp eq i32 %110, 1
  %112 = sext i1 %111 to i32
  %113 = bitcast float %109 to i32
  %114 = icmp eq i32 %113, 2
  %115 = sext i1 %114 to i32
  %116 = bitcast float %109 to i32
  %117 = icmp eq i32 %116, 3
  %118 = sext i1 %117 to i32
  %119 = bitcast i32 %112 to float
  %120 = bitcast i32 %115 to float
  %121 = bitcast i32 %118 to float
  %122 = bitcast float %119 to i32
  %123 = sitofp i32 %122 to float
  %124 = call float @llvm.AMDGPU.cndlt(float %123, float %63, float %62)
  %125 = bitcast float %120 to i32
  %126 = sitofp i32 %125 to float
  %127 = call float @llvm.AMDGPU.cndlt(float %126, float %64, float %124)
  %128 = bitcast float %121 to i32
  %129 = sitofp i32 %128 to float
  %130 = call float @llvm.AMDGPU.cndlt(float %129, float %65, float %127)
  %131 = bitcast float %109 to i32
  %132 = icmp eq i32 %131, 1
  %133 = sext i1 %132 to i32
  %134 = bitcast float %109 to i32
  %135 = icmp eq i32 %134, 2
  %136 = sext i1 %135 to i32
  %137 = bitcast float %109 to i32
  %138 = icmp eq i32 %137, 3
  %139 = sext i1 %138 to i32
  %140 = bitcast i32 %133 to float
  %141 = bitcast i32 %136 to float
  %142 = bitcast i32 %139 to float
  %143 = bitcast float %140 to i32
  %144 = sitofp i32 %143 to float
  %145 = call float @llvm.AMDGPU.cndlt(float %144, float %43, float %42)
  %146 = bitcast float %141 to i32
  %147 = sitofp i32 %146 to float
  %148 = call float @llvm.AMDGPU.cndlt(float %147, float %44, float %145)
  %149 = bitcast float %142 to i32
  %150 = sitofp i32 %149 to float
  %151 = call float @llvm.AMDGPU.cndlt(float %150, float %45, float %148)
  %152 = fsub float -0.000000e+00, %151
  %153 = fadd float %78, %152
  %154 = fmul float %153, %130
  %155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00)
  %156 = bitcast float %109 to i32
  %157 = icmp eq i32 %156, 1
  %158 = sext i1 %157 to i32
  %159 = bitcast float %109 to i32
  %160 = icmp eq i32 %159, 2
  %161 = sext i1 %160 to i32
  %162 = bitcast float %109 to i32
  %163 = icmp eq i32 %162, 3
  %164 = sext i1 %163 to i32
  %165 = bitcast i32 %158 to float
  %166 = bitcast i32 %161 to float
  %167 = bitcast i32 %164 to float
  %168 = bitcast float %165 to i32
  %169 = sitofp i32 %168 to float
  %170 = call float @llvm.AMDGPU.cndlt(float %169, float %47, float %46)
  %171 = bitcast float %166 to i32
  %172 = sitofp i32 %171 to float
  %173 = call float @llvm.AMDGPU.cndlt(float %172, float %48, float %170)
  %174 = bitcast float %167 to i32
  %175 = sitofp i32 %174 to float
  %176 = call float @llvm.AMDGPU.cndlt(float %175, float %49, float %173)
  %177 = bitcast float %109 to i32
  %178 = add i32 %177, 1
  %179 = bitcast i32 %178 to float
  %180 = bitcast float %179 to i32
  %181 = icmp eq i32 %180, 1
  %182 = sext i1 %181 to i32
  %183 = bitcast float %179 to i32
  %184 = icmp eq i32 %183, 2
  %185 = sext i1 %184 to i32
  %186 = bitcast float %179 to i32
  %187 = icmp eq i32 %186, 3
  %188 = sext i1 %187 to i32
  %189 = bitcast i32 %182 to float
  %190 = bitcast i32 %185 to float
  %191 = bitcast i32 %188 to float
  %192 = bitcast float %189 to i32
  %193 = sitofp i32 %192 to float
  %194 = call float @llvm.AMDGPU.cndlt(float %193, float %47, float %46)
  %195 = bitcast float %190 to i32
  %196 = sitofp i32 %195 to float
  %197 = call float @llvm.AMDGPU.cndlt(float %196, float %48, float %194)
  %198 = bitcast float %191 to i32
  %199 = sitofp i32 %198 to float
  %200 = call float @llvm.AMDGPU.cndlt(float %199, float %49, float %197)
  %201 = fsub float -0.000000e+00, %50
  %202 = fadd float %82, %201
  %203 = fsub float -0.000000e+00, %51
  %204 = fadd float %83, %203
  %205 = call float @llvm.AMDGPU.lrp(float %155, float %200, float %176)
  %206 = fmul float %202, %205
  %207 = fmul float %204, %205
  %208 = fsub float -0.000000e+00, %87
  %209 = fadd float 1.000000e+00, %208
  %210 = fmul float %54, %209
  %211 = fmul float %55, %209
  %212 = fmul float %56, %87
  %213 = fadd float %212, %210
  %214 = fmul float %57, %87
  %215 = fadd float %214, %211
  %216 = fmul float %206, %213
  %217 = fmul float %207, %215
  %218 = call float @llvm.AMDGPU.lrp(float %89, float %53, float %52)
  %219 = fmul float 0x401920C4A0000000, %88
  %220 = fmul float %218, %78
  %221 = fadd float %220, %219
  %222 = call float @llvm.sin.f32(float %221)
  %223 = call float @llvm.cos.f32(float %221)
  %224 = fmul float %222, -1.000000e+00
  %225 = fmul float %216, %223
  %226 = fmul float %217, %223
  %227 = fmul float %217, %222
  %228 = fadd float %227, %225
  %229 = fmul float %216, %224
  %230 = fadd float %229, %226
  %231 = fmul float %66, %228
  %232 = fmul float %67, %228
  %233 = fmul float %68, %228
  %234 = fmul float %69, %230
  %235 = fadd float %234, %231
  %236 = fmul float %70, %230
  %237 = fadd float %236, %232
  %238 = fmul float %71, %230
  %239 = fadd float %238, %233
  %240 = fadd float %75, %235
  %241 = fadd float %76, %237
  %242 = fadd float %77, %239
  %243 = fmul float %11, %240
  %244 = fmul float %12, %240
  %245 = fmul float %13, %240
  %246 = fmul float %14, %240
  %247 = fmul float %15, %241
  %248 = fadd float %247, %243
  %249 = fmul float %16, %241
  %250 = fadd float %249, %244
  %251 = fmul float %17, %241
  %252 = fadd float %251, %245
  %253 = fmul float %18, %241
  %254 = fadd float %253, %246
  %255 = fmul float %19, %242
  %256 = fadd float %255, %248
  %257 = fmul float %20, %242
  %258 = fadd float %257, %250
  %259 = fmul float %21, %242
  %260 = fadd float %259, %252
  %261 = fmul float %22, %242
  %262 = fadd float %261, %254
  %263 = fadd float %256, %23
  %264 = fadd float %258, %24
  %265 = fadd float %260, %25
  %266 = fadd float %262, %26
  %267 = bitcast float %109 to i32
  %268 = add i32 %267, 1
  %269 = bitcast i32 %268 to float
  %270 = bitcast float %109 to i32
  %271 = add i32 %270, 2
  %272 = bitcast i32 %271 to float
  %273 = bitcast float %269 to i32
  %274 = shl i32 %273, 4
  %275 = add i32 %274, 176
  %276 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %275)
  %277 = shl i32 %273, 4
  %278 = add i32 %277, 180
  %279 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %278)
  %280 = shl i32 %273, 4
  %281 = add i32 %280, 184
  %282 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %281)
  %283 = shl i32 %273, 4
  %284 = add i32 %283, 188
  %285 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %284)
  %286 = bitcast float %272 to i32
  %287 = shl i32 %286, 4
  %288 = add i32 %287, 176
  %289 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %288)
  %290 = call float @llvm.AMDGPU.lrp(float %155, float %289, float %276)
  %291 = shl i32 %286, 4
  %292 = add i32 %291, 180
  %293 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %292)
  %294 = call float @llvm.AMDGPU.lrp(float %155, float %293, float %279)
  %295 = shl i32 %286, 4
  %296 = add i32 %295, 184
  %297 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %296)
  %298 = call float @llvm.AMDGPU.lrp(float %155, float %297, float %282)
  %299 = shl i32 %286, 4
  %300 = add i32 %299, 188
  %301 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %300)
  %302 = call float @llvm.AMDGPU.lrp(float %155, float %301, float %285)
  %303 = fmul float %290, %36
  %304 = fmul float %294, %37
  %305 = fmul float %298, %38
  %306 = fmul float %302, %39
  %307 = fmul float %90, %60
  %308 = call float @floor(float %307)
  %309 = call float @floor(float %307)
  %310 = fmul float %58, %308
  %311 = fmul float %58, %309
  %312 = call float @floor(float %310)
  %313 = call float @floor(float %311)
  %314 = fsub float -0.000000e+00, %312
  %315 = fadd float %310, %314
  %316 = fsub float -0.000000e+00, %313
  %317 = fadd float %311, %316
  %318 = fmul float %315, %61
  %319 = fmul float %317, %61
  %320 = call float @floor(float %318)
  %321 = call float @floor(float %319)
  %322 = fadd float %82, %320
  %323 = fadd float %83, %312
  %324 = fadd float %82, %321
  %325 = fadd float %83, %313
  %326 = fmul float %322, %58
  %327 = fmul float %323, %59
  %328 = fmul float %324, %58
  %329 = fmul float %325, %59
  %330 = fmul float %27, %240
  %331 = fmul float %28, %240
  %332 = fmul float %29, %241
  %333 = fadd float %332, %330
  %334 = fmul float %30, %241
  %335 = fadd float %334, %331
  %336 = fmul float %31, %242
  %337 = fadd float %336, %333
  %338 = fadd float %337, %32
  %339 = fmul float %338, %33
  %340 = fadd float %339, %34
  %341 = fmul float %265, %40
  %342 = fadd float %341, %41
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %303, float %304, float %305, float %306)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %326, float %327, float %328, float %329)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %340, float %35, float %342, float %335)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %263, float %264, float %265, float %266)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.sin.f32(float) #3

; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #3

; Function Attrs: readonly
declare float @floor(float) #4

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }
attributes #4 = { readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840700
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c206012e
bf8c007f
d00c0004
02001904
d2000005
0011e480
d2060005
22010105
7e0a1105
360a0af2
c206812d
bf8c007f
d00c0004
02001b04
d2000006
0011e480
d2060006
22010106
7e0c1106
360c0cf2
060a0b06
7e0e1105
d1040004
02010307
d2000005
00118280
7e0a0b05
d0080004
02020a80
c2040151
bf8c007f
7e0a0208
c2040150
bf8c007f
7e0c0208
d2000005
00120b06
d1040008
02010507
d2000006
00218280
7e0c0b06
d0080008
02020c80
c2050152
bf8c007f
7e0c020a
d2000005
00220d05
d104000a
02010707
d2000006
00298280
7e0c0b06
d008000a
02020c80
c2070153
bf8c007f
7e0c020e
d2000005
002a0d05
7e0c020d
c206812c
bf8c007f
7e10020d
d2000006
00120d08
7e10020c
d2000006
00221106
c206012f
bf8c007f
7e10020c
d2000006
002a1106
080c0d04
100a0b06
d2060805
02010105
081e0af2
4a0c0e81
34100c84
4a1210ff
000000bc
e0301000
80000909
bf8c0770
1012130f
4a0e0e82
340e0e84
4a140eff
000000bc
e0301000
80000a0a
bf8c0770
d2820009
04261505
c2060127
bf8c007f
1012120c
4a1410ff
000000b8
e0301000
80000a0a
bf8c0770
1014150f
4a160eff
000000b8
e0301000
80000b0b
bf8c0770
d282000a
042a1705
c2060126
bf8c007f
1014140c
4a1610ff
000000b4
e0301000
80000b0b
bf8c0770
1016170f
4a180eff
000000b4
e0301000
80000c0c
bf8c0770
d282000b
042e1905
c2060125
bf8c007f
1016160c
4a1010ff
000000b0
e0301000
80000808
bf8c0770
1010110f
4a0e0eff
000000b0
e0301000
80000707
bf8c0770
d2820007
04220f05
c2060124
bf8c007f
100e0e0c
f800020f
090a0b07
c0860704
bf8c000f
e00c2000
80030b00
c0860708
bf8c0070
e00c2000
80030700
c203014e
bf8c0070
10001406
7e004900
c203014c
bf8c007f
10200006
7e224910
0600230c
c203814d
bf8c007f
10000007
08202310
c203814f
bf8c007f
10202007
7e204910
0620210b
10202006
f800021f
00100010
c2030141
bf8c000f
7e200206
c2030140
bf8c007f
7e240206
d2000000
00122112
c2020142
bf8c007f
7e220204
d2000013
00222300
c2020143
bf8c007f
7e000204
d2000013
002a0113
101e270f
d1040004
02010306
d2000013
00118280
7e260b13
d0080004
02022680
d2000010
00122112
d1040004
02010506
d2000012
00118280
7e240b12
d0080004
02022480
d2000010
00122310
d1040004
02010706
d2000006
00118280
7e0c0b06
d0080004
02020c80
d2000000
00120110
d2820000
043e0105
c2020144
bf8c007f
0a0a1604
100a0105
080c0ef2
c2020148
bf8c007f
101e0c04
c202014a
bf8c007f
d282000f
043e0e04
100a1f05
081e12f2
c2020146
bf8c007f
101e1e04
c2020147
bf8c007f
d282000f
043c0909
102010ff
40c90625
d282000f
0442090f
10201eff
3e22f983
7e1e6b10
10221f05
c2020145
bf8c007f
0a161804
1000010b
c2020149
bf8c007f
100c0c04
c202014b
bf8c007f
d2820006
041a0e04
10000d00
7e0c6d10
100e0d00
080e2307
100a0d05
d2820008
04161f00
c2020155
bf8c007f
10001004
c2020159
bf8c007f
d2820000
04020e04
06000102
c2020154
bf8c007f
100a1004
c2020158
bf8c007f
d2820005
04160e04
060a0b01
c2020113
bf8c007f
100c0a04
c2020117
bf8c007f
d2820006
041a0004
c2020102
bf8c007f
10120a04
c2020106
bf8c007f
d2820009
04260004
c2020156
bf8c007f
10101004
c202015a
bf8c007f
d2820007
04220e04
06020f03
c202010a
bf8c007f
d2820002
04260204
c202010e
bf8c007f
06040404
c2020128
c2028129
bf8c007f
7e060205
d2820003
040c0902
c2020112
bf8c007f
10080a04
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120204
c202011e
bf8c007f
06080804
c2020120
c2028121
bf8c007f
7e0e0205
d2820004
041c0904
c2020122
bf8c007f
7e0e0204
f800022f
06030704
c2020103
bf8c000f
10060a04
c2020107
bf8c007f
d2820003
040e0004
c202010b
bf8c007f
d2820003
040e0204
c202010f
bf8c007f
06060604
c2020101
bf8c007f
10080a04
c2020105
bf8c007f
d2820004
04120004
c2020109
bf8c007f
d2820004
04120204
c202010d
bf8c007f
06080804
c2020100
bf8c007f
100a0a04
c2020104
bf8c007f
d2820000
04160004
c2020108
bf8c007f
d2820000
04020204
c200010c
bf8c007f
06000000
f80008cf
03020400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[14..15]
DCL CONST[5..13]
DCL TEMP[0..1]
DCL TEMP[2..6], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,    -0.0100}
IMM[1] FLT32 {    0.0000,    -0.5000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[15].xxxx, CONST[15].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: UIF TEMP[1].xxxx :3
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :3
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[4].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: DP3 TEMP[4].x, IN[5].xyzz, IN[5].xyzz
 11: RSQ TEMP[4].x, TEMP[4].xxxx
 12: MUL TEMP[4].xyz, IN[5].xyzz, TEMP[4].xxxx
 13: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx
 14: DP3 TEMP[4].x, TEMP[2].xyzz, IN[3].xyzz
 15: MUL TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz
 16: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 17: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 18: MOV TEMP[2].xyz, TEMP[2].xyzz
 19: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 20: DP4 TEMP[4].x, TEMP[3], CONST[13]
 21: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[11].yyyy
 22: LRP TEMP[4], TEMP[4].xxxx, IN[2], IMM[0].yyyy
 23: MUL TEMP[4], TEMP[3], TEMP[4]
 24: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[2].wwww
 25: DP4 TEMP[3].x, TEMP[3], CONST[14]
 26: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[11].zzzz
 27: MAD TEMP[2].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 28: MAX TEMP[3].x, IN[3].wwww, CONST[7].wwww
 29: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 30: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[7].xyzz
 31: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 32: MOV TEMP[5].xy, TEMP[3].xyyy
 33: TEX TEMP[5].x, TEMP[5], SAMP[2], 2D
 34: MAD TEMP[5].x, TEMP[5].xxxx, CONST[6].zzzz, CONST[6].wwww
 35: RCP TEMP[5].x, TEMP[5].xxxx
 36: ADD TEMP[5].x, TEMP[5].xxxx, -IN[4].zzzz
 37: MUL_SAT TEMP[5].x, TEMP[5].xxxx, IN[4].wwww
 38: MUL TEMP[4].x, TEMP[4].wwww, TEMP[5].xxxx
 39: MOV TEMP[2].w, TEMP[4].xxxx
 40: MAD TEMP[5].xy, CONST[9].xzzz, TEMP[4].xxxx, CONST[9].ywww
 41: ADD TEMP[6].x, TEMP[5].xxxx, IMM[0].wwww
 42: SLT TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx
 43: F2I TEMP[6].x, -TEMP[6]
 44: UIF TEMP[6].xxxx :3
 45:   KILL
 46: ENDIF
 47: MUL TEMP[6].xy, TEMP[3].xyyy, CONST[8].yzzz
 48: MOV TEMP[6].xy, TEMP[6].xyyy
 49: TEX TEMP[6].xy, TEMP[6], SAMP[3], 2D
 50: ADD TEMP[6].xy, TEMP[6].xyyy, IMM[1].yyyy
 51: MUL TEMP[4].x, CONST[8].xxxx, TEMP[4].xxxx
 52: MAD TEMP[3].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[3].xyyy
 53: MOV TEMP[3].xy, TEMP[3].xyyy
 54: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 55: MUL TEMP[2], TEMP[2], TEMP[5].xxxx
 56: MAD TEMP[2], TEMP[3], TEMP[5].yyyy, TEMP[2]
 57: MOV OUT[0], TEMP[2]
 58: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = fcmp ugt float %16, 0.000000e+00
  %70 = select i1 %69, float 1.000000e+00, float 0.000000e+00
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %86 = fmul float %13, %47
  %87 = fadd float %86, %48
  %88 = call float @llvm.AMDIL.clamp.(float %70, float 0.000000e+00, float 1.000000e+00)
  %89 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %90 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %91 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %92 = bitcast float %88 to i32
  %93 = icmp ne i32 %92, 0
  %. = select i1 %93, float -1.000000e+00, float 1.000000e+00
  %94 = bitcast float %79 to i32
  %95 = bitcast float %80 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %50, <16 x i8> %52, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %83, %83
  %104 = fmul float %84, %84
  %105 = fadd float %104, %103
  %106 = fmul float %85, %85
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %83, %108
  %110 = fmul float %84, %108
  %111 = fmul float %85, %108
  %112 = fmul float %109, %.
  %113 = fmul float %110, %.
  %114 = fmul float %111, %.
  %115 = fmul float %112, %75
  %116 = fmul float %113, %76
  %117 = fadd float %116, %115
  %118 = fmul float %114, %77
  %119 = fadd float %117, %118
  %120 = fmul float %119, %112
  %121 = fmul float %119, %113
  %122 = fmul float %119, %114
  %123 = fmul float 2.000000e+00, %120
  %124 = fmul float 2.000000e+00, %121
  %125 = fmul float 2.000000e+00, %122
  %126 = fsub float -0.000000e+00, %123
  %127 = fadd float %75, %126
  %128 = fsub float -0.000000e+00, %124
  %129 = fadd float %76, %128
  %130 = fsub float -0.000000e+00, %125
  %131 = fadd float %77, %130
  %132 = insertelement <4 x float> undef, float %127, i32 0
  %133 = insertelement <4 x float> %132, float %129, i32 1
  %134 = insertelement <4 x float> %133, float %131, i32 2
  %135 = insertelement <4 x float> %134, float 0.000000e+00, i32 3
  %136 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %135)
  %137 = extractelement <4 x float> %136, i32 0
  %138 = extractelement <4 x float> %136, i32 1
  %139 = extractelement <4 x float> %136, i32 2
  %140 = extractelement <4 x float> %136, i32 3
  %141 = call float @fabs(float %139)
  %142 = fdiv float 1.000000e+00, %141
  %143 = fmul float %137, %142
  %144 = fadd float %143, 1.500000e+00
  %145 = fmul float %138, %142
  %146 = fadd float %145, 1.500000e+00
  %147 = bitcast float %146 to i32
  %148 = bitcast float %144 to i32
  %149 = bitcast float %140 to i32
  %150 = insertelement <4 x i32> undef, i32 %147, i32 0
  %151 = insertelement <4 x i32> %150, i32 %148, i32 1
  %152 = insertelement <4 x i32> %151, i32 %149, i32 2
  %153 = insertelement <4 x i32> %152, i32 undef, i32 3
  %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %153, <32 x i8> %54, <16 x i8> %56, i32 4)
  %155 = extractelement <4 x float> %154, i32 0
  %156 = extractelement <4 x float> %154, i32 1
  %157 = extractelement <4 x float> %154, i32 2
  %158 = extractelement <4 x float> %154, i32 3
  %159 = fmul float %99, %39
  %160 = fmul float %100, %40
  %161 = fadd float %159, %160
  %162 = fmul float %101, %41
  %163 = fadd float %161, %162
  %164 = fmul float %102, %42
  %165 = fadd float %163, %164
  %166 = fadd float %165, %37
  %167 = call float @llvm.AMDIL.clamp.(float %166, float 0.000000e+00, float 1.000000e+00)
  %168 = call float @llvm.AMDGPU.lrp(float %167, float %71, float 1.000000e+00)
  %169 = call float @llvm.AMDGPU.lrp(float %167, float %72, float 1.000000e+00)
  %170 = call float @llvm.AMDGPU.lrp(float %167, float %73, float 1.000000e+00)
  %171 = call float @llvm.AMDGPU.lrp(float %167, float %74, float 1.000000e+00)
  %172 = fmul float %99, %168
  %173 = fmul float %100, %169
  %174 = fmul float %101, %170
  %175 = fmul float %102, %171
  %176 = fmul float %155, %158
  %177 = fmul float %156, %158
  %178 = fmul float %157, %158
  %179 = fmul float %99, %43
  %180 = fmul float %100, %44
  %181 = fadd float %179, %180
  %182 = fmul float %101, %45
  %183 = fadd float %181, %182
  %184 = fmul float %102, %46
  %185 = fadd float %183, %184
  %186 = fadd float %185, %38
  %187 = call float @llvm.AMDIL.clamp.(float %186, float 0.000000e+00, float 1.000000e+00)
  %188 = fmul float %176, %187
  %189 = fadd float %188, %172
  %190 = fmul float %177, %187
  %191 = fadd float %190, %173
  %192 = fmul float %178, %187
  %193 = fadd float %192, %174
  %194 = fcmp uge float %78, %29
  %195 = select i1 %194, float %78, float %29
  %196 = call float @llvm.AMDIL.clamp.(float %195, float 0.000000e+00, float 1.000000e+00)
  %197 = call float @llvm.AMDGPU.lrp(float %196, float %189, float %26)
  %198 = call float @llvm.AMDGPU.lrp(float %196, float %191, float %27)
  %199 = call float @llvm.AMDGPU.lrp(float %196, float %193, float %28)
  %200 = fmul float %12, %22
  %201 = fmul float %87, %23
  %202 = bitcast float %200 to i32
  %203 = bitcast float %201 to i32
  %204 = insertelement <2 x i32> undef, i32 %202, i32 0
  %205 = insertelement <2 x i32> %204, i32 %203, i32 1
  %206 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %205, <32 x i8> %58, <16 x i8> %60, i32 2)
  %207 = extractelement <4 x float> %206, i32 0
  %208 = fmul float %207, %24
  %209 = fadd float %208, %25
  %210 = fdiv float 1.000000e+00, %209
  %211 = fsub float -0.000000e+00, %81
  %212 = fadd float %210, %211
  %213 = fmul float %212, %82
  %214 = call float @llvm.AMDIL.clamp.(float %213, float 0.000000e+00, float 1.000000e+00)
  %215 = fmul float %175, %214
  %216 = fmul float %33, %215
  %217 = fadd float %216, %34
  %218 = fmul float %35, %215
  %219 = fadd float %218, %36
  %220 = fadd float %217, 0xBF847AE140000000
  %221 = fcmp ult float %220, 0.000000e+00
  %222 = select i1 %221, float 1.000000e+00, float 0.000000e+00
  %223 = fsub float -0.000000e+00, %222
  %224 = fptosi float %223 to i32
  %225 = bitcast i32 %224 to float
  %226 = bitcast float %225 to i32
  %227 = icmp ne i32 %226, 0
  br i1 %227, label %IF29, label %ENDIF28

IF29:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF28

ENDIF28:                                          ; preds = %main_body, %IF29
  %228 = fmul float %200, %31
  %229 = fmul float %201, %32
  %230 = bitcast float %228 to i32
  %231 = bitcast float %229 to i32
  %232 = insertelement <2 x i32> undef, i32 %230, i32 0
  %233 = insertelement <2 x i32> %232, i32 %231, i32 1
  %234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %62, <16 x i8> %64, i32 2)
  %235 = extractelement <4 x float> %234, i32 0
  %236 = extractelement <4 x float> %234, i32 1
  %237 = fadd float %235, -5.000000e-01
  %238 = fadd float %236, -5.000000e-01
  %239 = fmul float %30, %215
  %240 = fmul float %237, %239
  %241 = fadd float %240, %200
  %242 = fmul float %238, %239
  %243 = fadd float %242, %201
  %244 = bitcast float %241 to i32
  %245 = bitcast float %243 to i32
  %246 = insertelement <2 x i32> undef, i32 %244, i32 0
  %247 = insertelement <2 x i32> %246, i32 %245, i32 1
  %248 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %247, <32 x i8> %66, <16 x i8> %68, i32 2)
  %249 = extractelement <4 x float> %248, i32 0
  %250 = extractelement <4 x float> %248, i32 1
  %251 = extractelement <4 x float> %248, i32 2
  %252 = extractelement <4 x float> %248, i32 3
  %253 = fmul float %197, %217
  %254 = fmul float %198, %217
  %255 = fmul float %199, %217
  %256 = fmul float %215, %217
  %257 = fmul float %249, %219
  %258 = fadd float %257, %253
  %259 = fmul float %250, %219
  %260 = fadd float %259, %254
  %261 = fmul float %251, %219
  %262 = fadd float %261, %255
  %263 = fmul float %252, %219
  %264 = fadd float %263, %256
  %265 = call i32 @llvm.SI.packf16(float %258, float %260)
  %266 = bitcast i32 %265 to float
  %267 = call i32 @llvm.SI.packf16(float %262, float %264)
  %268 = bitcast i32 %267 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %266, float %268, float %266, float %268)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c81c0c00
c81d0c01
100a0f07
d2820008
04160d06
c8140e00
c8150e01
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0400
c81d0401
10160f04
c8280500
c8290501
d282000b
042e1506
100a1105
100a1305
c8200600
c8210601
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430605
bf8c0770
10081308
c82c0900
c82d0901
c8280800
c8290801
c0840300
c0c60500
bf8c007f
f0800f00
00430a0a
c0840100
bf8c0070
c2000935
bf8c007f
100a1600
c2000934
bf8c007f
d2820005
0414010a
c2000936
bf8c007f
d2820005
0414010c
c2000937
bf8c007f
d2820005
0414010d
c200092d
bf8c007f
060a0a00
d206080e
02010105
081e1cf2
c8140200
c8150201
d2820005
043e0b0e
100a0b0c
c2000939
bf8c007f
10201600
c2000938
bf8c007f
d2820010
0440010a
c200093a
bf8c007f
d2820010
0440010c
c200093b
bf8c007f
d2820010
0440010d
c200092e
bf8c007f
06202000
d2060810
02010110
d2820004
04162104
c8140700
c8150701
c200091f
bf8c007f
d00c000c
02000105
7e220200
d2000005
00320b11
d2060811
02010105
082422f2
c200091e
bf8c007f
100a2400
d2820004
04160911
100a1307
c84c0100
c84d0101
d2820013
043e270e
1026270b
d2820005
044e2105
c200091d
bf8c007f
10262400
d2820005
044e0b11
100c1306
c81c0000
c81d0001
d2820007
043e0f0e
100e0f0a
d2820006
041e2106
c200091c
bf8c007f
100e2400
d2820006
041e0d11
c81c0300
c81d0301
d2820007
043e0f0e
10120f0d
c8280a00
c8290a01
c200093c
c200893d
bf8c007f
7e0e0201
d2820003
041c0103
c2000915
bf8c007f
10100600
c2000914
bf8c007f
100e0400
c0860308
c0c80510
bf8c007f
f0800100
00640207
c200091a
c200891b
bf8c0070
7e060201
d2820002
040c0102
7e045502
08041502
c80c0b00
c80d0b01
10000702
d2060800
02010100
10040109
c2000926
c2008927
bf8c007f
7e000201
d2820000
04020400
c2000924
c2008925
bf8c007f
7e020201
d2820001
04060400
060602ff
bc23d70a
d0020000
02010103
d2000003
0001e480
d2060003
22010103
7e061103
d10a0006
02010103
c0860310
c0c80520
c080030c
c0cc0518
c2020922
c2028921
c2040920
bf8c007f
7e140204
7e120205
7e060208
be842406
8984047e
7e1602f3
7c261680
88fe047e
10161508
10141307
f0800300
00060b0a
bf8c0770
061218f1
10060503
d282000a
04220709
061616f1
d2820009
041e070b
f0800f00
00640709
10040302
bf8c0770
d2820002
040a010a
10060304
d2820003
040e0109
5e040503
10060305
d2820003
040e0108
10020306
d2820000
04060107
5e000700
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..20]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[15].xyzz, CONST[14].xyzz
  1: ADD TEMP[1].x, TEMP[0].xxxx, TEMP[0].yyyy
  2: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[0].zzzz
  3: MOV TEMP[1].y, TEMP[0].yyyy
  4: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[20].zzzz
  5: MAD TEMP[1].xy, CONST[19].xxxx, CONST[20].xxxx, TEMP[1].xyyy
  6: SIN TEMP[2].x, TEMP[1].xxxx
  7: SIN TEMP[2].y, TEMP[1].yyyy
  8: COS TEMP[3].x, TEMP[1].xxxx
  9: COS TEMP[3].y, TEMP[1].yyyy
 10: MUL TEMP[1].xyz, IN[1].xyzz, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[20].yyyy
 12: MAD TEMP[0].xyz, TEMP[1].xyzz, TEMP[3].yyyy, TEMP[0].xyzz
 13: MAD TEMP[1].x, CONST[20].yyyy, TEMP[2].yyyy, TEMP[0].yyyy
 14: MOV TEMP[4].x, TEMP[2].xxxx
 15: MOV TEMP[4].y, TEMP[3].yyyy
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
 17: MOV TEMP[4].z, TEMP[2].xxxx
 18: MAD TEMP[2].xyz, TEMP[4].xyzz, CONST[20].wwww, IN[1].xyzz
 19: MUL TEMP[3], CONST[8], TEMP[0].xxxx
 20: MAD TEMP[3], CONST[9], TEMP[1].xxxx, TEMP[3]
 21: MAD TEMP[3], CONST[10], TEMP[0].zzzz, TEMP[3]
 22: ADD TEMP[3], TEMP[3], CONST[11]
 23: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz
 24: RSQ TEMP[4].x, TEMP[4].xxxx
 25: MUL TEMP[4].xyz, TEMP[2].xyzz, TEMP[4].xxxx
 26: MUL TEMP[2].xyz, TEMP[4].xyzz, CONST[18].wwww
 27: MUL TEMP[4], CONST[0], TEMP[2].xxxx
 28: MAD TEMP[4], CONST[1], TEMP[2].yyyy, TEMP[4]
 29: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[4]
 30: MUL TEMP[4], CONST[0], TEMP[0].xxxx
 31: MAD TEMP[4], CONST[1], TEMP[1].xxxx, TEMP[4]
 32: MAD TEMP[4], CONST[2], TEMP[0].zzzz, TEMP[4]
 33: ADD TEMP[4].xyz, TEMP[4], CONST[3]
 34: ADD TEMP[4].xyz, TEMP[4].xyzz, -CONST[16].xyzz
 35: MAD TEMP[5].x, TEMP[3].zzzz, CONST[17].xxxx, CONST[17].yyyy
 36: MOV TEMP[4].w, TEMP[5].xxxx
 37: MUL TEMP[5], CONST[4], TEMP[0].xxxx
 38: MAD TEMP[1], CONST[5], TEMP[1].xxxx, TEMP[5]
 39: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
 40: ADD TEMP[0].z, TEMP[0], CONST[7]
 41: MAD TEMP[0].x, TEMP[0].zzzz, CONST[18].xxxx, CONST[18].yyyy
 42: MOV TEMP[0].y, CONST[18].zzzz
 43: MAD TEMP[1].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
 44: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 45: RSQ TEMP[5].x, TEMP[5].xxxx
 46: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 47: MOV TEMP[1].zw, TEMP[0].yyxy
 48: MOV OUT[4], TEMP[2]
 49: MOV OUT[3], TEMP[1]
 50: MOV OUT[1], CONST[12]
 51: MOV OUT[2], TEMP[4]
 52: MOV OUT[0], TEMP[3]
 53: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 300)
  %66 = call float @llvm.SI.load.const(<16 x i8> %10, i32 304)
  %67 = call float @llvm.SI.load.const(<16 x i8> %10, i32 320)
  %68 = call float @llvm.SI.load.const(<16 x i8> %10, i32 324)
  %69 = call float @llvm.SI.load.const(<16 x i8> %10, i32 328)
  %70 = call float @llvm.SI.load.const(<16 x i8> %10, i32 332)
  %71 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %5)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %78 = load <16 x i8> addrspace(2)* %77, !tbaa !0
  %79 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %78, i32 0, i32 %5)
  %80 = extractelement <4 x float> %79, i32 0
  %81 = extractelement <4 x float> %79, i32 1
  %82 = extractelement <4 x float> %79, i32 2
  %83 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
  %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %5)
  %86 = extractelement <4 x float> %85, i32 0
  %87 = extractelement <4 x float> %85, i32 1
  %88 = fmul float %74, %54
  %89 = fadd float %88, %51
  %90 = fmul float %75, %55
  %91 = fadd float %90, %52
  %92 = fmul float %76, %56
  %93 = fadd float %92, %53
  %94 = fadd float %89, %91
  %95 = fadd float %94, %93
  %96 = fmul float %95, %69
  %97 = fmul float %91, %69
  %98 = fmul float %66, %67
  %99 = fadd float %98, %96
  %100 = fmul float %66, %67
  %101 = fadd float %100, %97
  %102 = call float @llvm.sin.f32(float %99)
  %103 = call float @llvm.sin.f32(float %101)
  %104 = call float @llvm.cos.f32(float %99)
  %105 = call float @llvm.cos.f32(float %101)
  %106 = fmul float %80, %102
  %107 = fmul float %81, %102
  %108 = fmul float %82, %102
  %109 = fmul float %106, %68
  %110 = fmul float %107, %68
  %111 = fmul float %108, %68
  %112 = fmul float %109, %105
  %113 = fadd float %112, %89
  %114 = fmul float %110, %105
  %115 = fadd float %114, %91
  %116 = fmul float %111, %105
  %117 = fadd float %116, %93
  %118 = fmul float %68, %103
  %119 = fadd float %118, %115
  %120 = fmul float %102, %104
  %121 = fmul float %102, %70
  %122 = fadd float %121, %80
  %123 = fmul float %105, %70
  %124 = fadd float %123, %81
  %125 = fmul float %120, %70
  %126 = fadd float %125, %82
  %127 = fmul float %27, %113
  %128 = fmul float %28, %113
  %129 = fmul float %29, %113
  %130 = fmul float %30, %113
  %131 = fmul float %31, %119
  %132 = fadd float %131, %127
  %133 = fmul float %32, %119
  %134 = fadd float %133, %128
  %135 = fmul float %33, %119
  %136 = fadd float %135, %129
  %137 = fmul float %34, %119
  %138 = fadd float %137, %130
  %139 = fmul float %35, %117
  %140 = fadd float %139, %132
  %141 = fmul float %36, %117
  %142 = fadd float %141, %134
  %143 = fmul float %37, %117
  %144 = fadd float %143, %136
  %145 = fmul float %38, %117
  %146 = fadd float %145, %138
  %147 = fadd float %140, %39
  %148 = fadd float %142, %40
  %149 = fadd float %144, %41
  %150 = fadd float %146, %42
  %151 = fmul float %122, %122
  %152 = fmul float %124, %124
  %153 = fadd float %152, %151
  %154 = fmul float %126, %126
  %155 = fadd float %153, %154
  %156 = call float @llvm.AMDGPU.rsq(float %155)
  %157 = fmul float %122, %156
  %158 = fmul float %124, %156
  %159 = fmul float %126, %156
  %160 = fmul float %157, %65
  %161 = fmul float %158, %65
  %162 = fmul float %159, %65
  %163 = fmul float %11, %160
  %164 = fmul float %12, %160
  %165 = fmul float %13, %160
  %166 = fmul float %14, %161
  %167 = fadd float %166, %163
  %168 = fmul float %15, %161
  %169 = fadd float %168, %164
  %170 = fmul float %16, %161
  %171 = fadd float %170, %165
  %172 = fmul float %17, %162
  %173 = fadd float %172, %167
  %174 = fmul float %18, %162
  %175 = fadd float %174, %169
  %176 = fmul float %19, %162
  %177 = fadd float %176, %171
  %178 = fmul float %11, %113
  %179 = fmul float %12, %113
  %180 = fmul float %13, %113
  %181 = fmul float %14, %119
  %182 = fadd float %181, %178
  %183 = fmul float %15, %119
  %184 = fadd float %183, %179
  %185 = fmul float %16, %119
  %186 = fadd float %185, %180
  %187 = fmul float %17, %117
  %188 = fadd float %187, %182
  %189 = fmul float %18, %117
  %190 = fadd float %189, %184
  %191 = fmul float %19, %117
  %192 = fadd float %191, %186
  %193 = fadd float %188, %20
  %194 = fadd float %190, %21
  %195 = fadd float %192, %22
  %196 = fsub float -0.000000e+00, %57
  %197 = fadd float %193, %196
  %198 = fsub float -0.000000e+00, %58
  %199 = fadd float %194, %198
  %200 = fsub float -0.000000e+00, %59
  %201 = fadd float %195, %200
  %202 = fmul float %149, %60
  %203 = fadd float %202, %61
  %204 = fmul float %23, %113
  %205 = fmul float %24, %119
  %206 = fadd float %205, %204
  %207 = fmul float %25, %117
  %208 = fadd float %207, %206
  %209 = fadd float %208, %26
  %210 = fmul float %209, %62
  %211 = fadd float %210, %63
  %212 = fmul float %86, %47
  %213 = fadd float %212, %49
  %214 = fmul float %87, %48
  %215 = fadd float %214, %50
  %216 = fmul float %173, %173
  %217 = fmul float %175, %175
  %218 = fadd float %217, %216
  %219 = fmul float %177, %177
  %220 = fadd float %218, %219
  %221 = call float @llvm.AMDGPU.rsq(float %220)
  %222 = fmul float %173, %221
  %223 = fmul float %175, %221
  %224 = fmul float %177, %221
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %43, float %44, float %45, float %46)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %197, float %199, float %201, float %203)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %213, float %215, float %211, float %64)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %222, float %223, float %224, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %147, float %148, float %149, float %150)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.sin.f32(float) #2

; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #3

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }
attributes #3 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020133
c2028132
c2040131
c2048130
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202013d
c2028139
bf8c0070
7e020205
d2820001
04040904
c202013c
c2028138
bf8c007f
7e040205
d2820002
04080903
060e0302
c202013e
c202813a
bf8c007f
7e100205
d2820003
04200905
06080707
c2028152
bf8c007f
100a0805
c204014c
c2020150
bf8c007f
7e080204
d2820005
04160808
10140aff
3e22f983
7e0a6b0a
c0860704
bf8c007f
e00c2000
80030600
bf8c0770
10160b07
c2020151
bf8c007f
10181604
10160205
d2820004
042e0808
100808ff
3e22f983
7e166d04
d2820001
0406170c
7e086b04
d2820001
04060804
10080b06
10080804
d2820002
040a1704
c2028122
bf8c007f
10080405
c2028126
bf8c007f
d2820004
04120205
10180b08
10181804
d2820003
040e170c
c202012a
bf8c007f
d2820004
04120604
c202012e
bf8c007f
06080804
c2020144
c2028145
bf8c007f
7e180205
d282000c
04300904
c2020102
bf8c007f
101a0404
c2028106
bf8c007f
d282000d
04360205
c204010a
bf8c007f
d282000d
04360608
c204810e
bf8c007f
061a1a09
c2048142
bf8c007f
0a1a1a09
c2048101
bf8c007f
101c0409
c2058105
bf8c007f
d282000e
043a020b
c2050109
bf8c007f
d282000e
043a060a
c206010d
bf8c007f
061c1c0c
c2060141
bf8c007f
0a1c1c0c
c2060100
bf8c007f
101e040c
c2068104
bf8c007f
d282000f
043e020d
c2070108
bf8c007f
d282000f
043e060e
c207810c
bf8c007f
061e1e0f
c2078140
bf8c007f
0a1e1e0f
f800021f
0c0d0e0f
c2078112
bf8c000f
1018040f
c2078116
bf8c007f
d282000c
0432020f
c207811a
bf8c007f
d282000c
0432060f
c207811e
bf8c007f
0618180f
c2078148
c2080149
bf8c007f
7e1a0210
d282000c
04341f0c
c0880708
bf8c007f
e00c2000
80040d00
c2030135
c2038137
bf8c0070
7e000207
d2820000
04000d0e
c2030134
c2038136
bf8c007f
7e220207
d282000d
04440d0d
c203014a
bf8c007f
7e1c0206
f800022f
0e0c000d
bf8c070f
7e006d0a
10000105
c2030153
bf8c007f
d2820000
04200d00
d282000a
041c0d0b
d2820006
04180d05
100a0d06
d2820005
0416150a
d2820005
04160100
7e0e5b05
100a0f0a
c203014b
bf8c007f
100a0a06
100c0f06
100c0c06
10100c09
d2820009
04220a0b
10000f00
10100006
d2820000
0426100a
100e0c0c
d2820007
041e0a0d
d2820007
041e100e
10120f07
d2820009
04260100
100c0c04
d2820005
041a0a05
d2820005
04161008
d2820006
04260b05
7e0c5b06
100a0d05
10000d00
100c0d07
7e0e0280
f800023f
07050006
c2020123
bf8c000f
10000404
c2020127
bf8c007f
d2820000
04020204
c202012b
bf8c007f
d2820000
04020604
c202012f
bf8c007f
06000004
c2020121
bf8c007f
100a0404
c2020125
bf8c007f
d2820005
04160204
c2020129
bf8c007f
d2820005
04160604
c202012d
bf8c007f
060a0a04
c2020120
bf8c007f
10040404
c2020124
bf8c007f
d2820001
040a0204
c2020128
bf8c007f
d2820001
04060604
c200012c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MOV OUT[0], TEMP[1]
 28: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %42 = load <32 x i8> addrspace(2)* %41, !tbaa !0
  %43 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %56 = fmul float %13, %39
  %57 = fadd float %56, %40
  %58 = bitcast float %54 to i32
  %59 = bitcast float %55 to i32
  %60 = insertelement <2 x i32> undef, i32 %58, i32 0
  %61 = insertelement <2 x i32> %60, i32 %59, i32 1
  %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %42, <16 x i8> %44, i32 2)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = fmul float %63, 0x3FCB367A00000000
  %68 = fmul float %64, 0x3FE6E2EB20000000
  %69 = fadd float %68, %67
  %70 = fmul float %65, 0x3FB27BB300000000
  %71 = fadd float %69, %70
  %72 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %63)
  %73 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %64)
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %65)
  %75 = fmul float %72, %35
  %76 = fmul float %73, %36
  %77 = fadd float %75, %76
  %78 = fmul float %74, %37
  %79 = fadd float %77, %78
  %80 = fmul float %66, %38
  %81 = fadd float %79, %80
  %82 = fadd float %81, %30
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDGPU.lrp(float %83, float %49, float 1.000000e+00)
  %85 = call float @llvm.AMDGPU.lrp(float %83, float %50, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %83, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %83, float %52, float 1.000000e+00)
  %88 = fmul float %72, %84
  %89 = fmul float %73, %85
  %90 = fmul float %74, %86
  %91 = fmul float %66, %87
  %92 = fmul float %12, %22
  %93 = fmul float %57, %23
  %94 = bitcast float %92 to i32
  %95 = bitcast float %93 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %46, <16 x i8> %48, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %72, %31
  %104 = fmul float %73, %32
  %105 = fadd float %103, %104
  %106 = fmul float %74, %33
  %107 = fadd float %105, %106
  %108 = fmul float %66, %34
  %109 = fadd float %107, %108
  %110 = fadd float %109, %29
  %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00)
  %112 = fmul float %111, %102
  %113 = fmul float %99, 0x3FCB367A00000000
  %114 = fmul float %100, 0x3FE6E2EB20000000
  %115 = fadd float %114, %113
  %116 = fmul float %101, 0x3FB27BB300000000
  %117 = fadd float %115, %116
  %118 = fcmp uge float %117, 0x3F50624DE0000000
  %119 = select i1 %118, float %117, float 0x3F50624DE0000000
  %120 = fdiv float 1.000000e+00, %119
  %121 = fmul float %99, %120
  %122 = fmul float %100, %120
  %123 = fmul float %101, %120
  %124 = fmul float %88, %99
  %125 = fmul float %89, %100
  %126 = fmul float %90, %101
  %127 = fmul float %112, %121
  %128 = fadd float %127, %124
  %129 = fmul float %112, %122
  %130 = fadd float %129, %125
  %131 = fmul float %112, %123
  %132 = fadd float %131, %126
  %133 = fmul float %128, 4.000000e+00
  %134 = fmul float %130, 4.000000e+00
  %135 = fmul float %132, 4.000000e+00
  %136 = fcmp uge float %53, %27
  %137 = select i1 %136, float %53, float %27
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %138, float %133, float %24)
  %140 = call float @llvm.AMDGPU.lrp(float %138, float %134, float %25)
  %141 = call float @llvm.AMDGPU.lrp(float %138, float %135, float %26)
  %142 = call i32 @llvm.SI.packf16(float %139, float %140)
  %143 = bitcast i32 %142 to float
  %144 = call i32 @llvm.SI.packf16(float %141, float %91)
  %145 = bitcast i32 %144 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %143, float %145, float %143, float %145)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430504
bf8c0770
10080aff
3e59b3d0
7e2002ff
3f371759
d2820004
04122106
7e2202ff
3d93dd98
d282000a
04122307
c0840100
bf8c007f
c2000910
bf8c007f
d208000b
020000f2
10080b0b
d2820004
04121400
10120d0b
d2820009
04261400
c2008921
bf8c007f
10181201
c2008920
bf8c007f
d282000c
04300304
10160f0b
d2820012
042e1400
c2000922
bf8c007f
d282000a
04300112
c2000923
bf8c007f
d282000a
04280108
c2000919
bf8c007f
06141400
d206080a
0201010a
081614f2
c8300200
c8310201
d282000c
042e190a
10261912
c200092c
c200892d
bf8c007f
7e180201
d2820003
04300103
c2000909
bf8c007f
101a0600
c2000908
bf8c007f
10180400
c0800304
c0c60508
bf8c007f
f0800f00
00030c0c
bf8c0770
10261d13
c200091d
bf8c007f
10041200
c200091c
bf8c007f
d2820002
04080104
c200091e
bf8c007f
d2820002
04080112
c200091f
bf8c007f
d2820002
04080108
c2000918
bf8c007f
06040400
d2060802
02010102
10041f02
100618ff
3e59b3d0
d2820003
040e210d
d2820003
040e230e
7e2002ff
3a83126f
d00c0000
02022103
d2000003
00020710
7e065503
1020070e
d2820010
044e2102
102420f6
c8400700
c8410701
c200090f
bf8c007f
d00c0002
02000110
7e220200
d2000010
000a2111
d2060810
02010110
082220f2
c200090e
bf8c007f
10262200
d2820012
044e2510
c84c0300
c84d0301
d2820013
042e270a
100a2708
5e0a0b12
c8180100
c8190101
d2820006
042e0d0a
100c0d09
100c1b06
100e070d
d2820006
041a0f02
100c0cf6
c200090d
bf8c007f
100e2200
d2820006
041e0d10
c81c0000
c81d0001
d2820000
042e0f0a
10000104
10001900
1002070c
d2820000
04020302
100000f6
c200090c
bf8c007f
10022200
d2820000
04060110
5e000d00
f8001c0f
05000500
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..205]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+14], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+14], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+14], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[4], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[5], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[1], CONST[6], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[1], TEMP[1], CONST[7]
 46: MOV TEMP[4].w, IMM[0].xxxx
 47: MOV TEMP[4].xyz, CONST[8].xyzx
 48: MUL TEMP[2], CONST[0], TEMP[2].xxxx
 49: MAD TEMP[2], CONST[1], TEMP[3].xxxx, TEMP[2]
 50: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 51: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 52: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 53: MAD TEMP[2].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 54: MOV TEMP[0].w, TEMP[2].xxxx
 55: MAD TEMP[2].xy, IN[3].xyyy, CONST[9].xyyy, CONST[9].zwww
 56: MOV OUT[3], TEMP[2]
 57: MOV OUT[1], TEMP[4]
 58: MOV OUT[2], TEMP[0]
 59: MOV OUT[0], TEMP[1]
 60: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = extractelement <4 x float> %67, i32 2
  %71 = extractelement <4 x float> %67, i32 3
  %72 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %5)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = extractelement <4 x float> %74, i32 1
  %77 = extractelement <4 x float> %74, i32 2
  %78 = extractelement <4 x float> %74, i32 3
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fptosi float %75 to i32
  %85 = fptosi float %76 to i32
  %86 = fptosi float %77 to i32
  %87 = fptosi float %78 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %62, %51
  %93 = fadd float %92, %48
  %94 = fmul float %63, %52
  %95 = fadd float %94, %49
  %96 = fmul float %64, %53
  %97 = fadd float %96, %50
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 224
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %68
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 228
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %68
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 232
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %68
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 236
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %68
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 224
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %69
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 228
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %69
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 232
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %69
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 236
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %69
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 224
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %70
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 228
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %70
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 232
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %70
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 236
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %70
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 224
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %71
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 228
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %71
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 232
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %71
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 236
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %71
  %189 = fadd float %188, %168
  %190 = fmul float %93, %174
  %191 = fmul float %95, %179
  %192 = fadd float %190, %191
  %193 = fmul float %97, %184
  %194 = fadd float %192, %193
  %195 = fmul float 1.000000e+00, %189
  %196 = fadd float %194, %195
  %197 = bitcast float %91 to i32
  %198 = mul i32 3, %197
  %199 = add i32 %198, 1
  %200 = bitcast i32 %199 to float
  %201 = bitcast float %90 to i32
  %202 = mul i32 3, %201
  %203 = add i32 %202, 1
  %204 = bitcast i32 %203 to float
  %205 = bitcast float %89 to i32
  %206 = mul i32 3, %205
  %207 = add i32 %206, 1
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %88 to i32
  %210 = mul i32 3, %209
  %211 = add i32 %210, 1
  %212 = bitcast i32 %211 to float
  %213 = bitcast float %212 to i32
  %214 = shl i32 %213, 4
  %215 = add i32 %214, 224
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %68
  %218 = shl i32 %213, 4
  %219 = add i32 %218, 228
  %220 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %219)
  %221 = fmul float %220, %68
  %222 = shl i32 %213, 4
  %223 = add i32 %222, 232
  %224 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %223)
  %225 = fmul float %224, %68
  %226 = shl i32 %213, 4
  %227 = add i32 %226, 236
  %228 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %227)
  %229 = fmul float %228, %68
  %230 = bitcast float %208 to i32
  %231 = shl i32 %230, 4
  %232 = add i32 %231, 224
  %233 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %232)
  %234 = fmul float %233, %69
  %235 = fadd float %234, %217
  %236 = shl i32 %230, 4
  %237 = add i32 %236, 228
  %238 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %237)
  %239 = fmul float %238, %69
  %240 = fadd float %239, %221
  %241 = shl i32 %230, 4
  %242 = add i32 %241, 232
  %243 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %242)
  %244 = fmul float %243, %69
  %245 = fadd float %244, %225
  %246 = shl i32 %230, 4
  %247 = add i32 %246, 236
  %248 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %247)
  %249 = fmul float %248, %69
  %250 = fadd float %249, %229
  %251 = bitcast float %204 to i32
  %252 = shl i32 %251, 4
  %253 = add i32 %252, 224
  %254 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %253)
  %255 = fmul float %254, %70
  %256 = fadd float %255, %235
  %257 = shl i32 %251, 4
  %258 = add i32 %257, 228
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %70
  %261 = fadd float %260, %240
  %262 = shl i32 %251, 4
  %263 = add i32 %262, 232
  %264 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %263)
  %265 = fmul float %264, %70
  %266 = fadd float %265, %245
  %267 = shl i32 %251, 4
  %268 = add i32 %267, 236
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = fmul float %269, %70
  %271 = fadd float %270, %250
  %272 = bitcast float %200 to i32
  %273 = shl i32 %272, 4
  %274 = add i32 %273, 224
  %275 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %274)
  %276 = fmul float %275, %71
  %277 = fadd float %276, %256
  %278 = shl i32 %272, 4
  %279 = add i32 %278, 228
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = fmul float %280, %71
  %282 = fadd float %281, %261
  %283 = shl i32 %272, 4
  %284 = add i32 %283, 232
  %285 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %284)
  %286 = fmul float %285, %71
  %287 = fadd float %286, %266
  %288 = shl i32 %272, 4
  %289 = add i32 %288, 236
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %71
  %292 = fadd float %291, %271
  %293 = fmul float %93, %277
  %294 = fmul float %95, %282
  %295 = fadd float %293, %294
  %296 = fmul float %97, %287
  %297 = fadd float %295, %296
  %298 = fmul float 1.000000e+00, %292
  %299 = fadd float %297, %298
  %300 = bitcast float %91 to i32
  %301 = mul i32 3, %300
  %302 = add i32 %301, 2
  %303 = bitcast i32 %302 to float
  %304 = bitcast float %90 to i32
  %305 = mul i32 3, %304
  %306 = add i32 %305, 2
  %307 = bitcast i32 %306 to float
  %308 = bitcast float %89 to i32
  %309 = mul i32 3, %308
  %310 = add i32 %309, 2
  %311 = bitcast i32 %310 to float
  %312 = bitcast float %88 to i32
  %313 = mul i32 3, %312
  %314 = add i32 %313, 2
  %315 = bitcast i32 %314 to float
  %316 = bitcast float %315 to i32
  %317 = shl i32 %316, 4
  %318 = add i32 %317, 224
  %319 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %318)
  %320 = fmul float %319, %68
  %321 = shl i32 %316, 4
  %322 = add i32 %321, 228
  %323 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %322)
  %324 = fmul float %323, %68
  %325 = shl i32 %316, 4
  %326 = add i32 %325, 232
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %68
  %329 = shl i32 %316, 4
  %330 = add i32 %329, 236
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %68
  %333 = bitcast float %311 to i32
  %334 = shl i32 %333, 4
  %335 = add i32 %334, 224
  %336 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %335)
  %337 = fmul float %336, %69
  %338 = fadd float %337, %320
  %339 = shl i32 %333, 4
  %340 = add i32 %339, 228
  %341 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %340)
  %342 = fmul float %341, %69
  %343 = fadd float %342, %324
  %344 = shl i32 %333, 4
  %345 = add i32 %344, 232
  %346 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %345)
  %347 = fmul float %346, %69
  %348 = fadd float %347, %328
  %349 = shl i32 %333, 4
  %350 = add i32 %349, 236
  %351 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %350)
  %352 = fmul float %351, %69
  %353 = fadd float %352, %332
  %354 = bitcast float %307 to i32
  %355 = shl i32 %354, 4
  %356 = add i32 %355, 224
  %357 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %356)
  %358 = fmul float %357, %70
  %359 = fadd float %358, %338
  %360 = shl i32 %354, 4
  %361 = add i32 %360, 228
  %362 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %361)
  %363 = fmul float %362, %70
  %364 = fadd float %363, %343
  %365 = shl i32 %354, 4
  %366 = add i32 %365, 232
  %367 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %366)
  %368 = fmul float %367, %70
  %369 = fadd float %368, %348
  %370 = shl i32 %354, 4
  %371 = add i32 %370, 236
  %372 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %371)
  %373 = fmul float %372, %70
  %374 = fadd float %373, %353
  %375 = bitcast float %303 to i32
  %376 = shl i32 %375, 4
  %377 = add i32 %376, 224
  %378 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %377)
  %379 = fmul float %378, %71
  %380 = fadd float %379, %359
  %381 = shl i32 %375, 4
  %382 = add i32 %381, 228
  %383 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %382)
  %384 = fmul float %383, %71
  %385 = fadd float %384, %364
  %386 = shl i32 %375, 4
  %387 = add i32 %386, 232
  %388 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %387)
  %389 = fmul float %388, %71
  %390 = fadd float %389, %369
  %391 = shl i32 %375, 4
  %392 = add i32 %391, 236
  %393 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %392)
  %394 = fmul float %393, %71
  %395 = fadd float %394, %374
  %396 = fmul float %93, %380
  %397 = fmul float %95, %385
  %398 = fadd float %396, %397
  %399 = fmul float %97, %390
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %395
  %402 = fadd float %400, %401
  %403 = fmul float %25, %196
  %404 = fmul float %26, %196
  %405 = fmul float %27, %196
  %406 = fmul float %28, %196
  %407 = fmul float %29, %299
  %408 = fadd float %407, %403
  %409 = fmul float %30, %299
  %410 = fadd float %409, %404
  %411 = fmul float %31, %299
  %412 = fadd float %411, %405
  %413 = fmul float %32, %299
  %414 = fadd float %413, %406
  %415 = fmul float %33, %402
  %416 = fadd float %415, %408
  %417 = fmul float %34, %402
  %418 = fadd float %417, %410
  %419 = fmul float %35, %402
  %420 = fadd float %419, %412
  %421 = fmul float %36, %402
  %422 = fadd float %421, %414
  %423 = fadd float %416, %37
  %424 = fadd float %418, %38
  %425 = fadd float %420, %39
  %426 = fadd float %422, %40
  %427 = fmul float %11, %196
  %428 = fmul float %12, %196
  %429 = fmul float %13, %196
  %430 = fmul float %14, %196
  %431 = fmul float %15, %299
  %432 = fadd float %431, %427
  %433 = fmul float %16, %299
  %434 = fadd float %433, %428
  %435 = fmul float %17, %299
  %436 = fadd float %435, %429
  %437 = fmul float %18, %299
  %438 = fadd float %437, %430
  %439 = fmul float %19, %402
  %440 = fadd float %439, %432
  %441 = fmul float %20, %402
  %442 = fadd float %441, %434
  %443 = fmul float %21, %402
  %444 = fadd float %443, %436
  %445 = fadd float %440, %22
  %446 = fadd float %442, %23
  %447 = fadd float %444, %24
  %448 = fsub float -0.000000e+00, %56
  %449 = fadd float %445, %448
  %450 = fsub float -0.000000e+00, %57
  %451 = fadd float %446, %450
  %452 = fsub float -0.000000e+00, %58
  %453 = fadd float %447, %452
  %454 = fmul float %425, %54
  %455 = fadd float %454, %55
  %456 = fmul float %82, %44
  %457 = fadd float %456, %46
  %458 = fmul float %83, %45
  %459 = fadd float %458, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %449, float %451, float %453, float %455)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %457, float %459, float %436, float %438)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %423, float %424, float %425, float %426)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020f00
bf8c0770
7e02110f
d2d60006
02010701
340a0c84
4a020aff
000000e0
e0301000
80000701
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
10100307
7e0e1110
d2d60007
02010707
34140e84
4a1214ff
000000e0
e0301000
80000909
bf8c0770
d2820009
04220509
7e101111
d2d60008
02010708
341c1084
4a161cff
000000e0
e0301000
80000b0b
bf8c0770
d282000b
0426070b
7e121112
d2d60009
02010709
341e1284
4a181eff
000000e0
e0301000
80000c0c
bf8c0770
d282000d
042e090c
4a160aff
000000e4
e0301000
80000b0b
bf8c0770
1016030b
4a1814ff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e050c
4a181cff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e070c
4a181eff
000000e4
e0301000
80000c0c
bf8c0770
d282000c
042e090c
c0840700
bf8c007f
e00c2000
80021000
c202012d
c2028129
bf8c0070
7e160205
d282000b
042c0911
1028190b
c202012c
c2028128
bf8c007f
7e180205
d282000c
04300910
d2820014
04521b0c
4a1a0aff
000000e8
e0301000
80000d0d
bf8c0770
101a030d
4a2a14ff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360515
4a2a1cff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360715
4a2a1eff
000000e8
e0301000
80001515
bf8c0770
d2820015
04360915
c202012e
c202812a
bf8c007f
7e1a0205
d282000d
04340912
d2820010
04522b0d
4a0a0aff
000000ec
e0301000
80000505
bf8c0770
100a0305
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416050a
4a141cff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416070a
4a141eff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416090a
060a0b10
c2020112
bf8c007f
101c0a04
4a140c81
34141484
4a1e14ff
000000e0
e0301000
80000f0f
bf8c0770
1020030f
4a1e0e81
341e1e84
4a221eff
000000e0
e0301000
80001111
bf8c0770
d2820011
04420511
4a201081
34202084
4a2420ff
000000e0
e0301000
80001212
bf8c0770
d2820012
04460712
4a221281
34222284
4a2622ff
000000e0
e0301000
80001313
bf8c0770
d2820012
044a0913
4a2614ff
000000e4
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0914
1026270b
d2820012
044e250c
4a2614ff
000000e8
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0914
d2820012
044a270d
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
1014030a
4a1e1eff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a050f
4a1e20ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a070f
4a1e22ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a090f
06141512
c2020116
bf8c007f
d282000e
043a1404
4a0c0c82
340c0c84
4a1e0cff
000000e0
e0301000
80000f0f
bf8c0770
101e030f
4a0e0e82
340e0e84
4a200eff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0510
4a101082
34101084
4a2010ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0710
4a121282
34121284
4a2012ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0910
4a200cff
000000e4
e0301000
80001010
bf8c0770
10200310
4a220eff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420511
4a2210ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420711
4a2212ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420911
1016210b
d282000b
042e1f0c
4a180cff
000000e8
e0301000
80000c0c
bf8c0770
1018030c
4a1e0eff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432050f
4a1e10ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432070f
4a1e12ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432090f
d282000b
042e190d
4a0c0cff
000000ec
e0301000
80000606
bf8c0770
100c0306
4a0e0eff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0507
4a0e10ff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0707
4a0e12ff
000000ec
e0301000
80000707
bf8c0770
d2820001
041a0907
0602030b
c202011a
bf8c007f
d2820002
043a0204
c202011e
bf8c007f
06040404
c2020130
c2028131
bf8c007f
7e060205
d2820004
040c0902
c2020102
bf8c007f
10060a04
c2020106
bf8c007f
d2820003
040e1404
c202010a
bf8c007f
d2820006
040e0204
c202010e
bf8c007f
060c0c04
c2020136
bf8c007f
0a0c0c04
c2020101
bf8c007f
100e0a04
c2020105
bf8c007f
d2820007
041e1404
c2020109
bf8c007f
d2820007
041e0204
c202010d
bf8c007f
060e0e04
c2020135
bf8c007f
0a0e0e04
c2020100
bf8c007f
10100a04
c2020104
bf8c007f
d2820008
04221404
c2020108
bf8c007f
d2820008
04220204
c202010c
bf8c007f
06101004
c2020134
bf8c007f
0a101004
f800021f
04060708
c2020103
bf8c000f
10080a04
c2020107
bf8c007f
d2820004
04121404
c082070c
bf8c007f
e00c2000
80010600
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000907
c2020124
c2028126
bf8c007f
7e160205
d2820006
042c0906
f800022f
04030006
c2020113
bf8c000f
10000a04
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020204
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10060a04
c2020115
bf8c007f
d2820003
040e1404
c2020119
bf8c007f
d2820003
040e0204
c202011d
bf8c007f
06060604
c2020110
bf8c007f
10080a04
c2020114
bf8c007f
d2820004
04121404
c2020118
bf8c007f
d2820001
04120204
c200011c
bf8c007f
06020200
f80008cf
00020301
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[13..14]
DCL CONST[5..12]
DCL TEMP[0..1]
DCL TEMP[2..9], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: DP3 TEMP[6].x, TEMP[4].xyzz, IMM[1].xyzz
 14: LRP TEMP[4].xyz, CONST[7].xxxx, TEMP[6].xxxx, TEMP[4].xyzz
 15: MOV TEMP[6].xy, IN[3].xyyy
 16: TEX TEMP[6].xyz, TEMP[6], SAMP[3], 2D
 17: LRP TEMP[6].xyz, CONST[8].wwww, TEMP[6].xyzz, IMM[0].yyyy
 18: MOV_SAT TEMP[6].xyz, TEMP[6].xyzz
 19: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[6].xyzz
 20: DP4 TEMP[4].x, TEMP[5], CONST[10]
 21: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[9].xxxx
 22: DP4 TEMP[6].x, TEMP[5], CONST[11]
 23: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[9].yyyy
 24: MOV TEMP[7].xy, IN[5].xyyy
 25: TEX TEMP[7], TEMP[7], SAMP[1], 2D
 26: MAD TEMP[7].yw, IMM[0].zzzz, TEMP[7], IMM[0].xxxx
 27: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz
 28: RSQ TEMP[8].x, TEMP[8].xxxx
 29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 30: DP2 TEMP[8].x, TEMP[7].ywww, TEMP[7].ywww
 31: ADD TEMP[8].x, IMM[0].yyyy, -TEMP[8].xxxx
 32: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx
 33: RSQ TEMP[9].x, TEMP[8].xxxx
 34: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
 35: CMP TEMP[9].x, -TEMP[8].xxxx, TEMP[9].xxxx, IMM[0].wwww
 36: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xxxx
 37: DP3 TEMP[8].x, IN[6].yzww, IN[6].yzww
 38: RSQ TEMP[8].x, TEMP[8].xxxx
 39: MUL TEMP[8].xyz, IN[6].yzww, TEMP[8].xxxx
 40: DP3 TEMP[9].x, IN[7].xyzz, IN[7].xyzz
 41: RSQ TEMP[9].x, TEMP[9].xxxx
 42: MUL TEMP[9].xyz, IN[7].xyzz, TEMP[9].xxxx
 43: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[7].wwww
 44: MAD TEMP[7].xyz, TEMP[8].xyzz, TEMP[7].yyyy, TEMP[9].xyzz
 45: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[7].xyzz
 46: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 47: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 48: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 49: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 50: MOV TEMP[2].xyz, TEMP[2].xyzz
 51: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 52: DP4 TEMP[3].x, TEMP[5], CONST[12]
 53: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[9].zzzz
 54: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 55: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[5].xyzz
 56: LRP TEMP[2], TEMP[6].xxxx, IN[2], IMM[0].yyyy
 57: MUL TEMP[2], TEMP[5], TEMP[2]
 58: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[5].xyyy
 59: MOV TEMP[3].xy, TEMP[3].xyyy
 60: TEX TEMP[3], TEMP[3], SAMP[4], 2D
 61: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 62: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 63: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww
 64: RCP TEMP[5].x, TEMP[5].xxxx
 65: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 66: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 67: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 68: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[2].xxxx
 69: MAX TEMP[3].x, IN[4].wwww, CONST[6].wwww
 70: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 71: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[6].xyzz
 72: MOV OUT[0], TEMP[2]
 73: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %47 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %48 = load <32 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %52 = load <32 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %56 = load <32 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %60 = load <32 x i8> addrspace(2)* %59, !tbaa !0
  %61 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %62 = load <16 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %64 = load <32 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = fcmp ugt float %16, 0.000000e+00
  %68 = select i1 %67, float 1.000000e+00, float 0.000000e+00
  %69 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %90 = fmul float %13, %45
  %91 = fadd float %90, %46
  %92 = call float @llvm.AMDIL.clamp.(float %68, float 0.000000e+00, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = bitcast float %92 to i32
  %97 = icmp ne i32 %96, 0
  %. = select i1 %97, float -1.000000e+00, float 1.000000e+00
  %98 = bitcast float %79 to i32
  %99 = bitcast float %80 to i32
  %100 = insertelement <2 x i32> undef, i32 %98, i32 0
  %101 = insertelement <2 x i32> %100, i32 %99, i32 1
  %102 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %101, <32 x i8> %48, <16 x i8> %50, i32 2)
  %103 = extractelement <4 x float> %102, i32 0
  %104 = extractelement <4 x float> %102, i32 1
  %105 = extractelement <4 x float> %102, i32 2
  %106 = extractelement <4 x float> %102, i32 3
  %107 = fmul float %103, 0x3FCB367A00000000
  %108 = fmul float %104, 0x3FE6E2EB20000000
  %109 = fadd float %108, %107
  %110 = fmul float %105, 0x3FB27BB300000000
  %111 = fadd float %109, %110
  %112 = call float @llvm.AMDGPU.lrp(float %28, float %111, float %103)
  %113 = call float @llvm.AMDGPU.lrp(float %28, float %111, float %104)
  %114 = call float @llvm.AMDGPU.lrp(float %28, float %111, float %105)
  %115 = bitcast float %73 to i32
  %116 = bitcast float %74 to i32
  %117 = insertelement <2 x i32> undef, i32 %115, i32 0
  %118 = insertelement <2 x i32> %117, i32 %116, i32 1
  %119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %60, <16 x i8> %62, i32 2)
  %120 = extractelement <4 x float> %119, i32 0
  %121 = extractelement <4 x float> %119, i32 1
  %122 = extractelement <4 x float> %119, i32 2
  %123 = call float @llvm.AMDGPU.lrp(float %29, float %120, float 1.000000e+00)
  %124 = call float @llvm.AMDGPU.lrp(float %29, float %121, float 1.000000e+00)
  %125 = call float @llvm.AMDGPU.lrp(float %29, float %122, float 1.000000e+00)
  %126 = call float @llvm.AMDIL.clamp.(float %123, float 0.000000e+00, float 1.000000e+00)
  %127 = call float @llvm.AMDIL.clamp.(float %124, float 0.000000e+00, float 1.000000e+00)
  %128 = call float @llvm.AMDIL.clamp.(float %125, float 0.000000e+00, float 1.000000e+00)
  %129 = fmul float %112, %126
  %130 = fmul float %113, %127
  %131 = fmul float %114, %128
  %132 = fmul float %129, %33
  %133 = fmul float %130, %34
  %134 = fadd float %132, %133
  %135 = fmul float %131, %35
  %136 = fadd float %134, %135
  %137 = fmul float %106, %36
  %138 = fadd float %136, %137
  %139 = fadd float %138, %30
  %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
  %141 = fmul float %129, %37
  %142 = fmul float %130, %38
  %143 = fadd float %141, %142
  %144 = fmul float %131, %39
  %145 = fadd float %143, %144
  %146 = fmul float %106, %40
  %147 = fadd float %145, %146
  %148 = fadd float %147, %31
  %149 = call float @llvm.AMDIL.clamp.(float %148, float 0.000000e+00, float 1.000000e+00)
  %150 = bitcast float %79 to i32
  %151 = bitcast float %80 to i32
  %152 = insertelement <2 x i32> undef, i32 %150, i32 0
  %153 = insertelement <2 x i32> %152, i32 %151, i32 1
  %154 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %52, <16 x i8> %54, i32 2)
  %155 = extractelement <4 x float> %154, i32 1
  %156 = extractelement <4 x float> %154, i32 3
  %157 = fmul float 2.000000e+00, %155
  %158 = fadd float %157, -1.000000e+00
  %159 = fmul float 2.000000e+00, %156
  %160 = fadd float %159, -1.000000e+00
  %161 = fmul float %81, %81
  %162 = fmul float %82, %82
  %163 = fadd float %162, %161
  %164 = fmul float %83, %83
  %165 = fadd float %163, %164
  %166 = call float @llvm.AMDGPU.rsq(float %165)
  %167 = fmul float %81, %166
  %168 = fmul float %82, %166
  %169 = fmul float %83, %166
  %170 = fmul float %158, %158
  %171 = fmul float %160, %160
  %172 = fadd float %170, %171
  %173 = fsub float -0.000000e+00, %172
  %174 = fadd float 1.000000e+00, %173
  %175 = fcmp uge float 0.000000e+00, %174
  %176 = select i1 %175, float 0.000000e+00, float %174
  %177 = call float @llvm.AMDGPU.rsq(float %176)
  %178 = fmul float %177, %176
  %179 = fsub float -0.000000e+00, %176
  %180 = call float @llvm.AMDGPU.cndlt(float %179, float %178, float 0.000000e+00)
  %181 = fmul float %167, %180
  %182 = fmul float %168, %180
  %183 = fmul float %169, %180
  %184 = fmul float %84, %84
  %185 = fmul float %85, %85
  %186 = fadd float %185, %184
  %187 = fmul float %86, %86
  %188 = fadd float %186, %187
  %189 = call float @llvm.AMDGPU.rsq(float %188)
  %190 = fmul float %84, %189
  %191 = fmul float %85, %189
  %192 = fmul float %86, %189
  %193 = fmul float %87, %87
  %194 = fmul float %88, %88
  %195 = fadd float %194, %193
  %196 = fmul float %89, %89
  %197 = fadd float %195, %196
  %198 = call float @llvm.AMDGPU.rsq(float %197)
  %199 = fmul float %87, %198
  %200 = fmul float %88, %198
  %201 = fmul float %89, %198
  %202 = fmul float %199, %160
  %203 = fmul float %200, %160
  %204 = fmul float %201, %160
  %205 = fmul float %190, %158
  %206 = fadd float %205, %202
  %207 = fmul float %191, %158
  %208 = fadd float %207, %203
  %209 = fmul float %192, %158
  %210 = fadd float %209, %204
  %211 = fmul float %181, %.
  %212 = fadd float %211, %206
  %213 = fmul float %182, %.
  %214 = fadd float %213, %208
  %215 = fmul float %183, %.
  %216 = fadd float %215, %210
  %217 = fmul float %212, %75
  %218 = fmul float %214, %76
  %219 = fadd float %218, %217
  %220 = fmul float %216, %77
  %221 = fadd float %219, %220
  %222 = fmul float %221, %212
  %223 = fmul float %221, %214
  %224 = fmul float %221, %216
  %225 = fmul float 2.000000e+00, %222
  %226 = fmul float 2.000000e+00, %223
  %227 = fmul float 2.000000e+00, %224
  %228 = fsub float -0.000000e+00, %225
  %229 = fadd float %75, %228
  %230 = fsub float -0.000000e+00, %226
  %231 = fadd float %76, %230
  %232 = fsub float -0.000000e+00, %227
  %233 = fadd float %77, %232
  %234 = insertelement <4 x float> undef, float %229, i32 0
  %235 = insertelement <4 x float> %234, float %231, i32 1
  %236 = insertelement <4 x float> %235, float %233, i32 2
  %237 = insertelement <4 x float> %236, float 0.000000e+00, i32 3
  %238 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %237)
  %239 = extractelement <4 x float> %238, i32 0
  %240 = extractelement <4 x float> %238, i32 1
  %241 = extractelement <4 x float> %238, i32 2
  %242 = extractelement <4 x float> %238, i32 3
  %243 = call float @fabs(float %241)
  %244 = fdiv float 1.000000e+00, %243
  %245 = fmul float %239, %244
  %246 = fadd float %245, 1.500000e+00
  %247 = fmul float %240, %244
  %248 = fadd float %247, 1.500000e+00
  %249 = bitcast float %248 to i32
  %250 = bitcast float %246 to i32
  %251 = bitcast float %242 to i32
  %252 = insertelement <4 x i32> undef, i32 %249, i32 0
  %253 = insertelement <4 x i32> %252, i32 %250, i32 1
  %254 = insertelement <4 x i32> %253, i32 %251, i32 2
  %255 = insertelement <4 x i32> %254, i32 undef, i32 3
  %256 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %255, <32 x i8> %56, <16 x i8> %58, i32 4)
  %257 = extractelement <4 x float> %256, i32 0
  %258 = extractelement <4 x float> %256, i32 1
  %259 = extractelement <4 x float> %256, i32 2
  %260 = extractelement <4 x float> %256, i32 3
  %261 = fmul float %129, %41
  %262 = fmul float %130, %42
  %263 = fadd float %261, %262
  %264 = fmul float %131, %43
  %265 = fadd float %263, %264
  %266 = fmul float %106, %44
  %267 = fadd float %265, %266
  %268 = fadd float %267, %32
  %269 = call float @llvm.AMDIL.clamp.(float %268, float 0.000000e+00, float 1.000000e+00)
  %270 = fmul float %260, %269
  %271 = call float @llvm.AMDGPU.lrp(float %270, float %257, float %129)
  %272 = call float @llvm.AMDGPU.lrp(float %270, float %258, float %130)
  %273 = call float @llvm.AMDGPU.lrp(float %270, float %259, float %131)
  %274 = call float @llvm.AMDGPU.lrp(float %149, float %69, float 1.000000e+00)
  %275 = call float @llvm.AMDGPU.lrp(float %149, float %70, float 1.000000e+00)
  %276 = call float @llvm.AMDGPU.lrp(float %149, float %71, float 1.000000e+00)
  %277 = call float @llvm.AMDGPU.lrp(float %149, float %72, float 1.000000e+00)
  %278 = fmul float %271, %274
  %279 = fmul float %272, %275
  %280 = fmul float %273, %276
  %281 = fmul float %106, %277
  %282 = fmul float %12, %22
  %283 = fmul float %91, %23
  %284 = bitcast float %282 to i32
  %285 = bitcast float %283 to i32
  %286 = insertelement <2 x i32> undef, i32 %284, i32 0
  %287 = insertelement <2 x i32> %286, i32 %285, i32 1
  %288 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %64, <16 x i8> %66, i32 2)
  %289 = extractelement <4 x float> %288, i32 0
  %290 = extractelement <4 x float> %288, i32 1
  %291 = extractelement <4 x float> %288, i32 2
  %292 = extractelement <4 x float> %288, i32 3
  %293 = fmul float %140, %292
  %294 = fmul float %289, 0x3FCB367A00000000
  %295 = fmul float %290, 0x3FE6E2EB20000000
  %296 = fadd float %295, %294
  %297 = fmul float %291, 0x3FB27BB300000000
  %298 = fadd float %296, %297
  %299 = fcmp uge float %298, 0x3F50624DE0000000
  %300 = select i1 %299, float %298, float 0x3F50624DE0000000
  %301 = fdiv float 1.000000e+00, %300
  %302 = fmul float %289, %301
  %303 = fmul float %290, %301
  %304 = fmul float %291, %301
  %305 = fmul float %278, %289
  %306 = fmul float %279, %290
  %307 = fmul float %280, %291
  %308 = fmul float %293, %302
  %309 = fadd float %308, %305
  %310 = fmul float %293, %303
  %311 = fadd float %310, %306
  %312 = fmul float %293, %304
  %313 = fadd float %312, %307
  %314 = fmul float %309, 4.000000e+00
  %315 = fmul float %311, 4.000000e+00
  %316 = fmul float %313, 4.000000e+00
  %317 = fcmp uge float %78, %27
  %318 = select i1 %317, float %78, float %27
  %319 = call float @llvm.AMDIL.clamp.(float %318, float 0.000000e+00, float 1.000000e+00)
  %320 = call float @llvm.AMDGPU.lrp(float %319, float %314, float %24)
  %321 = call float @llvm.AMDGPU.lrp(float %319, float %315, float %25)
  %322 = call float @llvm.AMDGPU.lrp(float %319, float %316, float %26)
  %323 = call i32 @llvm.SI.packf16(float %320, float %321)
  %324 = bitcast i32 %323 to float
  %325 = call i32 @llvm.SI.packf16(float %322, float %281)
  %326 = bitcast i32 %325 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %324, float %326, float %324, float %326)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
c0840300
c0c60500
bf8c0070
f0800f00
00430405
bf8c0770
101a08ff
3e59b3d0
7e1802ff
3f371759
d282000e
04361905
7e1a02ff
3d93dd98
d282000e
043a1b06
c0840100
bf8c007f
c200091c
bf8c007f
d2080010
020000f2
101e0910
d282000f
043e1c00
c8480500
c8490501
c8440400
c8450401
c086030c
c0c80518
bf8c007f
f0800700
00641111
c2008923
bf8c0070
d2080014
020002f2
d2820015
04522201
d2060815
02010115
101e2b0f
102a0b10
d2820015
04561c00
d2820016
04522401
d2060816
02010116
10302d15
c2038931
bf8c007f
102a3007
c2038930
bf8c007f
d2820015
04540f0f
10200d10
d282000e
04421c00
d2820010
04522601
d2060810
02010110
101c210e
c2000932
bf8c007f
d2820010
0454010e
c2000933
bf8c007f
d2820010
04400107
c2000926
bf8c007f
06202000
d2060810
02010110
1024210b
082624f2
10203113
d2820014
04421312
c200092d
bf8c007f
10203000
c200092c
bf8c007f
d2820010
0440010f
c200092e
bf8c007f
d2820010
0440010e
c200092f
bf8c007f
d2820010
04400107
c2000925
bf8c007f
06202000
d2060810
02010110
082220f2
c8540100
c8550101
d2820015
04462b10
10322b14
c2000938
c2008939
bf8c007f
7e280201
d2820003
04500103
c2000915
bf8c007f
102a0600
c2000914
bf8c007f
10280400
c0800310
c0c60520
bf8c007f
f0800f00
00031414
bf8c0770
10322b19
c2000929
bf8c007f
10043000
c2000928
bf8c007f
d2820002
0408010f
c200092a
bf8c007f
d2820002
0408010e
c200092b
bf8c007f
d2820002
04080107
c2000924
bf8c007f
06040400
d2060802
02010102
10042f02
100628ff
3e59b3d0
d2820003
040e1915
d2820003
040e1b16
7e1802ff
3a83126f
d00c0000
02021903
d2000003
0002070c
7e065503
10180715
d282000c
04661902
103018f6
c8300b00
c8310b01
c200091b
bf8c007f
d00c0002
0200010c
7e1a0200
d200000c
000a190d
d206080c
0201010c
081a18f2
c2000919
bf8c007f
10321a00
d2820018
0466310c
101e1f13
d282000f
043e1112
c8640000
c8650001
d2820019
04463310
101e330f
101e290f
10320714
d282000f
043e3302
101e1ef6
c2000918
bf8c007f
10321a00
d282000f
04661f0c
5e1e310f
101c1d13
d2820008
043a1512
c8240200
c8250201
d2820009
04461310
10101308
10102d08
10060716
d2820002
04220702
100404f6
c200091a
bf8c007f
10061a00
d2820002
040e050c
c80c0300
c80d0301
d2820000
04460710
10000107
5e000102
f8001c0f
000f000f
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL IN[7]
DCL IN[8]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..10]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[8].xyzz, CONST[7].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MOV TEMP[3].w, IMM[0].yyyy
  8: MOV TEMP[3].xyz, IN[5].xyzx
  9: MOV TEMP[4].w, IMM[0].yyyy
 10: MOV TEMP[4].xyz, IN[8].xyzx
 11: MUL TEMP[5], CONST[0], TEMP[1].xxxx
 12: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[5]
 13: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 14: ADD TEMP[0], TEMP[0], CONST[3]
 15: MOV TEMP[2].w, IMM[0].xxxx
 16: MOV TEMP[2].xyz, CONST[4].xyzx
 17: DP4 TEMP[5].x, TEMP[3], IN[0]
 18: DP4 TEMP[6].x, TEMP[3], IN[1]
 19: MOV TEMP[5].y, TEMP[6].xxxx
 20: DP4 TEMP[3].x, TEMP[3], IN[2]
 21: MOV TEMP[5].z, TEMP[3].xxxx
 22: MUL TEMP[3].xyz, TEMP[5].xyzz, CONST[6].wwww
 23: DP4 TEMP[5].x, TEMP[4], IN[0]
 24: DP4 TEMP[6].x, TEMP[4], IN[1]
 25: MOV TEMP[5].y, TEMP[6].xxxx
 26: DP4 TEMP[4].x, TEMP[4], IN[2]
 27: MOV TEMP[5].z, TEMP[4].xxxx
 28: MUL TEMP[4].xyz, TEMP[5].xyzz, CONST[6].wwww
 29: MUL TEMP[2], TEMP[2], IN[3]
 30: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[10].xyzz
 31: MAD TEMP[5].x, TEMP[0].zzzz, CONST[9].xxxx, CONST[9].yyyy
 32: MOV TEMP[1].w, TEMP[5].xxxx
 33: MAD TEMP[5].xy, IN[6].xyyy, CONST[5].xyyy, CONST[5].zwww
 34: MOV TEMP[5].zw, TEMP[3].yyxy
 35: MOV TEMP[6].x, TEMP[3].zzzz
 36: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 37: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 38: MOV TEMP[6].yzw, TEMP[3].yxyz
 39: MOV TEMP[3].xyz, TEMP[4].xyzx
 40: MOV OUT[1], TEMP[2]
 41: MOV OUT[2], IN[7]
 42: MOV OUT[4], TEMP[5]
 43: MOV OUT[6], TEMP[3]
 44: MOV OUT[5], TEMP[6]
 45: MOV OUT[3], TEMP[1]
 46: MOV OUT[0], TEMP[0]
 47: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = extractelement <4 x float> %82, i32 2
  %86 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %87 = load <16 x i8> addrspace(2)* %86, !tbaa !0
  %88 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %5)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = getelementptr <16 x i8> addrspace(2)* %3, i32 7
  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
  %93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %92, i32 0, i32 %5)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = extractelement <4 x float> %93, i32 3
  %98 = getelementptr <16 x i8> addrspace(2)* %3, i32 8
  %99 = load <16 x i8> addrspace(2)* %98, !tbaa !0
  %100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %99, i32 0, i32 %5)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = fmul float %77, %38
  %105 = fadd float %104, %35
  %106 = fmul float %78, %39
  %107 = fadd float %106, %36
  %108 = fmul float %79, %40
  %109 = fadd float %108, %37
  %110 = fmul float %105, %49
  %111 = fmul float %107, %50
  %112 = fadd float %110, %111
  %113 = fmul float %109, %51
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %52
  %116 = fadd float %114, %115
  %117 = fmul float %105, %56
  %118 = fmul float %107, %57
  %119 = fadd float %117, %118
  %120 = fmul float %109, %58
  %121 = fadd float %119, %120
  %122 = fmul float 1.000000e+00, %59
  %123 = fadd float %121, %122
  %124 = fmul float %105, %63
  %125 = fmul float %107, %64
  %126 = fadd float %124, %125
  %127 = fmul float %109, %65
  %128 = fadd float %126, %127
  %129 = fmul float 1.000000e+00, %66
  %130 = fadd float %128, %129
  %131 = fmul float %11, %116
  %132 = fmul float %12, %116
  %133 = fmul float %13, %116
  %134 = fmul float %14, %116
  %135 = fmul float %15, %123
  %136 = fadd float %135, %131
  %137 = fmul float %16, %123
  %138 = fadd float %137, %132
  %139 = fmul float %17, %123
  %140 = fadd float %139, %133
  %141 = fmul float %18, %123
  %142 = fadd float %141, %134
  %143 = fmul float %19, %130
  %144 = fadd float %143, %136
  %145 = fmul float %20, %130
  %146 = fadd float %145, %138
  %147 = fmul float %21, %130
  %148 = fadd float %147, %140
  %149 = fmul float %22, %130
  %150 = fadd float %149, %142
  %151 = fadd float %144, %23
  %152 = fadd float %146, %24
  %153 = fadd float %148, %25
  %154 = fadd float %150, %26
  %155 = fmul float %83, %49
  %156 = fmul float %84, %50
  %157 = fadd float %155, %156
  %158 = fmul float %85, %51
  %159 = fadd float %157, %158
  %160 = fmul float 0.000000e+00, %52
  %161 = fadd float %159, %160
  %162 = fmul float %83, %56
  %163 = fmul float %84, %57
  %164 = fadd float %162, %163
  %165 = fmul float %85, %58
  %166 = fadd float %164, %165
  %167 = fmul float 0.000000e+00, %59
  %168 = fadd float %166, %167
  %169 = fmul float %83, %63
  %170 = fmul float %84, %64
  %171 = fadd float %169, %170
  %172 = fmul float %85, %65
  %173 = fadd float %171, %172
  %174 = fmul float 0.000000e+00, %66
  %175 = fadd float %173, %174
  %176 = fmul float %161, %34
  %177 = fmul float %168, %34
  %178 = fmul float %175, %34
  %179 = fmul float %101, %49
  %180 = fmul float %102, %50
  %181 = fadd float %179, %180
  %182 = fmul float %103, %51
  %183 = fadd float %181, %182
  %184 = fmul float 0.000000e+00, %52
  %185 = fadd float %183, %184
  %186 = fmul float %101, %56
  %187 = fmul float %102, %57
  %188 = fadd float %186, %187
  %189 = fmul float %103, %58
  %190 = fadd float %188, %189
  %191 = fmul float 0.000000e+00, %59
  %192 = fadd float %190, %191
  %193 = fmul float %101, %63
  %194 = fmul float %102, %64
  %195 = fadd float %193, %194
  %196 = fmul float %103, %65
  %197 = fadd float %195, %196
  %198 = fmul float 0.000000e+00, %66
  %199 = fadd float %197, %198
  %200 = fmul float %185, %34
  %201 = fmul float %192, %34
  %202 = fmul float %199, %34
  %203 = fmul float %27, %70
  %204 = fmul float %28, %71
  %205 = fmul float %29, %72
  %206 = fmul float 1.000000e+00, %73
  %207 = fsub float -0.000000e+00, %43
  %208 = fadd float %116, %207
  %209 = fsub float -0.000000e+00, %44
  %210 = fadd float %123, %209
  %211 = fsub float -0.000000e+00, %45
  %212 = fadd float %130, %211
  %213 = fmul float %153, %41
  %214 = fadd float %213, %42
  %215 = fmul float %89, %30
  %216 = fadd float %215, %32
  %217 = fmul float %90, %31
  %218 = fadd float %217, %33
  %219 = fmul float %202, %177
  %220 = fmul float %200, %178
  %221 = fmul float %201, %176
  %222 = fsub float -0.000000e+00, %219
  %223 = fmul float %201, %178
  %224 = fadd float %223, %222
  %225 = fsub float -0.000000e+00, %220
  %226 = fmul float %202, %176
  %227 = fadd float %226, %225
  %228 = fsub float -0.000000e+00, %221
  %229 = fmul float %200, %177
  %230 = fadd float %229, %228
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %203, float %204, float %205, float %206)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %94, float %95, float %96, float %97)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %208, float %210, float %212, float %214)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %216, float %218, float %176, float %177)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %178, float %224, float %227, float %230)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %200, float %201, float %202, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %151, float %152, float %153, float %154)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c084071c
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840710
bf8c000f
e00c2000
80020900
c2020121
c202811d
bf8c0070
7e020205
d2820003
0404090a
c0840704
bf8c007f
e00c2000
80020500
bf8c0770
10020d03
c2020120
c202811c
bf8c007f
7e040205
d2820004
04080909
d2820001
04060b04
c2020122
c202811e
bf8c007f
7e040205
d2820011
0408090b
d2820001
04060f11
06021101
c0840700
bf8c007f
e00c2000
80020900
bf8c0770
10041503
d2820002
040a1304
d2820002
040a1711
06041902
c2020102
bf8c007f
101a0404
c2020106
bf8c007f
d2820012
04360204
c0840708
bf8c007f
e00c2000
80020d00
bf8c0770
10061d03
d2820003
040e1b04
d2820003
040e1f11
06062103
c202010a
bf8c007f
d2820004
044a0604
c202010e
bf8c007f
06080804
c2020124
c2028125
bf8c007f
7e220205
d2820011
04440904
c202012a
bf8c007f
0a240604
c2020129
bf8c007f
0a260204
c2020128
bf8c007f
0a280404
f800022f
11121314
c0840714
bf8c000f
e00c2000
80021200
bf8c0770
10220d13
d2820011
04460b12
d2820011
04460f14
d2820011
04450108
c202011b
bf8c007f
10222204
102c1513
d2820016
045a1312
d2820016
045a1714
d2820016
0459010c
102c2c04
c0840718
bf8c007f
e00c2000
80021700
c2028115
c2040117
bf8c0070
7e360208
d282001b
046c0b18
c2028114
c2040116
bf8c007f
7e380208
d2820017
04700b17
f800023f
11161b17
c0840720
bf8c000f
e00c2000
80021700
bf8c0770
10000d18
d2820000
04020b17
d2820000
04020f19
d2820000
04010108
10000004
100c2d00
100a1518
d2820005
04161317
d2820005
04161719
d2820005
0415010c
100a0a04
100e2305
080c0d07
100e1d13
d2820007
041e1b12
d2820007
041e1f14
d2820007
041d0110
100e0e04
10120f05
10101d18
d2820008
04221b17
d2820008
04221f19
d2820008
04210110
10101004
10142d08
0812130a
10142308
10160f00
0814150b
f800024f
06090a07
bf8c070f
7e0c0280
f800025f
06080005
c2020103
bf8c000f
10000404
c2020107
bf8c007f
d2820000
04020204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[11..12]
DCL CONST[3..10]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV TEMP[1].xy, IN[4].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[1].xyz, CONST[5].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: MOV TEMP[3].xy, IN[2].xyyy
  8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D
  9: LRP TEMP[3].xyz, CONST[6].wwww, TEMP[3].xyzz, IMM[0].wwww
 10: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz
 11: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 12: DP4 TEMP[1].x, TEMP[2], CONST[9]
 13: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[7].yyyy
 14: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 15: MUL TEMP[1], TEMP[2], TEMP[1]
 16: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[3].xyyy
 17: MOV TEMP[3].xy, TEMP[3].xyyy
 18: TEX TEMP[3], TEMP[3], SAMP[2], 2D
 19: DP4 TEMP[2].x, TEMP[2], CONST[8]
 20: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx
 21: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 22: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 23: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 24: RCP TEMP[4].x, TEMP[4].xxxx
 25: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 26: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 27: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 28: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 29: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 30: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 31: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[4].xyzz
 32: MAD TEMP[2].x, TEMP[1].wwww, CONST[5].yyyy, CONST[5].zzzz
 33: SLT TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz
 34: F2I TEMP[2].x, -TEMP[2]
 35: UIF TEMP[2].xxxx :2
 36:   KILL
 37: ENDIF
 38: MOV OUT[0], TEMP[1]
 39: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %44 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %45 = load <32 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %65 = fmul float %13, %42
  %66 = fadd float %65, %43
  %67 = bitcast float %63 to i32
  %68 = bitcast float %64 to i32
  %69 = insertelement <2 x i32> undef, i32 %67, i32 0
  %70 = insertelement <2 x i32> %69, i32 %68, i32 1
  %71 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %70, <32 x i8> %45, <16 x i8> %47, i32 2)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = extractelement <4 x float> %71, i32 3
  %76 = fmul float %72, 0x3FCB367A00000000
  %77 = fmul float %73, 0x3FE6E2EB20000000
  %78 = fadd float %77, %76
  %79 = fmul float %74, 0x3FB27BB300000000
  %80 = fadd float %78, %79
  %81 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %72)
  %82 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %73)
  %83 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %74)
  %84 = bitcast float %60 to i32
  %85 = bitcast float %61 to i32
  %86 = insertelement <2 x i32> undef, i32 %84, i32 0
  %87 = insertelement <2 x i32> %86, i32 %85, i32 1
  %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %49, <16 x i8> %51, i32 2)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = extractelement <4 x float> %88, i32 2
  %92 = call float @llvm.AMDGPU.lrp(float %31, float %89, float 1.000000e+00)
  %93 = call float @llvm.AMDGPU.lrp(float %31, float %90, float 1.000000e+00)
  %94 = call float @llvm.AMDGPU.lrp(float %31, float %91, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %92, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float %93, float 0.000000e+00, float 1.000000e+00)
  %97 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00)
  %98 = fmul float %81, %95
  %99 = fmul float %82, %96
  %100 = fmul float %83, %97
  %101 = fmul float %98, %38
  %102 = fmul float %99, %39
  %103 = fadd float %101, %102
  %104 = fmul float %100, %40
  %105 = fadd float %103, %104
  %106 = fmul float %75, %41
  %107 = fadd float %105, %106
  %108 = fadd float %107, %33
  %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00)
  %110 = call float @llvm.AMDGPU.lrp(float %109, float %56, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %109, float %57, float 1.000000e+00)
  %112 = call float @llvm.AMDGPU.lrp(float %109, float %58, float 1.000000e+00)
  %113 = call float @llvm.AMDGPU.lrp(float %109, float %59, float 1.000000e+00)
  %114 = fmul float %98, %110
  %115 = fmul float %99, %111
  %116 = fmul float %100, %112
  %117 = fmul float %75, %113
  %118 = fmul float %12, %22
  %119 = fmul float %66, %23
  %120 = bitcast float %118 to i32
  %121 = bitcast float %119 to i32
  %122 = insertelement <2 x i32> undef, i32 %120, i32 0
  %123 = insertelement <2 x i32> %122, i32 %121, i32 1
  %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %123, <32 x i8> %53, <16 x i8> %55, i32 2)
  %125 = extractelement <4 x float> %124, i32 0
  %126 = extractelement <4 x float> %124, i32 1
  %127 = extractelement <4 x float> %124, i32 2
  %128 = extractelement <4 x float> %124, i32 3
  %129 = fmul float %98, %34
  %130 = fmul float %99, %35
  %131 = fadd float %129, %130
  %132 = fmul float %100, %36
  %133 = fadd float %131, %132
  %134 = fmul float %75, %37
  %135 = fadd float %133, %134
  %136 = fadd float %135, %32
  %137 = call float @llvm.AMDIL.clamp.(float %136, float 0.000000e+00, float 1.000000e+00)
  %138 = fmul float %137, %128
  %139 = fmul float %125, 0x3FCB367A00000000
  %140 = fmul float %126, 0x3FE6E2EB20000000
  %141 = fadd float %140, %139
  %142 = fmul float %127, 0x3FB27BB300000000
  %143 = fadd float %141, %142
  %144 = fcmp uge float %143, 0x3F50624DE0000000
  %145 = select i1 %144, float %143, float 0x3F50624DE0000000
  %146 = fdiv float 1.000000e+00, %145
  %147 = fmul float %125, %146
  %148 = fmul float %126, %146
  %149 = fmul float %127, %146
  %150 = fmul float %114, %125
  %151 = fmul float %115, %126
  %152 = fmul float %116, %127
  %153 = fmul float %138, %147
  %154 = fadd float %153, %150
  %155 = fmul float %138, %148
  %156 = fadd float %155, %151
  %157 = fmul float %138, %149
  %158 = fadd float %157, %152
  %159 = fmul float %154, 4.000000e+00
  %160 = fmul float %156, 4.000000e+00
  %161 = fmul float %158, 4.000000e+00
  %162 = fcmp uge float %62, %27
  %163 = select i1 %162, float %62, float %27
  %164 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00)
  %165 = call float @llvm.AMDGPU.lrp(float %164, float %159, float %24)
  %166 = call float @llvm.AMDGPU.lrp(float %164, float %160, float %25)
  %167 = call float @llvm.AMDGPU.lrp(float %164, float %161, float %26)
  %168 = fmul float %117, %29
  %169 = fadd float %168, %30
  %170 = fcmp ult float %169, 0.000000e+00
  %171 = select i1 %170, float 1.000000e+00, float 0.000000e+00
  %172 = fsub float -0.000000e+00, %171
  %173 = fptosi float %172 to i32
  %174 = bitcast i32 %173 to float
  %175 = bitcast float %174 to i32
  %176 = icmp ne i32 %175, 0
  br i1 %176, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %177 = call i32 @llvm.SI.packf16(float %165, float %166)
  %178 = bitcast i32 %177 to float
  %179 = call i32 @llvm.SI.packf16(float %167, float %117)
  %180 = bitcast i32 %179 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %178, float %180, float %178, float %180)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140d00
c8150d01
c8100c00
c8110c01
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1402ff
3f371759
d2820008
04221505
7e1802ff
3d93dd98
d2820009
04221906
c0840100
bf8c007f
c2000914
bf8c007f
d208000b
020000f2
1010090b
d2820008
04221200
c8380500
c8390501
c8340400
c8350401
c0860304
c0c80508
bf8c007f
f0800700
00640e0d
c200891b
bf8c0070
d2080011
020002f2
d282000d
04461c01
d206080d
0201010d
10101b08
101a0b0b
d282000d
04361200
d2820012
04461e01
d2060812
02010112
101a250d
c2038925
bf8c007f
10241a07
c2038924
bf8c007f
d2820013
04480f08
10160d0b
d2820009
042e1200
d282000b
04462001
d206080b
0201010b
10241709
c2000926
bf8c007f
d2820009
044c0112
c2000927
bf8c007f
d2820009
04240107
c200091d
bf8c007f
06121200
d2060809
02010109
081612f2
c8380200
c8390201
d282000e
042e1d09
10261d12
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c200090d
bf8c007f
101e0600
c200090c
bf8c007f
101c0400
c0800308
c0c60510
bf8c007f
f0800f00
00030e0e
bf8c0770
10042113
c2000921
bf8c007f
10061a00
c2000920
bf8c007f
d2820003
040c0108
c2000922
bf8c007f
d2820003
040c0112
c2000923
bf8c007f
d2820003
040c0107
c200091c
bf8c007f
06060600
d2060803
02010103
10062303
10241cff
3e59b3d0
d282000a
044a150f
d282000a
042a1910
7e1802ff
3a83126f
d00c0000
0202190a
d200000a
0002150c
7e14550a
10181510
d2820002
040a1903
100404f6
c8300b00
c8310b01
c2000913
bf8c007f
d00c0002
0200010c
7e240200
d200000c
000a1912
d2060812
0201010c
082624f2
c2000912
bf8c007f
10182600
d2820002
04320512
c8300100
c8310101
d282000c
042e1909
1018190d
10181f0c
101a150f
d282000c
04321b03
101818f6
c2000911
bf8c007f
101a2600
d282000c
04361912
c8340000
c8350001
d282000d
042e1b09
10101b08
10101d08
1014150e
d2820003
04221503
100606f6
c2000910
bf8c007f
10102600
d2820003
04220712
c8200300
c8210301
d2820000
042e1109
10000107
c2000915
c2008916
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000102
5e021903
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MAD TEMP[0].xyz, IN[4].xyzz, CONST[7].xyzz, CONST[6].xyzz
  2: DP4 TEMP[1].x, TEMP[0], IN[0]
  3: DP4 TEMP[2].x, TEMP[0], IN[1]
  4: MOV TEMP[1].y, TEMP[2].xxxx
  5: DP4 TEMP[0].x, TEMP[0], IN[2]
  6: MOV TEMP[1].z, TEMP[0].xxxx
  7: MUL TEMP[3], CONST[0], TEMP[1].xxxx
  8: MAD TEMP[2], CONST[1], TEMP[2].xxxx, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 10: ADD TEMP[0], TEMP[0], CONST[3]
 11: MOV TEMP[2].w, IMM[0].xxxx
 12: MOV TEMP[2].xyz, CONST[4].xyzx
 13: MUL TEMP[2], TEMP[2], IN[3]
 14: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[9].xyzz
 15: MAD TEMP[3].x, TEMP[0].zzzz, CONST[8].xxxx, CONST[8].yyyy
 16: MOV TEMP[1].w, TEMP[3].xxxx
 17: MAD TEMP[3].xy, IN[5].xyyy, CONST[5].xyyy, CONST[5].zwww
 18: MOV OUT[4], TEMP[3]
 19: MOV OUT[1], TEMP[2]
 20: MOV OUT[2], IN[6]
 21: MOV OUT[3], TEMP[1]
 22: MOV OUT[0], TEMP[0]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %5)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = extractelement <4 x float> %47, i32 3
  %52 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %5)
  %55 = extractelement <4 x float> %54, i32 0
  %56 = extractelement <4 x float> %54, i32 1
  %57 = extractelement <4 x float> %54, i32 2
  %58 = extractelement <4 x float> %54, i32 3
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = extractelement <4 x float> %68, i32 3
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = extractelement <4 x float> %75, i32 2
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0
  %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %5)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = extractelement <4 x float> %86, i32 2
  %90 = extractelement <4 x float> %86, i32 3
  %91 = fmul float %76, %37
  %92 = fadd float %91, %34
  %93 = fmul float %77, %38
  %94 = fadd float %93, %35
  %95 = fmul float %78, %39
  %96 = fadd float %95, %36
  %97 = fmul float %92, %48
  %98 = fmul float %94, %49
  %99 = fadd float %97, %98
  %100 = fmul float %96, %50
  %101 = fadd float %99, %100
  %102 = fmul float 1.000000e+00, %51
  %103 = fadd float %101, %102
  %104 = fmul float %92, %55
  %105 = fmul float %94, %56
  %106 = fadd float %104, %105
  %107 = fmul float %96, %57
  %108 = fadd float %106, %107
  %109 = fmul float 1.000000e+00, %58
  %110 = fadd float %108, %109
  %111 = fmul float %92, %62
  %112 = fmul float %94, %63
  %113 = fadd float %111, %112
  %114 = fmul float %96, %64
  %115 = fadd float %113, %114
  %116 = fmul float 1.000000e+00, %65
  %117 = fadd float %115, %116
  %118 = fmul float %11, %103
  %119 = fmul float %12, %103
  %120 = fmul float %13, %103
  %121 = fmul float %14, %103
  %122 = fmul float %15, %110
  %123 = fadd float %122, %118
  %124 = fmul float %16, %110
  %125 = fadd float %124, %119
  %126 = fmul float %17, %110
  %127 = fadd float %126, %120
  %128 = fmul float %18, %110
  %129 = fadd float %128, %121
  %130 = fmul float %19, %117
  %131 = fadd float %130, %123
  %132 = fmul float %20, %117
  %133 = fadd float %132, %125
  %134 = fmul float %21, %117
  %135 = fadd float %134, %127
  %136 = fmul float %22, %117
  %137 = fadd float %136, %129
  %138 = fadd float %131, %23
  %139 = fadd float %133, %24
  %140 = fadd float %135, %25
  %141 = fadd float %137, %26
  %142 = fmul float %27, %69
  %143 = fmul float %28, %70
  %144 = fmul float %29, %71
  %145 = fmul float 1.000000e+00, %72
  %146 = fsub float -0.000000e+00, %42
  %147 = fadd float %103, %146
  %148 = fsub float -0.000000e+00, %43
  %149 = fadd float %110, %148
  %150 = fsub float -0.000000e+00, %44
  %151 = fadd float %117, %150
  %152 = fmul float %140, %40
  %153 = fadd float %152, %41
  %154 = fmul float %82, %30
  %155 = fadd float %154, %32
  %156 = fmul float %83, %31
  %157 = fadd float %156, %33
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %142, float %143, float %144, float %145)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %87, float %88, float %89, float %90)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %147, float %149, float %151, float %153)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %155, float %157, float %120, float %121)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %138, float %139, float %140, float %141)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c084070c
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020112
bf8c007f
100a0604
c2020111
bf8c007f
100c0404
c2020110
bf8c007f
100e0204
f800020f
04050607
c0840718
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840710
bf8c000f
e00c2000
80020500
c202011d
c2028119
bf8c0070
7e020205
d2820003
04040906
c0840704
bf8c007f
e00c2000
80020900
bf8c0770
10021503
c202011c
c2028118
bf8c007f
7e040205
d2820004
04080905
d2820001
04061304
c202011e
c202811a
bf8c007f
7e040205
d2820006
04080907
d2820001
04061706
06021901
c0840700
bf8c007f
e00c2000
80020700
bf8c0770
10041103
d2820002
040a0f04
d2820002
040a1306
06041502
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820007
04160204
c0840708
bf8c007f
e00c2000
80020800
bf8c0770
10061303
d2820003
040e1104
d2820003
040e1506
06061703
c202010a
bf8c007f
d2820004
041e0604
c202010e
bf8c007f
06080804
c2020120
c2028121
bf8c007f
7e0c0205
d2820006
04180904
c2020126
bf8c007f
0a0e0604
c2020125
bf8c007f
0a100204
c2020124
bf8c007f
0a120404
f800022f
06070809
c2020103
bf8c000f
100c0404
c0820714
bf8c007f
e00c2000
80010700
c2020115
c2028117
bf8c0070
7e000205
d2820000
04000908
c2020114
c2028116
bf8c007f
7e160205
d2820007
042c0907
f800023f
06050007
c2020107
bf8c000f
d2820000
041a0204
c202010b
bf8c007f
d2820000
04020604
c202010f
bf8c007f
06000004
c2020101
bf8c007f
100a0404
c2020105
bf8c007f
d2820005
04160204
c2020109
bf8c007f
d2820005
04160604
c202010d
bf8c007f
060a0a04
c2020100
bf8c007f
10040404
c2020104
bf8c007f
d2820001
040a0204
c2020108
bf8c007f
d2820001
04060604
c200010c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MAD TEMP[2].x, TEMP[1].wwww, CONST[4].yyyy, CONST[4].zzzz
 28: SLT TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz
 29: F2I TEMP[2].x, -TEMP[2]
 30: UIF TEMP[2].xxxx :2
 31:   KILL
 32: ENDIF
 33: MOV OUT[0], TEMP[1]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %44 = load <32 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %48 = load <32 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %58 = fmul float %13, %41
  %59 = fadd float %58, %42
  %60 = bitcast float %56 to i32
  %61 = bitcast float %57 to i32
  %62 = insertelement <2 x i32> undef, i32 %60, i32 0
  %63 = insertelement <2 x i32> %62, i32 %61, i32 1
  %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %44, <16 x i8> %46, i32 2)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = extractelement <4 x float> %64, i32 1
  %67 = extractelement <4 x float> %64, i32 2
  %68 = extractelement <4 x float> %64, i32 3
  %69 = fmul float %65, 0x3FCB367A00000000
  %70 = fmul float %66, 0x3FE6E2EB20000000
  %71 = fadd float %70, %69
  %72 = fmul float %67, 0x3FB27BB300000000
  %73 = fadd float %71, %72
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %65)
  %75 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %66)
  %76 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %67)
  %77 = fmul float %74, %37
  %78 = fmul float %75, %38
  %79 = fadd float %77, %78
  %80 = fmul float %76, %39
  %81 = fadd float %79, %80
  %82 = fmul float %68, %40
  %83 = fadd float %81, %82
  %84 = fadd float %83, %32
  %85 = call float @llvm.AMDIL.clamp.(float %84, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %85, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %85, float %52, float 1.000000e+00)
  %88 = call float @llvm.AMDGPU.lrp(float %85, float %53, float 1.000000e+00)
  %89 = call float @llvm.AMDGPU.lrp(float %85, float %54, float 1.000000e+00)
  %90 = fmul float %74, %86
  %91 = fmul float %75, %87
  %92 = fmul float %76, %88
  %93 = fmul float %68, %89
  %94 = fmul float %12, %22
  %95 = fmul float %59, %23
  %96 = bitcast float %94 to i32
  %97 = bitcast float %95 to i32
  %98 = insertelement <2 x i32> undef, i32 %96, i32 0
  %99 = insertelement <2 x i32> %98, i32 %97, i32 1
  %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %48, <16 x i8> %50, i32 2)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = extractelement <4 x float> %100, i32 3
  %105 = fmul float %74, %33
  %106 = fmul float %75, %34
  %107 = fadd float %105, %106
  %108 = fmul float %76, %35
  %109 = fadd float %107, %108
  %110 = fmul float %68, %36
  %111 = fadd float %109, %110
  %112 = fadd float %111, %31
  %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00)
  %114 = fmul float %113, %104
  %115 = fmul float %101, 0x3FCB367A00000000
  %116 = fmul float %102, 0x3FE6E2EB20000000
  %117 = fadd float %116, %115
  %118 = fmul float %103, 0x3FB27BB300000000
  %119 = fadd float %117, %118
  %120 = fcmp uge float %119, 0x3F50624DE0000000
  %121 = select i1 %120, float %119, float 0x3F50624DE0000000
  %122 = fdiv float 1.000000e+00, %121
  %123 = fmul float %101, %122
  %124 = fmul float %102, %122
  %125 = fmul float %103, %122
  %126 = fmul float %90, %101
  %127 = fmul float %91, %102
  %128 = fmul float %92, %103
  %129 = fmul float %114, %123
  %130 = fadd float %129, %126
  %131 = fmul float %114, %124
  %132 = fadd float %131, %127
  %133 = fmul float %114, %125
  %134 = fadd float %133, %128
  %135 = fmul float %130, 4.000000e+00
  %136 = fmul float %132, 4.000000e+00
  %137 = fmul float %134, 4.000000e+00
  %138 = fcmp uge float %55, %27
  %139 = select i1 %138, float %55, float %27
  %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
  %141 = call float @llvm.AMDGPU.lrp(float %140, float %135, float %24)
  %142 = call float @llvm.AMDGPU.lrp(float %140, float %136, float %25)
  %143 = call float @llvm.AMDGPU.lrp(float %140, float %137, float %26)
  %144 = fmul float %93, %29
  %145 = fadd float %144, %30
  %146 = fcmp ult float %145, 0.000000e+00
  %147 = select i1 %146, float 1.000000e+00, float 0.000000e+00
  %148 = fsub float -0.000000e+00, %147
  %149 = fptosi float %148 to i32
  %150 = bitcast i32 %149 to float
  %151 = bitcast float %150 to i32
  %152 = icmp ne i32 %151, 0
  br i1 %152, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %153 = call i32 @llvm.SI.packf16(float %141, float %142)
  %154 = bitcast i32 %153 to float
  %155 = call i32 @llvm.SI.packf16(float %143, float %93)
  %156 = bitcast i32 %155 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %154, float %156, float %154, float %156)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1802ff
3f371759
d2820008
04221905
7e1a02ff
3d93dd98
d2820009
04221b06
c0840100
bf8c007f
c2000910
bf8c007f
d208000a
020000f2
1010090a
d2820008
04221200
10160b0a
d282000b
042e1200
c2008921
bf8c007f
101c1601
c2008920
bf8c007f
d282000e
04380308
10140d0a
d2820012
042a1200
c2000922
bf8c007f
d2820009
04380112
c2000923
bf8c007f
d2820009
04240107
c2000919
bf8c007f
06121200
d2060809
02010109
081412f2
c8380200
c8390201
d282000e
042a1d09
10261d12
c200092c
c200892d
bf8c007f
7e1c0201
d2820003
04380103
c2000909
bf8c007f
101e0600
c2000908
bf8c007f
101c0400
c0800304
c0c60508
bf8c007f
f0800f00
00030e0e
bf8c0770
10042113
c200091d
bf8c007f
10061600
c200091c
bf8c007f
d2820003
040c0108
c200091e
bf8c007f
d2820003
040c0112
c200091f
bf8c007f
d2820003
040c0107
c2000918
bf8c007f
06060600
d2060803
02010103
10062303
10241cff
3e59b3d0
d282000c
044a190f
d282000c
04321b10
7e1a02ff
3a83126f
d00c0000
02021b0c
d200000c
0002190d
7e18550c
101a1910
d2820002
040a1b03
100404f6
c8340700
c8350701
c200090f
bf8c007f
d00c0002
0200010d
7e240200
d200000d
000a1b12
d206080d
0201010d
08241af2
c200090e
bf8c007f
10262400
d2820002
044e050d
c84c0100
c84d0101
d2820013
042a2709
1016270b
10161f0b
1026190f
d282000b
042e2703
101616f6
c200090d
bf8c007f
10262400
d282000b
044e170d
c84c0000
c84d0001
d2820013
042a2709
10102708
10101d08
1018190e
d2820003
04221903
100606f6
c200090c
bf8c007f
10102400
d2820003
0422070d
c8200300
c8210301
d2820000
042a1109
10000107
c2000911
c2008912
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000102
5e021703
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..205]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+14], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+14], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+14], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+14], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[4], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[5], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[1], CONST[6], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[1], TEMP[1], CONST[7]
 46: MOV TEMP[4].w, IMM[0].xxxx
 47: MOV TEMP[4].xyz, CONST[8].xyzx
 48: MUL TEMP[2], CONST[0], TEMP[2].xxxx
 49: MAD TEMP[2], CONST[1], TEMP[3].xxxx, TEMP[2]
 50: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 51: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 52: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 53: MAD TEMP[2].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 54: MOV TEMP[0].w, TEMP[2].xxxx
 55: MAD TEMP[2].xy, IN[3].xyyy, CONST[9].xyyy, CONST[9].zwww
 56: MOV OUT[3], TEMP[2]
 57: MOV OUT[1], TEMP[4]
 58: MOV OUT[2], TEMP[0]
 59: MOV OUT[0], TEMP[1]
 60: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = extractelement <4 x float> %67, i32 2
  %71 = extractelement <4 x float> %67, i32 3
  %72 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %5)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = extractelement <4 x float> %74, i32 1
  %77 = extractelement <4 x float> %74, i32 2
  %78 = extractelement <4 x float> %74, i32 3
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fptosi float %75 to i32
  %85 = fptosi float %76 to i32
  %86 = fptosi float %77 to i32
  %87 = fptosi float %78 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %62, %51
  %93 = fadd float %92, %48
  %94 = fmul float %63, %52
  %95 = fadd float %94, %49
  %96 = fmul float %64, %53
  %97 = fadd float %96, %50
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 224
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %68
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 228
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %68
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 232
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %68
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 236
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %68
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 224
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %69
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 228
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %69
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 232
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %69
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 236
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %69
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 224
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %70
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 228
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %70
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 232
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %70
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 236
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %70
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 224
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %71
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 228
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %71
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 232
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %71
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 236
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %71
  %189 = fadd float %188, %168
  %190 = fmul float %93, %174
  %191 = fmul float %95, %179
  %192 = fadd float %190, %191
  %193 = fmul float %97, %184
  %194 = fadd float %192, %193
  %195 = fmul float 1.000000e+00, %189
  %196 = fadd float %194, %195
  %197 = bitcast float %91 to i32
  %198 = mul i32 3, %197
  %199 = add i32 %198, 1
  %200 = bitcast i32 %199 to float
  %201 = bitcast float %90 to i32
  %202 = mul i32 3, %201
  %203 = add i32 %202, 1
  %204 = bitcast i32 %203 to float
  %205 = bitcast float %89 to i32
  %206 = mul i32 3, %205
  %207 = add i32 %206, 1
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %88 to i32
  %210 = mul i32 3, %209
  %211 = add i32 %210, 1
  %212 = bitcast i32 %211 to float
  %213 = bitcast float %212 to i32
  %214 = shl i32 %213, 4
  %215 = add i32 %214, 224
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %68
  %218 = shl i32 %213, 4
  %219 = add i32 %218, 228
  %220 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %219)
  %221 = fmul float %220, %68
  %222 = shl i32 %213, 4
  %223 = add i32 %222, 232
  %224 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %223)
  %225 = fmul float %224, %68
  %226 = shl i32 %213, 4
  %227 = add i32 %226, 236
  %228 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %227)
  %229 = fmul float %228, %68
  %230 = bitcast float %208 to i32
  %231 = shl i32 %230, 4
  %232 = add i32 %231, 224
  %233 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %232)
  %234 = fmul float %233, %69
  %235 = fadd float %234, %217
  %236 = shl i32 %230, 4
  %237 = add i32 %236, 228
  %238 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %237)
  %239 = fmul float %238, %69
  %240 = fadd float %239, %221
  %241 = shl i32 %230, 4
  %242 = add i32 %241, 232
  %243 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %242)
  %244 = fmul float %243, %69
  %245 = fadd float %244, %225
  %246 = shl i32 %230, 4
  %247 = add i32 %246, 236
  %248 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %247)
  %249 = fmul float %248, %69
  %250 = fadd float %249, %229
  %251 = bitcast float %204 to i32
  %252 = shl i32 %251, 4
  %253 = add i32 %252, 224
  %254 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %253)
  %255 = fmul float %254, %70
  %256 = fadd float %255, %235
  %257 = shl i32 %251, 4
  %258 = add i32 %257, 228
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %70
  %261 = fadd float %260, %240
  %262 = shl i32 %251, 4
  %263 = add i32 %262, 232
  %264 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %263)
  %265 = fmul float %264, %70
  %266 = fadd float %265, %245
  %267 = shl i32 %251, 4
  %268 = add i32 %267, 236
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = fmul float %269, %70
  %271 = fadd float %270, %250
  %272 = bitcast float %200 to i32
  %273 = shl i32 %272, 4
  %274 = add i32 %273, 224
  %275 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %274)
  %276 = fmul float %275, %71
  %277 = fadd float %276, %256
  %278 = shl i32 %272, 4
  %279 = add i32 %278, 228
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = fmul float %280, %71
  %282 = fadd float %281, %261
  %283 = shl i32 %272, 4
  %284 = add i32 %283, 232
  %285 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %284)
  %286 = fmul float %285, %71
  %287 = fadd float %286, %266
  %288 = shl i32 %272, 4
  %289 = add i32 %288, 236
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %71
  %292 = fadd float %291, %271
  %293 = fmul float %93, %277
  %294 = fmul float %95, %282
  %295 = fadd float %293, %294
  %296 = fmul float %97, %287
  %297 = fadd float %295, %296
  %298 = fmul float 1.000000e+00, %292
  %299 = fadd float %297, %298
  %300 = bitcast float %91 to i32
  %301 = mul i32 3, %300
  %302 = add i32 %301, 2
  %303 = bitcast i32 %302 to float
  %304 = bitcast float %90 to i32
  %305 = mul i32 3, %304
  %306 = add i32 %305, 2
  %307 = bitcast i32 %306 to float
  %308 = bitcast float %89 to i32
  %309 = mul i32 3, %308
  %310 = add i32 %309, 2
  %311 = bitcast i32 %310 to float
  %312 = bitcast float %88 to i32
  %313 = mul i32 3, %312
  %314 = add i32 %313, 2
  %315 = bitcast i32 %314 to float
  %316 = bitcast float %315 to i32
  %317 = shl i32 %316, 4
  %318 = add i32 %317, 224
  %319 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %318)
  %320 = fmul float %319, %68
  %321 = shl i32 %316, 4
  %322 = add i32 %321, 228
  %323 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %322)
  %324 = fmul float %323, %68
  %325 = shl i32 %316, 4
  %326 = add i32 %325, 232
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %68
  %329 = shl i32 %316, 4
  %330 = add i32 %329, 236
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %68
  %333 = bitcast float %311 to i32
  %334 = shl i32 %333, 4
  %335 = add i32 %334, 224
  %336 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %335)
  %337 = fmul float %336, %69
  %338 = fadd float %337, %320
  %339 = shl i32 %333, 4
  %340 = add i32 %339, 228
  %341 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %340)
  %342 = fmul float %341, %69
  %343 = fadd float %342, %324
  %344 = shl i32 %333, 4
  %345 = add i32 %344, 232
  %346 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %345)
  %347 = fmul float %346, %69
  %348 = fadd float %347, %328
  %349 = shl i32 %333, 4
  %350 = add i32 %349, 236
  %351 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %350)
  %352 = fmul float %351, %69
  %353 = fadd float %352, %332
  %354 = bitcast float %307 to i32
  %355 = shl i32 %354, 4
  %356 = add i32 %355, 224
  %357 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %356)
  %358 = fmul float %357, %70
  %359 = fadd float %358, %338
  %360 = shl i32 %354, 4
  %361 = add i32 %360, 228
  %362 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %361)
  %363 = fmul float %362, %70
  %364 = fadd float %363, %343
  %365 = shl i32 %354, 4
  %366 = add i32 %365, 232
  %367 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %366)
  %368 = fmul float %367, %70
  %369 = fadd float %368, %348
  %370 = shl i32 %354, 4
  %371 = add i32 %370, 236
  %372 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %371)
  %373 = fmul float %372, %70
  %374 = fadd float %373, %353
  %375 = bitcast float %303 to i32
  %376 = shl i32 %375, 4
  %377 = add i32 %376, 224
  %378 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %377)
  %379 = fmul float %378, %71
  %380 = fadd float %379, %359
  %381 = shl i32 %375, 4
  %382 = add i32 %381, 228
  %383 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %382)
  %384 = fmul float %383, %71
  %385 = fadd float %384, %364
  %386 = shl i32 %375, 4
  %387 = add i32 %386, 232
  %388 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %387)
  %389 = fmul float %388, %71
  %390 = fadd float %389, %369
  %391 = shl i32 %375, 4
  %392 = add i32 %391, 236
  %393 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %392)
  %394 = fmul float %393, %71
  %395 = fadd float %394, %374
  %396 = fmul float %93, %380
  %397 = fmul float %95, %385
  %398 = fadd float %396, %397
  %399 = fmul float %97, %390
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %395
  %402 = fadd float %400, %401
  %403 = fmul float %25, %196
  %404 = fmul float %26, %196
  %405 = fmul float %27, %196
  %406 = fmul float %28, %196
  %407 = fmul float %29, %299
  %408 = fadd float %407, %403
  %409 = fmul float %30, %299
  %410 = fadd float %409, %404
  %411 = fmul float %31, %299
  %412 = fadd float %411, %405
  %413 = fmul float %32, %299
  %414 = fadd float %413, %406
  %415 = fmul float %33, %402
  %416 = fadd float %415, %408
  %417 = fmul float %34, %402
  %418 = fadd float %417, %410
  %419 = fmul float %35, %402
  %420 = fadd float %419, %412
  %421 = fmul float %36, %402
  %422 = fadd float %421, %414
  %423 = fadd float %416, %37
  %424 = fadd float %418, %38
  %425 = fadd float %420, %39
  %426 = fadd float %422, %40
  %427 = fmul float %11, %196
  %428 = fmul float %12, %196
  %429 = fmul float %13, %196
  %430 = fmul float %14, %196
  %431 = fmul float %15, %299
  %432 = fadd float %431, %427
  %433 = fmul float %16, %299
  %434 = fadd float %433, %428
  %435 = fmul float %17, %299
  %436 = fadd float %435, %429
  %437 = fmul float %18, %299
  %438 = fadd float %437, %430
  %439 = fmul float %19, %402
  %440 = fadd float %439, %432
  %441 = fmul float %20, %402
  %442 = fadd float %441, %434
  %443 = fmul float %21, %402
  %444 = fadd float %443, %436
  %445 = fadd float %440, %22
  %446 = fadd float %442, %23
  %447 = fadd float %444, %24
  %448 = fsub float -0.000000e+00, %56
  %449 = fadd float %445, %448
  %450 = fsub float -0.000000e+00, %57
  %451 = fadd float %446, %450
  %452 = fsub float -0.000000e+00, %58
  %453 = fadd float %447, %452
  %454 = fmul float %425, %54
  %455 = fadd float %454, %55
  %456 = fmul float %82, %44
  %457 = fadd float %456, %46
  %458 = fmul float %83, %45
  %459 = fadd float %458, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %449, float %451, float %453, float %455)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %457, float %459, float %436, float %438)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %423, float %424, float %425, float %426)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020f00
bf8c0770
7e02110f
d2d60006
02010701
340a0c84
4a020aff
000000e0
e0301000
80000701
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
10100307
7e0e1110
d2d60007
02010707
34140e84
4a1214ff
000000e0
e0301000
80000909
bf8c0770
d2820009
04220509
7e101111
d2d60008
02010708
341c1084
4a161cff
000000e0
e0301000
80000b0b
bf8c0770
d282000b
0426070b
7e121112
d2d60009
02010709
341e1284
4a181eff
000000e0
e0301000
80000c0c
bf8c0770
d282000d
042e090c
4a160aff
000000e4
e0301000
80000b0b
bf8c0770
1016030b
4a1814ff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e050c
4a181cff
000000e4
e0301000
80000c0c
bf8c0770
d282000b
042e070c
4a181eff
000000e4
e0301000
80000c0c
bf8c0770
d282000c
042e090c
c0840700
bf8c007f
e00c2000
80021000
c202012d
c2028129
bf8c0070
7e160205
d282000b
042c0911
1028190b
c202012c
c2028128
bf8c007f
7e180205
d282000c
04300910
d2820014
04521b0c
4a1a0aff
000000e8
e0301000
80000d0d
bf8c0770
101a030d
4a2a14ff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360515
4a2a1cff
000000e8
e0301000
80001515
bf8c0770
d282000d
04360715
4a2a1eff
000000e8
e0301000
80001515
bf8c0770
d2820015
04360915
c202012e
c202812a
bf8c007f
7e1a0205
d282000d
04340912
d2820010
04522b0d
4a0a0aff
000000ec
e0301000
80000505
bf8c0770
100a0305
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416050a
4a141cff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416070a
4a141eff
000000ec
e0301000
80000a0a
bf8c0770
d2820005
0416090a
060a0b10
c2020112
bf8c007f
101c0a04
4a140c81
34141484
4a1e14ff
000000e0
e0301000
80000f0f
bf8c0770
1020030f
4a1e0e81
341e1e84
4a221eff
000000e0
e0301000
80001111
bf8c0770
d2820011
04420511
4a201081
34202084
4a2420ff
000000e0
e0301000
80001212
bf8c0770
d2820012
04460712
4a221281
34222284
4a2622ff
000000e0
e0301000
80001313
bf8c0770
d2820012
044a0913
4a2614ff
000000e4
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e4
e0301000
80001414
bf8c0770
d2820013
044e0914
1026270b
d2820012
044e250c
4a2614ff
000000e8
e0301000
80001313
bf8c0770
10260313
4a281eff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
000000e8
e0301000
80001414
bf8c0770
d2820013
044e0914
d2820012
044a270d
4a1414ff
000000ec
e0301000
80000a0a
bf8c0770
1014030a
4a1e1eff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a050f
4a1e20ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a070f
4a1e22ff
000000ec
e0301000
80000f0f
bf8c0770
d282000a
042a090f
06141512
c2020116
bf8c007f
d282000e
043a1404
4a0c0c82
340c0c84
4a1e0cff
000000e0
e0301000
80000f0f
bf8c0770
101e030f
4a0e0e82
340e0e84
4a200eff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0510
4a101082
34101084
4a2010ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0710
4a121282
34121284
4a2012ff
000000e0
e0301000
80001010
bf8c0770
d282000f
043e0910
4a200cff
000000e4
e0301000
80001010
bf8c0770
10200310
4a220eff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420511
4a2210ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420711
4a2212ff
000000e4
e0301000
80001111
bf8c0770
d2820010
04420911
1016210b
d282000b
042e1f0c
4a180cff
000000e8
e0301000
80000c0c
bf8c0770
1018030c
4a1e0eff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432050f
4a1e10ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432070f
4a1e12ff
000000e8
e0301000
80000f0f
bf8c0770
d282000c
0432090f
d282000b
042e190d
4a0c0cff
000000ec
e0301000
80000606
bf8c0770
100c0306
4a0e0eff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0507
4a0e10ff
000000ec
e0301000
80000707
bf8c0770
d2820006
041a0707
4a0e12ff
000000ec
e0301000
80000707
bf8c0770
d2820001
041a0907
0602030b
c202011a
bf8c007f
d2820002
043a0204
c202011e
bf8c007f
06040404
c2020130
c2028131
bf8c007f
7e060205
d2820004
040c0902
c2020102
bf8c007f
10060a04
c2020106
bf8c007f
d2820003
040e1404
c202010a
bf8c007f
d2820006
040e0204
c202010e
bf8c007f
060c0c04
c2020136
bf8c007f
0a0c0c04
c2020101
bf8c007f
100e0a04
c2020105
bf8c007f
d2820007
041e1404
c2020109
bf8c007f
d2820007
041e0204
c202010d
bf8c007f
060e0e04
c2020135
bf8c007f
0a0e0e04
c2020100
bf8c007f
10100a04
c2020104
bf8c007f
d2820008
04221404
c2020108
bf8c007f
d2820008
04220204
c202010c
bf8c007f
06101004
c2020134
bf8c007f
0a101004
f800021f
04060708
c2020103
bf8c000f
10080a04
c2020107
bf8c007f
d2820004
04121404
c082070c
bf8c007f
e00c2000
80010600
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000907
c2020124
c2028126
bf8c007f
7e160205
d2820006
042c0906
f800022f
04030006
c2020113
bf8c000f
10000a04
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020204
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10060a04
c2020115
bf8c007f
d2820003
040e1404
c2020119
bf8c007f
d2820003
040e0204
c202011d
bf8c007f
06060604
c2020110
bf8c007f
10080a04
c2020114
bf8c007f
d2820004
04121404
c2020118
bf8c007f
d2820001
04120204
c200011c
bf8c007f
06020200
f80008cf
00020301
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..10]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[8].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MOV OUT[1], TEMP[1]
 13: MOV OUT[0], TEMP[0]
 14: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fmul float %46, %40
  %56 = fadd float %55, %37
  %57 = fmul float %47, %41
  %58 = fadd float %57, %38
  %59 = fmul float %48, %42
  %60 = fadd float %59, %39
  %61 = fmul float %20, %56
  %62 = fmul float %21, %56
  %63 = fmul float %22, %56
  %64 = fmul float %23, %56
  %65 = fmul float %24, %58
  %66 = fadd float %65, %61
  %67 = fmul float %25, %58
  %68 = fadd float %67, %62
  %69 = fmul float %26, %58
  %70 = fadd float %69, %63
  %71 = fmul float %27, %58
  %72 = fadd float %71, %64
  %73 = fmul float %28, %60
  %74 = fadd float %73, %66
  %75 = fmul float %29, %60
  %76 = fadd float %75, %68
  %77 = fmul float %30, %60
  %78 = fadd float %77, %70
  %79 = fmul float %31, %60
  %80 = fadd float %79, %72
  %81 = fadd float %74, %32
  %82 = fadd float %76, %33
  %83 = fadd float %78, %34
  %84 = fadd float %80, %35
  %85 = fmul float %52, %36
  %86 = fmul float %53, %36
  %87 = fmul float %54, %36
  %88 = fmul float %11, %85
  %89 = fmul float %12, %85
  %90 = fmul float %13, %85
  %91 = fmul float %14, %86
  %92 = fadd float %91, %88
  %93 = fmul float %15, %86
  %94 = fadd float %93, %89
  %95 = fmul float %16, %86
  %96 = fadd float %95, %90
  %97 = fmul float %17, %87
  %98 = fadd float %97, %92
  %99 = fmul float %18, %87
  %100 = fadd float %99, %94
  %101 = fmul float %19, %87
  %102 = fadd float %101, %96
  %103 = fmul float %98, %98
  %104 = fmul float %100, %100
  %105 = fadd float %104, %103
  %106 = fmul float %102, %102
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %98, %108
  %110 = fmul float %100, %108
  %111 = fmul float %102, %108
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %111, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020123
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820001
04060804
d2820002
041a0301
7e045b02
100c0501
100e0503
10100505
c0820700
bf8c007f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020113
bf8c007f
10120204
c2020117
bf8c007f
d2820009
04260004
f800020f
09060708
c202012a
c2028126
bf8c000f
7e0c0205
d2820002
04180904
c202011b
bf8c007f
d2820003
04260404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[17..18]
DCL CONST[3..16]
DCL TEMP[0]
DCL TEMP[1..6], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[18].xxxx, CONST[18].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[3], CONST[4]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[2], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[5].xxxx
 14: MOV TEMP[6].z, IMM[0].zzzz
 15: MOV TEMP[6].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[5].zzzz, CONST[5].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx
 21: MUL TEMP[6], CONST[6], TEMP[3].xxxx
 22: MAD TEMP[3], CONST[7], TEMP[3].yyyy, TEMP[6]
 23: MAD TEMP[3].xyz, CONST[8], TEMP[5].xxxx, TEMP[3]
 24: MOV TEMP[3].xyz, TEMP[3].xyzz
 25: TEX TEMP[3].xyz, TEMP[3], SAMP[1], CUBE
 26: DP3 TEMP[5].x, TEMP[4].xyzz, CONST[10].xyzz
 27: ADD TEMP[5].x, TEMP[5].xxxx, CONST[12].wwww
 28: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz
 29: RSQ TEMP[6].x, TEMP[6].xxxx
 30: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[6].xxxx
 31: ADD TEMP[1].xyz, CONST[15].xyzz, -TEMP[1].xyzz
 32: DP3 TEMP[6].x, TEMP[1].xyzz, TEMP[1].xyzz
 33: RSQ TEMP[6].x, TEMP[6].xxxx
 34: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[6].xxxx
 35: DP3_SAT TEMP[1].x, TEMP[1].xyzz, TEMP[4].xyzz
 36: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 37: POW TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 38: SGE TEMP[2].x, TEMP[5].xxxx, IMM[1].yyyy
 39: F2I TEMP[2].x, -TEMP[2]
 40: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
 41: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 42: MOV_SAT TEMP[2].x, TEMP[5].xxxx
 43: LRP TEMP[2].xyz, TEMP[2].xxxx, CONST[11].xyzz, CONST[13].xyzz
 44: MOV_SAT TEMP[4].x, -TEMP[5].xxxx
 45: LRP TEMP[4].xyz, TEMP[4].xxxx, CONST[12].xyzz, CONST[13].xyzz
 46: SLT TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 47: F2I TEMP[5].x, -TEMP[5]
 48: UIF TEMP[5].xxxx :2
 49:   MOV TEMP[4].xyz, TEMP[4].xyzx
 50: ELSE :2
 51:   MOV TEMP[4].xyz, TEMP[2].xyzx
 52: ENDIF
 53: ADD TEMP[2].xyz, TEMP[4].xyzz, TEMP[3].xyzz
 54: MUL TEMP[1].x, CONST[14].wwww, TEMP[1].xxxx
 55: MOV TEMP[2].w, TEMP[1].xxxx
 56: MOV OUT[0], TEMP[2]
 57: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 288)
  %61 = call float @llvm.SI.load.const(<16 x i8> %21, i32 292)
  %62 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %63 = load <32 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %67 = load <32 x i8> addrspace(2)* %66, !tbaa !0
  %68 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0
  %70 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %71 = load <32 x i8> addrspace(2)* %70, !tbaa !0
  %72 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0
  %74 = fmul float %13, %60
  %75 = fadd float %74, %61
  %76 = fmul float %12, %22
  %77 = fadd float %76, %26
  %78 = fmul float %75, %23
  %79 = fadd float %78, %27
  %80 = fmul float %12, %24
  %81 = fadd float %80, %28
  %82 = fmul float %75, %25
  %83 = fadd float %82, %29
  %84 = bitcast float %77 to i32
  %85 = bitcast float %79 to i32
  %86 = insertelement <2 x i32> undef, i32 %84, i32 0
  %87 = insertelement <2 x i32> %86, i32 %85, i32 1
  %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %71, <16 x i8> %73, i32 2)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = extractelement <4 x float> %88, i32 2
  %92 = extractelement <4 x float> %88, i32 3
  %93 = fmul float %90, 2.000000e+00
  %94 = fadd float %93, -1.000000e+00
  %95 = fmul float %91, 2.000000e+00
  %96 = fadd float %95, -1.000000e+00
  %97 = fmul float %92, 2.000000e+00
  %98 = fadd float %97, -1.000000e+00
  %99 = fmul float %94, %94
  %100 = fmul float %96, %96
  %101 = fadd float %99, %100
  %102 = fsub float -0.000000e+00, %101
  %103 = fadd float 1.000000e+00, %102
  %104 = call float @llvm.AMDIL.clamp.(float %103, float 0.000000e+00, float 1.000000e+00)
  %105 = call float @llvm.AMDGPU.rsq(float %104)
  %106 = fmul float %105, %104
  %107 = fsub float -0.000000e+00, %104
  %108 = call float @llvm.AMDGPU.cndlt(float %107, float %106, float 0.000000e+00)
  %109 = fmul float %108, %98
  %110 = bitcast float %77 to i32
  %111 = bitcast float %79 to i32
  %112 = insertelement <2 x i32> undef, i32 %110, i32 0
  %113 = insertelement <2 x i32> %112, i32 %111, i32 1
  %114 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %63, <16 x i8> %65, i32 2)
  %115 = extractelement <4 x float> %114, i32 0
  %116 = fmul float %115, %30
  %117 = fadd float %116, %31
  %118 = fdiv float 1.000000e+00, %117
  %119 = fmul float %81, %118
  %120 = fmul float %83, %118
  %121 = fmul float 1.000000e+00, %118
  %122 = fmul float %32, %94
  %123 = fmul float %33, %94
  %124 = fmul float %34, %94
  %125 = fmul float %35, %94
  %126 = fmul float %36, %96
  %127 = fadd float %126, %122
  %128 = fmul float %37, %96
  %129 = fadd float %128, %123
  %130 = fmul float %38, %96
  %131 = fadd float %130, %124
  %132 = fmul float %39, %96
  %133 = fadd float %132, %125
  %134 = fmul float %40, %109
  %135 = fadd float %134, %127
  %136 = fmul float %41, %109
  %137 = fadd float %136, %129
  %138 = fmul float %42, %109
  %139 = fadd float %138, %131
  %140 = insertelement <4 x float> undef, float %135, i32 0
  %141 = insertelement <4 x float> %140, float %137, i32 1
  %142 = insertelement <4 x float> %141, float %139, i32 2
  %143 = insertelement <4 x float> %142, float %133, i32 3
  %144 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %143)
  %145 = extractelement <4 x float> %144, i32 0
  %146 = extractelement <4 x float> %144, i32 1
  %147 = extractelement <4 x float> %144, i32 2
  %148 = extractelement <4 x float> %144, i32 3
  %149 = call float @fabs(float %147)
  %150 = fdiv float 1.000000e+00, %149
  %151 = fmul float %145, %150
  %152 = fadd float %151, 1.500000e+00
  %153 = fmul float %146, %150
  %154 = fadd float %153, 1.500000e+00
  %155 = bitcast float %154 to i32
  %156 = bitcast float %152 to i32
  %157 = bitcast float %148 to i32
  %158 = insertelement <4 x i32> undef, i32 %155, i32 0
  %159 = insertelement <4 x i32> %158, i32 %156, i32 1
  %160 = insertelement <4 x i32> %159, i32 %157, i32 2
  %161 = insertelement <4 x i32> %160, i32 undef, i32 3
  %162 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %161, <32 x i8> %67, <16 x i8> %69, i32 4)
  %163 = extractelement <4 x float> %162, i32 0
  %164 = extractelement <4 x float> %162, i32 1
  %165 = extractelement <4 x float> %162, i32 2
  %166 = fmul float %94, %43
  %167 = fmul float %96, %44
  %168 = fadd float %167, %166
  %169 = fmul float %109, %45
  %170 = fadd float %168, %169
  %171 = fadd float %170, %52
  %172 = fmul float %119, %119
  %173 = fmul float %120, %120
  %174 = fadd float %173, %172
  %175 = fmul float %121, %121
  %176 = fadd float %174, %175
  %177 = call float @llvm.AMDGPU.rsq(float %176)
  %178 = fmul float %119, %177
  %179 = fmul float %120, %177
  %180 = fmul float %121, %177
  %181 = fsub float -0.000000e+00, %178
  %182 = fadd float %57, %181
  %183 = fsub float -0.000000e+00, %179
  %184 = fadd float %58, %183
  %185 = fsub float -0.000000e+00, %180
  %186 = fadd float %59, %185
  %187 = fmul float %182, %182
  %188 = fmul float %184, %184
  %189 = fadd float %188, %187
  %190 = fmul float %186, %186
  %191 = fadd float %189, %190
  %192 = call float @llvm.AMDGPU.rsq(float %191)
  %193 = fmul float %182, %192
  %194 = fmul float %184, %192
  %195 = fmul float %186, %192
  %196 = fmul float %193, %94
  %197 = fmul float %194, %96
  %198 = fadd float %197, %196
  %199 = fmul float %195, %109
  %200 = fadd float %198, %199
  %201 = call float @llvm.AMDIL.clamp.(float %200, float 0.000000e+00, float 1.000000e+00)
  %202 = fmul float %89, 1.024000e+03
  %203 = call float @llvm.pow.f32(float %201, float %202)
  %204 = fcmp uge float %171, 0x3F847AE140000000
  %205 = select i1 %204, float 1.000000e+00, float 0.000000e+00
  %206 = fsub float -0.000000e+00, %205
  %207 = fptosi float %206 to i32
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %208 to i32
  %210 = and i32 %209, 1065353216
  %211 = bitcast i32 %210 to float
  %212 = fmul float %203, %211
  %213 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00)
  %214 = call float @llvm.AMDGPU.lrp(float %213, float %46, float %53)
  %215 = call float @llvm.AMDGPU.lrp(float %213, float %47, float %54)
  %216 = call float @llvm.AMDGPU.lrp(float %213, float %48, float %55)
  %217 = fsub float -0.000000e+00, %171
  %218 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00)
  %219 = call float @llvm.AMDGPU.lrp(float %218, float %49, float %53)
  %220 = call float @llvm.AMDGPU.lrp(float %218, float %50, float %54)
  %221 = call float @llvm.AMDGPU.lrp(float %218, float %51, float %55)
  %222 = fcmp ult float %171, 0.000000e+00
  %223 = select i1 %222, float 1.000000e+00, float 0.000000e+00
  %224 = fsub float -0.000000e+00, %223
  %225 = fptosi float %224 to i32
  %226 = bitcast i32 %225 to float
  %227 = bitcast float %226 to i32
  %228 = icmp ne i32 %227, 0
  %. = select i1 %228, float %219, float %214
  %.28 = select i1 %228, float %220, float %215
  %.29 = select i1 %228, float %221, float %216
  %229 = fadd float %., %163
  %230 = fadd float %.28, %164
  %231 = fadd float %.29, %165
  %232 = fmul float %56, %212
  %233 = call i32 @llvm.SI.packf16(float %229, float %230)
  %234 = bitcast i32 %233 to float
  %235 = call i32 @llvm.SI.packf16(float %231, float %232)
  %236 = bitcast i32 %235 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %234, float %236, float %234, float %236)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c2000948
c2008949
bf8c007f
7e000201
d2820007
04000103
c200090d
c2008911
bf8c007f
7e000201
d282000a
04000107
c200090c
c2008910
bf8c007f
7e000201
d2820009
04000102
c0860308
c0c80510
bf8c007f
f0800f00
00640309
bf8c0770
06000904
060000f3
06020b05
060202f3
10100301
d2820008
04220100
081010f2
d2060808
02010108
7e165b08
1016110b
d2060008
22010108
d0080000
02021080
d2000008
00021680
06160d06
061616f3
10101708
c2000919
bf8c007f
10160000
c200091d
bf8c007f
d282000b
042e0200
c2000921
bf8c007f
d282000c
042e1000
c2000918
bf8c007f
101e0000
c200091c
bf8c007f
d282000f
043e0200
c2000920
bf8c007f
d282000b
043e1000
c200091a
bf8c007f
101e0000
c200091e
bf8c007f
d282000f
043e0200
c2000922
bf8c007f
d282000d
043e1000
c200091b
bf8c007f
101e0000
c200091f
bf8c007f
d282000e
043e0200
d28a0010
0436190b
d28c000f
0436190b
d28e0011
0436190b
d2880012
0436190b
d206010b
02010111
7e16550b
7e1802ff
3fc00000
d2820011
0432170f
d2820010
04321710
c0860304
c0c80508
bf8c007f
f0800700
00640d10
c2000928
bf8c0070
10160000
c2000929
bf8c007f
d282000b
042c0101
c200092a
bf8c007f
d282000b
042c0108
c2000933
bf8c007f
06161600
d2060810
0201010b
082220f2
c2000935
bf8c007f
10182200
c200892d
bf8c007f
d282000c
04300310
d2060012
2201010b
d2060812
02010112
082624f2
10282600
c2000931
bf8c007f
d2820014
04500112
d0020000
0201010b
d2000015
0001e480
d2060015
22010115
7e2a1115
d10a0000
02010115
d200000c
0002290c
06181d0c
c2030934
bf8c007f
10282206
c203892c
bf8c007f
d2820014
04500f10
102a2606
c2030930
bf8c007f
d2820015
04540d12
d2000014
00022b14
06281b14
5e181914
c2030936
bf8c007f
10222206
c203892e
bf8c007f
d2820010
04440f10
10222606
c2030932
bf8c007f
d2820011
04440d12
d2000010
00022310
061a1f10
c0800300
c0c60500
bf8c007f
f0800100
00030909
c2000916
c2008917
bf8c0070
7e140201
d2820009
04280109
7e125509
c200090f
c2008913
bf8c007f
7e140201
d2820007
04280107
100e1307
c200090e
c2008912
bf8c007f
7e140201
d2820002
04280102
101c1302
10041d0e
d2820002
040a0f07
d2820002
040a1309
7e145b02
10041507
c200093d
bf8c007f
08040400
100e150e
c200093c
bf8c007f
080e0e00
101c0f07
d282000e
043a0502
10121509
c200093e
bf8c007f
08121200
d282000a
043a1309
7e145b0a
10041502
100e1507
10000107
d2820000
04020302
10021509
d2820000
04021101
d2060800
02010100
7e004f00
100206ff
44800000
0e000101
7e004b00
7e0202ff
3c23d70a
d00c0000
0202030b
d2000001
0001e480
d2060001
22010101
7e021101
360202f2
10000300
c200093b
bf8c007f
10000000
5e00010d
f8001c0f
000c000c
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL TEMP[0], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MOV OUT[0], TEMP[0]
  3: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %10, i32 0, i32 %5)
  %12 = extractelement <4 x float> %11, i32 0
  %13 = extractelement <4 x float> %11, i32 1
  %14 = extractelement <4 x float> %11, i32 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %12, float %13, float %14, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800700
bf8c007f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[10..11]
DCL CONST[3..9]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MAD TEMP[4].yw, IMM[0].zzzz, TEMP[4], IMM[0].xxxx
 13: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 14: RSQ TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 16: DP2 TEMP[5].x, TEMP[4].ywww, TEMP[4].ywww
 17: ADD TEMP[5].x, IMM[0].yyyy, -TEMP[5].xxxx
 18: MAX TEMP[5].x, IMM[0].wwww, TEMP[5].xxxx
 19: RSQ TEMP[6].x, TEMP[5].xxxx
 20: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 21: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 23: DP3 TEMP[5].x, IN[5].yzww, IN[5].yzww
 24: RSQ TEMP[5].x, TEMP[5].xxxx
 25: MUL TEMP[5].xyz, IN[5].yzww, TEMP[5].xxxx
 26: DP3 TEMP[6].x, IN[6].xyzz, IN[6].xyzz
 27: RSQ TEMP[6].x, TEMP[6].xxxx
 28: MUL TEMP[6].xyz, IN[6].xyzz, TEMP[6].xxxx
 29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[4].wwww
 30: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].yyyy, TEMP[6].xyzz
 31: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 32: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 33: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 34: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 35: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 36: MOV TEMP[2].xyz, TEMP[2].xyzz
 37: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 38: DP4 TEMP[3].x, IMM[0].yyyy, CONST[8]
 39: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[6].yyyy
 40: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 41: MOV TEMP[4].w, TEMP[3].wwww
 42: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[3].xyyy
 43: MOV TEMP[5].xy, TEMP[5].xyyy
 44: TEX TEMP[5], TEMP[5], SAMP[2], 2D
 45: DP4 TEMP[6].x, IMM[0].yyyy, CONST[7]
 46: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[6].xxxx
 47: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 48: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 49: MAX TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
 50: RCP TEMP[7].x, TEMP[7].xxxx
 51: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 52: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 53: MAD TEMP[3].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[3].xyzz
 54: MUL TEMP[4].xyz, TEMP[3].xyzz, IMM[2].xxxx
 55: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 56: DP4 TEMP[3].x, IMM[0].yyyy, CONST[9]
 57: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[6].zzzz
 58: MAD TEMP[4].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 59: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 60: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 61: LRP TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz, CONST[4].xyzz
 62: MOV OUT[0], TEMP[4]
 63: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = fcmp ugt float %16, 0.000000e+00
  %58 = select i1 %57, float 1.000000e+00, float 0.000000e+00
  %59 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %66 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %78 = fmul float %13, %43
  %79 = fadd float %78, %44
  %80 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %81 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %82 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %83 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %84 = bitcast float %80 to i32
  %85 = icmp ne i32 %84, 0
  %. = select i1 %85, float -1.000000e+00, float 1.000000e+00
  %86 = bitcast float %67 to i32
  %87 = bitcast float %68 to i32
  %88 = insertelement <2 x i32> undef, i32 %86, i32 0
  %89 = insertelement <2 x i32> %88, i32 %87, i32 1
  %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %46, <16 x i8> %48, i32 2)
  %91 = extractelement <4 x float> %90, i32 1
  %92 = extractelement <4 x float> %90, i32 3
  %93 = fmul float 2.000000e+00, %91
  %94 = fadd float %93, -1.000000e+00
  %95 = fmul float 2.000000e+00, %92
  %96 = fadd float %95, -1.000000e+00
  %97 = fmul float %69, %69
  %98 = fmul float %70, %70
  %99 = fadd float %98, %97
  %100 = fmul float %71, %71
  %101 = fadd float %99, %100
  %102 = call float @llvm.AMDGPU.rsq(float %101)
  %103 = fmul float %69, %102
  %104 = fmul float %70, %102
  %105 = fmul float %71, %102
  %106 = fmul float %94, %94
  %107 = fmul float %96, %96
  %108 = fadd float %106, %107
  %109 = fsub float -0.000000e+00, %108
  %110 = fadd float 1.000000e+00, %109
  %111 = fcmp uge float 0.000000e+00, %110
  %112 = select i1 %111, float 0.000000e+00, float %110
  %113 = call float @llvm.AMDGPU.rsq(float %112)
  %114 = fmul float %113, %112
  %115 = fsub float -0.000000e+00, %112
  %116 = call float @llvm.AMDGPU.cndlt(float %115, float %114, float 0.000000e+00)
  %117 = fmul float %103, %116
  %118 = fmul float %104, %116
  %119 = fmul float %105, %116
  %120 = fmul float %72, %72
  %121 = fmul float %73, %73
  %122 = fadd float %121, %120
  %123 = fmul float %74, %74
  %124 = fadd float %122, %123
  %125 = call float @llvm.AMDGPU.rsq(float %124)
  %126 = fmul float %72, %125
  %127 = fmul float %73, %125
  %128 = fmul float %74, %125
  %129 = fmul float %75, %75
  %130 = fmul float %76, %76
  %131 = fadd float %130, %129
  %132 = fmul float %77, %77
  %133 = fadd float %131, %132
  %134 = call float @llvm.AMDGPU.rsq(float %133)
  %135 = fmul float %75, %134
  %136 = fmul float %76, %134
  %137 = fmul float %77, %134
  %138 = fmul float %135, %96
  %139 = fmul float %136, %96
  %140 = fmul float %137, %96
  %141 = fmul float %126, %94
  %142 = fadd float %141, %138
  %143 = fmul float %127, %94
  %144 = fadd float %143, %139
  %145 = fmul float %128, %94
  %146 = fadd float %145, %140
  %147 = fmul float %117, %.
  %148 = fadd float %147, %142
  %149 = fmul float %118, %.
  %150 = fadd float %149, %144
  %151 = fmul float %119, %.
  %152 = fadd float %151, %146
  %153 = fmul float %148, %63
  %154 = fmul float %150, %64
  %155 = fadd float %154, %153
  %156 = fmul float %152, %65
  %157 = fadd float %155, %156
  %158 = fmul float %157, %148
  %159 = fmul float %157, %150
  %160 = fmul float %157, %152
  %161 = fmul float 2.000000e+00, %158
  %162 = fmul float 2.000000e+00, %159
  %163 = fmul float 2.000000e+00, %160
  %164 = fsub float -0.000000e+00, %161
  %165 = fadd float %63, %164
  %166 = fsub float -0.000000e+00, %162
  %167 = fadd float %64, %166
  %168 = fsub float -0.000000e+00, %163
  %169 = fadd float %65, %168
  %170 = insertelement <4 x float> undef, float %165, i32 0
  %171 = insertelement <4 x float> %170, float %167, i32 1
  %172 = insertelement <4 x float> %171, float %169, i32 2
  %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 3
  %174 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %173)
  %175 = extractelement <4 x float> %174, i32 0
  %176 = extractelement <4 x float> %174, i32 1
  %177 = extractelement <4 x float> %174, i32 2
  %178 = extractelement <4 x float> %174, i32 3
  %179 = call float @fabs(float %177)
  %180 = fdiv float 1.000000e+00, %179
  %181 = fmul float %175, %180
  %182 = fadd float %181, 1.500000e+00
  %183 = fmul float %176, %180
  %184 = fadd float %183, 1.500000e+00
  %185 = bitcast float %184 to i32
  %186 = bitcast float %182 to i32
  %187 = bitcast float %178 to i32
  %188 = insertelement <4 x i32> undef, i32 %185, i32 0
  %189 = insertelement <4 x i32> %188, i32 %186, i32 1
  %190 = insertelement <4 x i32> %189, i32 %187, i32 2
  %191 = insertelement <4 x i32> %190, i32 undef, i32 3
  %192 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %191, <32 x i8> %50, <16 x i8> %52, i32 4)
  %193 = extractelement <4 x float> %192, i32 0
  %194 = extractelement <4 x float> %192, i32 1
  %195 = extractelement <4 x float> %192, i32 2
  %196 = extractelement <4 x float> %192, i32 3
  %197 = fmul float 1.000000e+00, %35
  %198 = fmul float 1.000000e+00, %36
  %199 = fadd float %197, %198
  %200 = fmul float 1.000000e+00, %37
  %201 = fadd float %199, %200
  %202 = fmul float 1.000000e+00, %38
  %203 = fadd float %201, %202
  %204 = fadd float %203, %29
  %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00)
  %206 = call float @llvm.AMDGPU.lrp(float %205, float %59, float 1.000000e+00)
  %207 = call float @llvm.AMDGPU.lrp(float %205, float %60, float 1.000000e+00)
  %208 = call float @llvm.AMDGPU.lrp(float %205, float %61, float 1.000000e+00)
  %209 = call float @llvm.AMDGPU.lrp(float %205, float %62, float 1.000000e+00)
  %210 = fmul float %12, %22
  %211 = fmul float %79, %23
  %212 = bitcast float %210 to i32
  %213 = bitcast float %211 to i32
  %214 = insertelement <2 x i32> undef, i32 %212, i32 0
  %215 = insertelement <2 x i32> %214, i32 %213, i32 1
  %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %54, <16 x i8> %56, i32 2)
  %217 = extractelement <4 x float> %216, i32 0
  %218 = extractelement <4 x float> %216, i32 1
  %219 = extractelement <4 x float> %216, i32 2
  %220 = extractelement <4 x float> %216, i32 3
  %221 = fmul float 1.000000e+00, %31
  %222 = fmul float 1.000000e+00, %32
  %223 = fadd float %221, %222
  %224 = fmul float 1.000000e+00, %33
  %225 = fadd float %223, %224
  %226 = fmul float 1.000000e+00, %34
  %227 = fadd float %225, %226
  %228 = fadd float %227, %28
  %229 = call float @llvm.AMDIL.clamp.(float %228, float 0.000000e+00, float 1.000000e+00)
  %230 = fmul float %229, %220
  %231 = fmul float %217, 0x3FCB367A00000000
  %232 = fmul float %218, 0x3FE6E2EB20000000
  %233 = fadd float %232, %231
  %234 = fmul float %219, 0x3FB27BB300000000
  %235 = fadd float %233, %234
  %236 = fcmp uge float %235, 0x3F50624DE0000000
  %237 = select i1 %236, float %235, float 0x3F50624DE0000000
  %238 = fdiv float 1.000000e+00, %237
  %239 = fmul float %217, %238
  %240 = fmul float %218, %238
  %241 = fmul float %219, %238
  %242 = fmul float %206, %217
  %243 = fmul float %207, %218
  %244 = fmul float %208, %219
  %245 = fmul float %230, %239
  %246 = fadd float %245, %242
  %247 = fmul float %230, %240
  %248 = fadd float %247, %243
  %249 = fmul float %230, %241
  %250 = fadd float %249, %244
  %251 = fmul float %246, 4.000000e+00
  %252 = fmul float %248, 4.000000e+00
  %253 = fmul float %250, 4.000000e+00
  %254 = fmul float %193, %196
  %255 = fmul float %194, %196
  %256 = fmul float %195, %196
  %257 = fmul float 1.000000e+00, %39
  %258 = fmul float 1.000000e+00, %40
  %259 = fadd float %257, %258
  %260 = fmul float 1.000000e+00, %41
  %261 = fadd float %259, %260
  %262 = fmul float 1.000000e+00, %42
  %263 = fadd float %261, %262
  %264 = fadd float %263, %30
  %265 = call float @llvm.AMDIL.clamp.(float %264, float 0.000000e+00, float 1.000000e+00)
  %266 = fmul float %254, %265
  %267 = fadd float %266, %251
  %268 = fmul float %255, %265
  %269 = fadd float %268, %252
  %270 = fmul float %256, %265
  %271 = fadd float %270, %253
  %272 = fcmp uge float %66, %27
  %273 = select i1 %272, float %66, float %27
  %274 = call float @llvm.AMDIL.clamp.(float %273, float 0.000000e+00, float 1.000000e+00)
  %275 = call float @llvm.AMDGPU.lrp(float %274, float %267, float %24)
  %276 = call float @llvm.AMDGPU.lrp(float %274, float %269, float %25)
  %277 = call float @llvm.AMDGPU.lrp(float %274, float %271, float %26)
  %278 = call i32 @llvm.SI.packf16(float %275, float %276)
  %279 = bitcast i32 %278 to float
  %280 = call i32 @llvm.SI.packf16(float %277, float %209)
  %281 = bitcast i32 %280 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %279, float %281, float %279, float %281)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840300
c0c60500
bf8c007f
f0800a00
00430605
bf8c0770
060a0d06
060a0af3
060c0f07
060c0cf3
100e0d06
d2820007
041e0b05
080e0ef2
d0060008
02010107
d2000007
00210107
7e105b07
10100f08
d2060007
22010107
d0080008
02020e80
d2000007
00221080
c82c0b00
c82d0b01
c8280a00
c8290a01
1010150a
d2820009
0422170b
c8200c00
c8210c01
d2820009
04261108
7e125b09
1016130b
101a0f0b
c83c1100
c83d1101
c8381000
c8391001
10161d0e
d282000c
042e1f0f
c82c1200
c82d1201
d282000c
0432170b
7e185b0c
101e190f
10220d0f
c84c0e00
c84d0e01
c8480d00
c8490d01
101e2512
d2820010
043e2713
c83c0f00
c83d0f01
d2820010
04421f0f
7e205b10
10262113
d2820013
04460b13
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000011
0021e6f2
d2820004
044e230d
1014130a
10140f0a
101a190e
101a0d0d
101c2112
d282000d
04360b0e
d282000a
0436230a
c8340400
c8350401
10241b0a
c8380500
c8390501
d2820012
044a1d04
10101308
100e0f08
1010190b
100c0d08
1010210f
d2820005
041a0b08
d2820005
04162307
c8180600
c8190601
d2820007
044a0d05
10100907
d2820004
04220907
081e090e
10081507
d2820004
04121507
081c090d
10080b07
d2820004
04120b07
08200906
7e220280
d28a0005
04421f0e
d28c0004
04421f0e
d28e0006
04421f0e
d2880007
04421f0e
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430405
bf8c0770
101a0f05
c0840100
bf8c007f
c200092c
c200892d
bf8c007f
7e100201
d2820003
04200103
c200090d
bf8c007f
10120600
c200090c
bf8c007f
10100400
c0800308
c0c60510
bf8c007f
f0800f00
00030808
bf8c0770
100410ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0709
7e0602ff
3d93dd98
d2820002
040a070a
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e185502
101e1909
c8380100
c8390101
c2000920
c2008921
bf8c007f
7e040201
d2060002
02020400
c2000922
bf8c007f
06040400
c2000923
bf8c007f
06040400
c2000919
bf8c007f
06040400
d2060802
02010102
080604f2
d282000e
040e1d02
1020130e
c200091c
c200891d
bf8c007f
7e1c0201
d206000e
02021c00
c200091e
bf8c007f
061c1c00
c200091f
bf8c007f
061c1c00
c2000918
bf8c007f
061c1c00
d206080e
0201010e
101c170e
d282000f
04421f0e
10201ef6
c2000924
c2008925
bf8c007f
7e1e0201
d206000f
02021e00
c2000926
bf8c007f
061e1e00
c2000927
bf8c007f
061e1e00
c200091a
bf8c007f
061e1e00
d206080f
0201010f
d2820011
04421f0d
c8340700
c8350701
c2000913
bf8c007f
d00c0002
0200010d
7e200200
d200000d
000a1b10
d206080d
0201010d
08201af2
c2000911
bf8c007f
10242000
d2820011
044a230d
10240f04
10261908
c8500000
c8510001
d2820014
040e2902
10281114
d2820013
0452270e
102626f6
d2820012
044e1f12
c2000910
bf8c007f
10262000
d2820012
044e250d
5e222312
10080f06
100a190a
c8180200
c8190201
d2820006
040e0d02
100c1506
d2820005
041a0b0e
100a0af6
d2820004
04161f04
c2000912
bf8c007f
100a2000
d2820004
0416090d
c8140300
c8150301
d2820000
040e0b02
5e000104
f8001c0f
00110011
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..14]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3].xyz, IN[1].xyzz, CONST[10].wwww
  8: MUL TEMP[4], CONST[0], TEMP[3].xxxx
  9: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 10: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 11: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 14: MUL TEMP[4].xyz, IN[3].xyzz, CONST[10].wwww
 15: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 16: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 17: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 18: MUL TEMP[5], CONST[0], TEMP[0].xxxx
 19: MAD TEMP[5], CONST[1], TEMP[0].yyyy, TEMP[5]
 20: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[5]
 21: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 22: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[14].xyzz
 23: MAD TEMP[5].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 24: MOV TEMP[0].w, TEMP[5].xxxx
 25: MAD TEMP[5].xy, IN[2].xyyy, CONST[9].xyyy, CONST[9].zwww
 26: MOV TEMP[5].zw, TEMP[3].yyxy
 27: MOV TEMP[6].x, TEMP[3].zzzz
 28: MUL TEMP[7].xyz, TEMP[4].zxyy, TEMP[3].yzxx
 29: MAD TEMP[3].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[7].xyzz
 30: MOV TEMP[6].yzw, TEMP[3].yxyz
 31: MOV TEMP[3].xyz, TEMP[4].xyzx
 32: MOV OUT[1], TEMP[2]
 33: MOV OUT[3], TEMP[5]
 34: MOV OUT[5], TEMP[3]
 35: MOV OUT[4], TEMP[6]
 36: MOV OUT[2], TEMP[0]
 37: MOV OUT[0], TEMP[1]
 38: END
; ModuleID = 'tgsi'

; Vertex shader entry point (attributes #0 carries "ShaderType"="1"), generated
; from the TGSI VERT listing dumped immediately before this module.
; Argument use inside this body:
;   %0 - pointer from which the constant-buffer descriptor is loaded
;   %3 - pointer to an array of descriptors, one per vertex input IN[0..3]
;   %5 - passed as the last operand of every llvm.SI.vs.load.input call
;        (presumably the vertex index -- confirm against the intrinsic)
; The remaining arguments are not referenced in this function.
define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  ; Load the constant-buffer descriptor, then fetch every scalar constant the
  ; shader uses. The second operand is a byte offset; matching the loads against
  ; the TGSI operands shows 16 bytes per CONST[n] and 4 bytes per component.
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  ; CONST[0].xyz, CONST[1].xyz, CONST[2].xyz, CONST[3].xyz (offsets 0..56)
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  ; CONST[4..7].xyzw (offsets 64..124)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  ; CONST[8].xyz (offsets 128..136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  ; CONST[9].xyzw (offsets 144..156)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  ; CONST[10].w (offset 172)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  ; CONST[11].xyz (offsets 176..184) and CONST[12].xyz (offsets 192..200)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  ; CONST[13].xy (offsets 208, 212)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  ; CONST[14].xyz (offsets 224..232)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  ; Fetch the four vertex inputs; descriptor i of %3 feeds IN[i].
  ; IN[0]: x, y, z are used.
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  ; IN[1]: x, y, z are used.
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  ; IN[2]: only x and y are used.
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  ; IN[3]: x, y, z are used.
  %75 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
  %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %5)
  %78 = extractelement <4 x float> %77, i32 0
  %79 = extractelement <4 x float> %77, i32 1
  %80 = extractelement <4 x float> %77, i32 2
  ; TGSI 0: TEMP[0].xyz = IN[0].xyz * CONST[12].xyz + CONST[11].xyz
  ; Result: (%82, %84, %86).
  %81 = fmul float %61, %50
  %82 = fadd float %81, %47
  %83 = fmul float %62, %51
  %84 = fadd float %83, %48
  %85 = fmul float %63, %52
  %86 = fadd float %85, %49
  ; TGSI 1-4: TEMP[1] = CONST[4]*TEMP[0].x + CONST[5]*TEMP[0].y
  ;                   + CONST[6]*TEMP[0].z + CONST[7]
  ; Result: (%107, %108, %109, %110); exported as POSITION at the end.
  %87 = fmul float %23, %82
  %88 = fmul float %24, %82
  %89 = fmul float %25, %82
  %90 = fmul float %26, %82
  %91 = fmul float %27, %84
  %92 = fadd float %91, %87
  %93 = fmul float %28, %84
  %94 = fadd float %93, %88
  %95 = fmul float %29, %84
  %96 = fadd float %95, %89
  %97 = fmul float %30, %84
  %98 = fadd float %97, %90
  %99 = fmul float %31, %86
  %100 = fadd float %99, %92
  %101 = fmul float %32, %86
  %102 = fadd float %101, %94
  %103 = fmul float %33, %86
  %104 = fadd float %103, %96
  %105 = fmul float %34, %86
  %106 = fadd float %105, %98
  %107 = fadd float %100, %35
  %108 = fadd float %102, %36
  %109 = fadd float %104, %37
  %110 = fadd float %106, %38
  ; TGSI 7: TEMP[3].xyz = IN[1].xyz * CONST[10].w
  %111 = fmul float %67, %46
  %112 = fmul float %68, %46
  %113 = fmul float %69, %46
  ; TGSI 8-10: TEMP[3].xyz = CONST[0]*TEMP[3].x + CONST[1]*TEMP[3].y
  ;                        + CONST[2]*TEMP[3].z (only xyz is computed)
  ; Result: (%124, %126, %128).
  %114 = fmul float %11, %111
  %115 = fmul float %12, %111
  %116 = fmul float %13, %111
  %117 = fmul float %14, %112
  %118 = fadd float %117, %114
  %119 = fmul float %15, %112
  %120 = fadd float %119, %115
  %121 = fmul float %16, %112
  %122 = fadd float %121, %116
  %123 = fmul float %17, %113
  %124 = fadd float %123, %118
  %125 = fmul float %18, %113
  %126 = fadd float %125, %120
  %127 = fmul float %19, %113
  %128 = fadd float %127, %122
  ; TGSI 11-13: scale TEMP[3].xyz by RSQ(dot(TEMP[3].xyz, TEMP[3].xyz)).
  ; Result: (%135, %136, %137).
  %129 = fmul float %124, %124
  %130 = fmul float %126, %126
  %131 = fadd float %130, %129
  %132 = fmul float %128, %128
  %133 = fadd float %131, %132
  %134 = call float @llvm.AMDGPU.rsq(float %133)
  %135 = fmul float %124, %134
  %136 = fmul float %126, %134
  %137 = fmul float %128, %134
  ; TGSI 14: TEMP[4].xyz = IN[3].xyz * CONST[10].w
  %138 = fmul float %78, %46
  %139 = fmul float %79, %46
  %140 = fmul float %80, %46
  ; TGSI 15-17: TEMP[4].xyz = CONST[0]*TEMP[4].x + CONST[1]*TEMP[4].y
  ;                         + CONST[2]*TEMP[4].z
  ; Result: (%151, %153, %155). Unlike TEMP[3], this is not rescaled by RSQ.
  %141 = fmul float %11, %138
  %142 = fmul float %12, %138
  %143 = fmul float %13, %138
  %144 = fmul float %14, %139
  %145 = fadd float %144, %141
  %146 = fmul float %15, %139
  %147 = fadd float %146, %142
  %148 = fmul float %16, %139
  %149 = fadd float %148, %143
  %150 = fmul float %17, %140
  %151 = fadd float %150, %145
  %152 = fmul float %18, %140
  %153 = fadd float %152, %147
  %154 = fmul float %19, %140
  %155 = fadd float %154, %149
  ; TGSI 18-21: TEMP[0].xyz = CONST[0]*TEMP[0].x + CONST[1]*TEMP[0].y
  ;                         + CONST[2]*TEMP[0].z + CONST[3]
  ; Result: (%171, %172, %173).
  %156 = fmul float %11, %82
  %157 = fmul float %12, %82
  %158 = fmul float %13, %82
  %159 = fmul float %14, %84
  %160 = fadd float %159, %156
  %161 = fmul float %15, %84
  %162 = fadd float %161, %157
  %163 = fmul float %16, %84
  %164 = fadd float %163, %158
  %165 = fmul float %17, %86
  %166 = fadd float %165, %160
  %167 = fmul float %18, %86
  %168 = fadd float %167, %162
  %169 = fmul float %19, %86
  %170 = fadd float %169, %164
  %171 = fadd float %166, %20
  %172 = fadd float %168, %21
  %173 = fadd float %170, %22
  ; TGSI 22: TEMP[0].xyz = TEMP[0].xyz - CONST[14].xyz
  ; The subtraction is emitted as "fsub -0.0, c" (negate) followed by fadd.
  ; Result: (%175, %177, %179).
  %174 = fsub float -0.000000e+00, %55
  %175 = fadd float %171, %174
  %176 = fsub float -0.000000e+00, %56
  %177 = fadd float %172, %176
  %178 = fsub float -0.000000e+00, %57
  %179 = fadd float %173, %178
  ; TGSI 23-24: TEMP[0].w = TEMP[1].z * CONST[13].x + CONST[13].y
  %180 = fmul float %109, %53
  %181 = fadd float %180, %54
  ; TGSI 25: TEMP[5].xy = IN[2].xy * CONST[9].xy + CONST[9].zw
  %182 = fmul float %73, %42
  %183 = fadd float %182, %44
  %184 = fmul float %74, %43
  %185 = fadd float %184, %45
  ; TGSI 28-29: cross product TEMP[4].xyz x TEMP[3].xyz, component-wise as
  ; TEMP[4].yzx * TEMP[3].zxy - TEMP[4].zxy * TEMP[3].yzx.
  ; Result: (%191, %194, %197).
  %186 = fmul float %155, %136
  %187 = fmul float %151, %137
  %188 = fmul float %153, %135
  %189 = fsub float -0.000000e+00, %186
  %190 = fmul float %153, %137
  %191 = fadd float %190, %189
  %192 = fsub float -0.000000e+00, %187
  %193 = fmul float %155, %135
  %194 = fadd float %193, %192
  %195 = fsub float -0.000000e+00, %188
  %196 = fmul float %151, %136
  %197 = fadd float %196, %195
  ; Exports, matched to the TGSI MOV OUT[n] instructions 32-37 by their values.
  ; The fourth operand is 32..36 for OUT[1..5] and 12 for POSITION.
  ; OUT[1] = (CONST[8].xyz, 1.0)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  ; OUT[2] = TEMP[0]: the xyz from TGSI 22 plus the w from TGSI 23-24
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %175, float %177, float %179, float %181)
  ; OUT[3] = TEMP[5]: the xy from TGSI 25, then the scaled TEMP[3].x and .y
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %183, float %185, float %135, float %136)
  ; OUT[4] = TEMP[6]: the scaled TEMP[3].z, then the cross product xyz
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %137, float %191, float %194, float %197)
  ; OUT[5] = transformed IN[3] (TEMP[4].xyz) with w emitted as 0.0
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %151, float %153, float %155, float 0.000000e+00)
  ; OUT[0] POSITION = TEMP[1]. The third operand is 1 only on this export --
  ; NOTE(review): presumably the "last export" flag; confirm against the
  ; llvm.SI.export intrinsic definition.
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %107, float %108, float %109, float %110)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c2020131
c202812d
bf8c0070
7e020205
d2820001
04040904
c2020130
c202812c
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c2020132
c202812e
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c204813a
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060139
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078138
bf8c007f
0a10100f
f800021f
05060708
c0880704
bf8c000f
e00c2000
80040700
c207812b
bf8c0070
100a100f
100c0e0f
10160c09
d282000b
042e0a0b
100e120f
d2820009
042e0e0a
10100c0c
d2820008
04220a0d
d282000a
04220e0e
1010150a
d2820008
04221309
100c0c04
d2820005
041a0a05
d2820007
04160e08
d2820005
04220f07
7e105b05
100a1109
100c110a
c0880708
bf8c007f
e00c2000
80040900
c2080125
c2088127
bf8c0070
7e1a0211
d282000d
0434210a
c2080124
c2088126
bf8c007f
7e1c0211
d2820009
04382109
f800022f
05060d09
c088070c
bf8c000f
e00c2000
80040c00
bf8c0770
10121a0f
1016180f
10001609
d2820000
0402120b
10181c0f
d2820000
0402180a
101a0d00
1014160c
d282000a
042a120d
d282000a
042a180e
101c0b0a
081a1b0e
100e1107
101c0f0a
10101604
d2820008
04221205
d2820008
04221808
100c0d08
080c1d06
100a0b08
10120f00
080a0b09
f800023f
0d060507
bf8c070f
7e0a0280
f800024f
0508000a
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[9..10]
DCL CONST[2..8]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0010}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: UIF TEMP[1].xxxx :3
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :3
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP3 TEMP[3].x, IN[4].xyzz, IN[4].xyzz
  9: RSQ TEMP[3].x, TEMP[3].xxxx
 10: MUL TEMP[3].xyz, IN[4].xyzz, TEMP[3].xxxx
 11: MUL TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xxxx
 12: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 13: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 14: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 15: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 16: MOV TEMP[2].xyz, TEMP[2].xyzz
 17: TEX TEMP[2], TEMP[2], SAMP[0], CUBE
 18: DP4 TEMP[3].x, IMM[0].yyyy, CONST[7]
 19: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[5].yyyy
 20: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 21: MOV TEMP[4].w, TEMP[3].wwww
 22: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[2].xyyy
 23: MOV TEMP[5].xy, TEMP[5].xyyy
 24: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 25: DP4 TEMP[6].x, IMM[0].yyyy, CONST[6]
 26: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[5].xxxx
 27: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 28: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 29: MAX TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww
 30: RCP TEMP[7].x, TEMP[7].xxxx
 31: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 32: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 33: MAD TEMP[3].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[3].xyzz
 34: MUL TEMP[4].xyz, TEMP[3].xyzz, IMM[1].wwww
 35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 36: DP4 TEMP[3].x, IMM[0].yyyy, CONST[8]
 37: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[5].zzzz
 38: MAD TEMP[4].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[4].xyzz
 39: MAX TEMP[2].x, IN[3].wwww, CONST[3].wwww
 40: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 41: LRP TEMP[4].xyz, TEMP[2].xxxx, TEMP[4].xyzz, CONST[3].xyzz
 42: MOV OUT[0], TEMP[4]
 43: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = fcmp ugt float %16, 0.000000e+00
  %54 = select i1 %53, float 1.000000e+00, float 0.000000e+00
  %55 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %66 = fmul float %13, %43
  %67 = fadd float %66, %44
  %68 = call float @llvm.AMDIL.clamp.(float %54, float 0.000000e+00, float 1.000000e+00)
  %69 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %70 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %71 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %72 = bitcast float %68 to i32
  %73 = icmp ne i32 %72, 0
  %. = select i1 %73, float -1.000000e+00, float 1.000000e+00
  %74 = fmul float %63, %63
  %75 = fmul float %64, %64
  %76 = fadd float %75, %74
  %77 = fmul float %65, %65
  %78 = fadd float %76, %77
  %79 = call float @llvm.AMDGPU.rsq(float %78)
  %80 = fmul float %63, %79
  %81 = fmul float %64, %79
  %82 = fmul float %65, %79
  %83 = fmul float %80, %.
  %84 = fmul float %81, %.
  %85 = fmul float %82, %.
  %86 = fmul float %83, %59
  %87 = fmul float %84, %60
  %88 = fadd float %87, %86
  %89 = fmul float %85, %61
  %90 = fadd float %88, %89
  %91 = fmul float %90, %83
  %92 = fmul float %90, %84
  %93 = fmul float %90, %85
  %94 = fmul float 2.000000e+00, %91
  %95 = fmul float 2.000000e+00, %92
  %96 = fmul float 2.000000e+00, %93
  %97 = fsub float -0.000000e+00, %94
  %98 = fadd float %59, %97
  %99 = fsub float -0.000000e+00, %95
  %100 = fadd float %60, %99
  %101 = fsub float -0.000000e+00, %96
  %102 = fadd float %61, %101
  %103 = insertelement <4 x float> undef, float %98, i32 0
  %104 = insertelement <4 x float> %103, float %100, i32 1
  %105 = insertelement <4 x float> %104, float %102, i32 2
  %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3
  %107 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %106)
  %108 = extractelement <4 x float> %107, i32 0
  %109 = extractelement <4 x float> %107, i32 1
  %110 = extractelement <4 x float> %107, i32 2
  %111 = extractelement <4 x float> %107, i32 3
  %112 = call float @fabs(float %110)
  %113 = fdiv float 1.000000e+00, %112
  %114 = fmul float %108, %113
  %115 = fadd float %114, 1.500000e+00
  %116 = fmul float %109, %113
  %117 = fadd float %116, 1.500000e+00
  %118 = bitcast float %117 to i32
  %119 = bitcast float %115 to i32
  %120 = bitcast float %111 to i32
  %121 = insertelement <4 x i32> undef, i32 %118, i32 0
  %122 = insertelement <4 x i32> %121, i32 %119, i32 1
  %123 = insertelement <4 x i32> %122, i32 %120, i32 2
  %124 = insertelement <4 x i32> %123, i32 undef, i32 3
  %125 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %124, <32 x i8> %46, <16 x i8> %48, i32 4)
  %126 = extractelement <4 x float> %125, i32 0
  %127 = extractelement <4 x float> %125, i32 1
  %128 = extractelement <4 x float> %125, i32 2
  %129 = extractelement <4 x float> %125, i32 3
  %130 = fmul float 1.000000e+00, %35
  %131 = fmul float 1.000000e+00, %36
  %132 = fadd float %130, %131
  %133 = fmul float 1.000000e+00, %37
  %134 = fadd float %132, %133
  %135 = fmul float 1.000000e+00, %38
  %136 = fadd float %134, %135
  %137 = fadd float %136, %29
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %138, float %55, float 1.000000e+00)
  %140 = call float @llvm.AMDGPU.lrp(float %138, float %56, float 1.000000e+00)
  %141 = call float @llvm.AMDGPU.lrp(float %138, float %57, float 1.000000e+00)
  %142 = call float @llvm.AMDGPU.lrp(float %138, float %58, float 1.000000e+00)
  %143 = fmul float %12, %22
  %144 = fmul float %67, %23
  %145 = bitcast float %143 to i32
  %146 = bitcast float %144 to i32
  %147 = insertelement <2 x i32> undef, i32 %145, i32 0
  %148 = insertelement <2 x i32> %147, i32 %146, i32 1
  %149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %148, <32 x i8> %50, <16 x i8> %52, i32 2)
  %150 = extractelement <4 x float> %149, i32 0
  %151 = extractelement <4 x float> %149, i32 1
  %152 = extractelement <4 x float> %149, i32 2
  %153 = extractelement <4 x float> %149, i32 3
  %154 = fmul float 1.000000e+00, %31
  %155 = fmul float 1.000000e+00, %32
  %156 = fadd float %154, %155
  %157 = fmul float 1.000000e+00, %33
  %158 = fadd float %156, %157
  %159 = fmul float 1.000000e+00, %34
  %160 = fadd float %158, %159
  %161 = fadd float %160, %28
  %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %163 = fmul float %162, %153
  %164 = fmul float %150, 0x3FCB367A00000000
  %165 = fmul float %151, 0x3FE6E2EB20000000
  %166 = fadd float %165, %164
  %167 = fmul float %152, 0x3FB27BB300000000
  %168 = fadd float %166, %167
  %169 = fcmp uge float %168, 0x3F50624DE0000000
  %170 = select i1 %169, float %168, float 0x3F50624DE0000000
  %171 = fdiv float 1.000000e+00, %170
  %172 = fmul float %150, %171
  %173 = fmul float %151, %171
  %174 = fmul float %152, %171
  %175 = fmul float %139, %150
  %176 = fmul float %140, %151
  %177 = fmul float %141, %152
  %178 = fmul float %163, %172
  %179 = fadd float %178, %175
  %180 = fmul float %163, %173
  %181 = fadd float %180, %176
  %182 = fmul float %163, %174
  %183 = fadd float %182, %177
  %184 = fmul float %179, 4.000000e+00
  %185 = fmul float %181, 4.000000e+00
  %186 = fmul float %183, 4.000000e+00
  %187 = fmul float %126, %129
  %188 = fmul float %127, %129
  %189 = fmul float %128, %129
  %190 = fmul float 1.000000e+00, %39
  %191 = fmul float 1.000000e+00, %40
  %192 = fadd float %190, %191
  %193 = fmul float 1.000000e+00, %41
  %194 = fadd float %192, %193
  %195 = fmul float 1.000000e+00, %42
  %196 = fadd float %194, %195
  %197 = fadd float %196, %30
  %198 = call float @llvm.AMDIL.clamp.(float %197, float 0.000000e+00, float 1.000000e+00)
  %199 = fmul float %187, %198
  %200 = fadd float %199, %184
  %201 = fmul float %188, %198
  %202 = fadd float %201, %185
  %203 = fmul float %189, %198
  %204 = fadd float %203, %186
  %205 = fcmp uge float %62, %27
  %206 = select i1 %205, float %62, float %27
  %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00)
  %208 = call float @llvm.AMDGPU.lrp(float %207, float %200, float %24)
  %209 = call float @llvm.AMDGPU.lrp(float %207, float %202, float %25)
  %210 = call float @llvm.AMDGPU.lrp(float %207, float %204, float %26)
  %211 = call i32 @llvm.SI.packf16(float %208, float %209)
  %212 = bitcast i32 %211 to float
  %213 = call i32 @llvm.SI.packf16(float %210, float %142)
  %214 = bitcast i32 %213 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %212, float %214, float %212, float %214)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c81c0800
c81d0801
100a0f07
d2820008
04160d06
c8140a00
c8150a01
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0400
c81d0401
10160f04
c8280500
c8290501
d282000b
042e1506
100a1105
100a1305
c8200600
c8210601
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840300
c0c60500
bf8c007f
f0800f00
00430405
bf8c0770
101a0f05
c0840100
bf8c007f
c2000928
c2008929
bf8c007f
7e100201
d2820003
04200103
c2000909
bf8c007f
10120600
c2000908
bf8c007f
10100400
c0800304
c0c60508
bf8c007f
f0800f00
00030808
bf8c0770
100410ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0709
7e0602ff
3d93dd98
d2820002
040a070a
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e185502
101e1909
c8380100
c8390101
c200091c
c200891d
bf8c007f
7e040201
d2060002
02020400
c200091e
bf8c007f
06040400
c200091f
bf8c007f
06040400
c2000915
bf8c007f
06040400
d2060802
02010102
080604f2
d282000e
040e1d02
1020130e
c2000918
c2008919
bf8c007f
7e1c0201
d206000e
02021c00
c200091a
bf8c007f
061c1c00
c200091b
bf8c007f
061c1c00
c2000914
bf8c007f
061c1c00
d206080e
0201010e
101c170e
d282000f
04421f0e
10201ef6
c2000920
c2008921
bf8c007f
7e1e0201
d206000f
02021e00
c2000922
bf8c007f
061e1e00
c2000923
bf8c007f
061e1e00
c2000916
bf8c007f
061e1e00
d206080f
0201010f
d2820011
04421f0d
c8340700
c8350701
c200090f
bf8c007f
d00c0002
0200010d
7e200200
d200000d
000a1b10
d206080d
0201010d
08201af2
c200090d
bf8c007f
10242000
d2820011
044a230d
10240f04
10261908
c8500000
c8510001
d2820014
040e2902
10281114
d2820013
0452270e
102626f6
d2820012
044e1f12
c200090c
bf8c007f
10262000
d2820012
044e250d
5e222312
10080f06
100a190a
c8180200
c8190201
d2820006
040e0d02
100c1506
d2820005
041a0b0e
100a0af6
d2820004
04161f04
c200090e
bf8c007f
100a2000
d2820004
0416090d
c8140300
c8150301
d2820000
040e0b02
5e000104
f8001c0f
00110011
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3].xyz, IN[1].xyzz, CONST[9].wwww
  8: MUL TEMP[4], CONST[0], TEMP[3].xxxx
  9: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 10: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 11: MUL TEMP[4], CONST[0], TEMP[0].xxxx
 12: MAD TEMP[4], CONST[1], TEMP[0].yyyy, TEMP[4]
 13: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[4]
 14: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 15: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 16: MAD TEMP[4].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 17: MOV TEMP[0].w, TEMP[4].xxxx
 18: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 19: RSQ TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MOV OUT[3], TEMP[3]
 22: MOV OUT[1], TEMP[2]
 23: MOV OUT[2], TEMP[0]
 24: MOV OUT[0], TEMP[1]
 25: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %54 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %5)
  %57 = extractelement <4 x float> %56, i32 0
  %58 = extractelement <4 x float> %56, i32 1
  %59 = extractelement <4 x float> %56, i32 2
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = fmul float %57, %46
  %67 = fadd float %66, %43
  %68 = fmul float %58, %47
  %69 = fadd float %68, %44
  %70 = fmul float %59, %48
  %71 = fadd float %70, %45
  %72 = fmul float %23, %67
  %73 = fmul float %24, %67
  %74 = fmul float %25, %67
  %75 = fmul float %26, %67
  %76 = fmul float %27, %69
  %77 = fadd float %76, %72
  %78 = fmul float %28, %69
  %79 = fadd float %78, %73
  %80 = fmul float %29, %69
  %81 = fadd float %80, %74
  %82 = fmul float %30, %69
  %83 = fadd float %82, %75
  %84 = fmul float %31, %71
  %85 = fadd float %84, %77
  %86 = fmul float %32, %71
  %87 = fadd float %86, %79
  %88 = fmul float %33, %71
  %89 = fadd float %88, %81
  %90 = fmul float %34, %71
  %91 = fadd float %90, %83
  %92 = fadd float %85, %35
  %93 = fadd float %87, %36
  %94 = fadd float %89, %37
  %95 = fadd float %91, %38
  %96 = fmul float %63, %42
  %97 = fmul float %64, %42
  %98 = fmul float %65, %42
  %99 = fmul float %11, %96
  %100 = fmul float %12, %96
  %101 = fmul float %13, %96
  %102 = fmul float %14, %97
  %103 = fadd float %102, %99
  %104 = fmul float %15, %97
  %105 = fadd float %104, %100
  %106 = fmul float %16, %97
  %107 = fadd float %106, %101
  %108 = fmul float %17, %98
  %109 = fadd float %108, %103
  %110 = fmul float %18, %98
  %111 = fadd float %110, %105
  %112 = fmul float %19, %98
  %113 = fadd float %112, %107
  %114 = fmul float %11, %67
  %115 = fmul float %12, %67
  %116 = fmul float %13, %67
  %117 = fmul float %14, %69
  %118 = fadd float %117, %114
  %119 = fmul float %15, %69
  %120 = fadd float %119, %115
  %121 = fmul float %16, %69
  %122 = fadd float %121, %116
  %123 = fmul float %17, %71
  %124 = fadd float %123, %118
  %125 = fmul float %18, %71
  %126 = fadd float %125, %120
  %127 = fmul float %19, %71
  %128 = fadd float %127, %122
  %129 = fadd float %124, %20
  %130 = fadd float %126, %21
  %131 = fadd float %128, %22
  %132 = fsub float -0.000000e+00, %51
  %133 = fadd float %129, %132
  %134 = fsub float -0.000000e+00, %52
  %135 = fadd float %130, %134
  %136 = fsub float -0.000000e+00, %53
  %137 = fadd float %131, %136
  %138 = fmul float %94, %49
  %139 = fadd float %138, %50
  %140 = fmul float %109, %109
  %141 = fmul float %111, %111
  %142 = fadd float %141, %140
  %143 = fmul float %113, %113
  %144 = fadd float %142, %143
  %145 = call float @llvm.AMDGPU.rsq(float %144)
  %146 = fmul float %109, %145
  %147 = fmul float %111, %145
  %148 = fmul float %113, %145
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %133, float %135, float %137, float %139)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %146, float %147, float %148, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %92, float %93, float %94, float %95)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2028106
bf8c007f
d2820006
041a0205
c204010a
bf8c007f
d2820006
041a0608
c204810e
bf8c007f
060c0c09
c2048136
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060135
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078134
bf8c007f
0a10100f
f800021f
05060708
c0880704
bf8c000f
e00c2000
80040700
c2030127
bf8c0070
10001006
100c0e06
100a0c09
d2820005
0416000b
10101206
d2820005
0416100a
100e0c0c
d2820007
041e000d
d2820007
041e100e
10120f07
d2820009
04260b05
100c0c04
d2820000
041a0005
d2820000
04021008
d2820006
04260100
7e0c5b06
10000d00
100a0d05
100c0d07
7e0e0280
f800022f
07000506
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: DP4 TEMP[0].x, IMM[0].xxxx, CONST[4]
  1: ADD_SAT TEMP[0].x, TEMP[0].xxxx, CONST[2].yyyy
  2: LRP TEMP[0], TEMP[0].xxxx, IN[0], IMM[0].xxxx
  3: MOV TEMP[1].w, TEMP[0].wwww
  4: MAX TEMP[2].x, IN[1].wwww, CONST[0].wwww
  5: MOV_SAT TEMP[2].x, TEMP[2].xxxx
  6: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[0].xyzz
  7: MOV OUT[0], TEMP[1]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 12)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %36 = fmul float 1.000000e+00, %27
  %37 = fmul float 1.000000e+00, %28
  %38 = fadd float %36, %37
  %39 = fmul float 1.000000e+00, %29
  %40 = fadd float %38, %39
  %41 = fmul float 1.000000e+00, %30
  %42 = fadd float %40, %41
  %43 = fadd float %42, %26
  %44 = call float @llvm.AMDIL.clamp.(float %43, float 0.000000e+00, float 1.000000e+00)
  %45 = call float @llvm.AMDGPU.lrp(float %44, float %31, float 1.000000e+00)
  %46 = call float @llvm.AMDGPU.lrp(float %44, float %32, float 1.000000e+00)
  %47 = call float @llvm.AMDGPU.lrp(float %44, float %33, float 1.000000e+00)
  %48 = call float @llvm.AMDGPU.lrp(float %44, float %34, float 1.000000e+00)
  %49 = fcmp uge float %35, %25
  %50 = select i1 %49, float %35, float %25
  %51 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00)
  %52 = call float @llvm.AMDGPU.lrp(float %51, float %45, float %22)
  %53 = call float @llvm.AMDGPU.lrp(float %51, float %46, float %23)
  %54 = call float @llvm.AMDGPU.lrp(float %51, float %47, float %24)
  %55 = call i32 @llvm.SI.packf16(float %52, float %53)
  %56 = bitcast i32 %55 to float
  %57 = call i32 @llvm.SI.packf16(float %54, float %48)
  %58 = bitcast i32 %57 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8080700
c8090701
c0800100
bf8c007f
c2020103
bf8c007f
d00c0008
02000902
7e060204
d2000002
00220503
d2060802
02010102
080604f2
c2020101
bf8c007f
100c0604
c81c0100
c81d0101
c2020110
c2028111
bf8c007f
7e080205
d2060004
02020804
c2020112
bf8c007f
06080804
c2020113
bf8c007f
06080804
c2020109
bf8c007f
06080804
d2060804
02010104
080a08f2
d2820007
04160f04
d2820006
041a0f02
c2020100
bf8c007f
100e0604
c8200000
c8210001
d2820008
04161104
d2820007
041e1102
5e0c0d07
c2000102
bf8c007f
10060600
c81c0200
c81d0201
d2820007
04160f04
d2820002
040e0f02
c80c0300
c80d0301
d2820000
04160704
5e000102
f8001c0f
00060006
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..12]
DCL TEMP[0..2], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[0].xyz, TEMP[0], CONST[3]
  9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[11].xyzz
 10: MAD TEMP[2].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 11: MOV TEMP[0].w, TEMP[2].xxxx
 12: MOV OUT[1], CONST[8]
 13: MOV OUT[2], TEMP[0]
 14: MOV OUT[0], TEMP[1]
 15: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %5)
  %57 = extractelement <4 x float> %56, i32 0
  %58 = extractelement <4 x float> %56, i32 1
  %59 = extractelement <4 x float> %56, i32 2
  %60 = fmul float %57, %46
  %61 = fadd float %60, %43
  %62 = fmul float %58, %47
  %63 = fadd float %62, %44
  %64 = fmul float %59, %48
  %65 = fadd float %64, %45
  %66 = fmul float %23, %61
  %67 = fmul float %24, %61
  %68 = fmul float %25, %61
  %69 = fmul float %26, %61
  %70 = fmul float %27, %63
  %71 = fadd float %70, %66
  %72 = fmul float %28, %63
  %73 = fadd float %72, %67
  %74 = fmul float %29, %63
  %75 = fadd float %74, %68
  %76 = fmul float %30, %63
  %77 = fadd float %76, %69
  %78 = fmul float %31, %65
  %79 = fadd float %78, %71
  %80 = fmul float %32, %65
  %81 = fadd float %80, %73
  %82 = fmul float %33, %65
  %83 = fadd float %82, %75
  %84 = fmul float %34, %65
  %85 = fadd float %84, %77
  %86 = fadd float %79, %35
  %87 = fadd float %81, %36
  %88 = fadd float %83, %37
  %89 = fadd float %85, %38
  %90 = fmul float %11, %61
  %91 = fmul float %12, %61
  %92 = fmul float %13, %61
  %93 = fmul float %14, %63
  %94 = fadd float %93, %90
  %95 = fmul float %15, %63
  %96 = fadd float %95, %91
  %97 = fmul float %16, %63
  %98 = fadd float %97, %92
  %99 = fmul float %17, %65
  %100 = fadd float %99, %94
  %101 = fmul float %18, %65
  %102 = fadd float %101, %96
  %103 = fmul float %19, %65
  %104 = fadd float %103, %98
  %105 = fadd float %100, %20
  %106 = fadd float %102, %21
  %107 = fadd float %104, %22
  %108 = fsub float -0.000000e+00, %49
  %109 = fadd float %105, %108
  %110 = fsub float -0.000000e+00, %50
  %111 = fadd float %106, %110
  %112 = fsub float -0.000000e+00, %51
  %113 = fadd float %107, %112
  %114 = fmul float %88, %52
  %115 = fadd float %114, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %109, float %111, float %113, float %115)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %86, float %87, float %88, float %89)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0820700
bf8c000f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020112
bf8c007f
100c0204
c2020116
bf8c007f
d2820006
041a0004
c202012a
c2028126
bf8c007f
7e0e0205
d2820002
041c0904
c202011a
bf8c007f
d2820003
041a0404
c202011e
bf8c007f
06060604
c2020130
c2028131
bf8c007f
7e080205
d2820004
04100903
c2020102
bf8c007f
100a0204
c2020106
bf8c007f
d2820005
04160004
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c202012e
bf8c007f
0a0a0a04
c2020101
bf8c007f
100c0204
c2020105
bf8c007f
d2820006
041a0004
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c202012d
bf8c007f
0a0c0c04
c2020100
bf8c007f
100e0204
c2020104
bf8c007f
d2820007
041e0004
c2020108
bf8c007f
d2820007
041e0404
c202010c
bf8c007f
060e0e04
c202012c
bf8c007f
0a0e0e04
f800021f
04050607
c2020113
bf8c000f
10080204
c2020117
bf8c007f
d2820004
04120004
c202011b
bf8c007f
d2820004
04120404
c202011f
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
04030500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2..14]
DCL TEMP[0]
DCL TEMP[1..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[4].xxxx
  2: MOV TEMP[1].xy, IN[3].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP4 TEMP[3].x, IMM[0].yyyy, CONST[12]
  9: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[11].xxxx
 10: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 11: RSQ TEMP[4].x, TEMP[4].xxxx
 12: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 14: DP4 TEMP[2].x, IMM[0].yyyy, CONST[13]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[11].yyyy
 16: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].yyyy
 17: MOV TEMP[4].w, TEMP[2]
 18: DP3 TEMP[5].x, TEMP[1].xyzz, CONST[3].xyzz
 19: ADD TEMP[5].x, TEMP[5].xxxx, CONST[7].wwww
 20: MOV_SAT TEMP[6].x, TEMP[5].xxxx
 21: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[5].xyzz, CONST[6].xyzz
 22: MOV_SAT TEMP[7].x, -TEMP[5].xxxx
 23: LRP TEMP[7].xyz, TEMP[7].xxxx, CONST[7].xyzz, CONST[6].xyzz
 24: SLT TEMP[8].x, TEMP[5].xxxx, IMM[0].zzzz
 25: F2I TEMP[8].x, -TEMP[8]
 26: UIF TEMP[8].xxxx :1
 27:   MOV TEMP[7].xyz, TEMP[7].xyzx
 28: ELSE :1
 29:   MOV TEMP[7].xyz, TEMP[6].xyzx
 30: ENDIF
 31: DP3 TEMP[6].x, IN[2].xyzz, IN[2].xyzz
 32: RSQ TEMP[6].x, TEMP[6].xxxx
 33: MUL TEMP[6].xyz, IN[2].xyzz, TEMP[6].xxxx
 34: ADD TEMP[6].xyz, CONST[4].xyzz, -TEMP[6].xyzz
 35: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz
 36: RSQ TEMP[8].x, TEMP[8].xxxx
 37: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xxxx
 38: DP3_SAT TEMP[6].x, TEMP[6].xyzz, TEMP[1].xyzz
 39: POW TEMP[6].x, TEMP[6].xxxx, CONST[10].xxxx
 40: SGE TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 41: F2I TEMP[5].x, -TEMP[5]
 42: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
 43: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx
 44: MUL TEMP[5].xyz, CONST[8].xyzz, TEMP[5].xxxx
 45: MOV TEMP[1].xyz, TEMP[1].xyzz
 46: TEX TEMP[1].xyz, TEMP[1], SAMP[1], CUBE
 47: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[7].xyzz
 48: MOV TEMP[6].xy, IN[3].xyyy
 49: TEX TEMP[6], TEMP[6], SAMP[0], 2D
 50: MUL TEMP[6], TEMP[6], CONST[9]
 51: LRP TEMP[1].xyz, TEMP[6].wwww, TEMP[6].xyzz, TEMP[1].xyzz
 52: LRP TEMP[5].xyz, TEMP[6].wwww, IMM[0].zzzz, TEMP[5].xyzz
 53: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz
 54: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[1].xyzz
 55: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 56: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 57: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[4].xyzz, CONST[2].xyzz
 58: MOV OUT[0], TEMP[4]
 59: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = bitcast float %83 to i32
  %88 = icmp ne i32 %87, 0
  %. = select i1 %88, float -1.000000e+00, float 1.000000e+00
  %89 = fmul float 1.000000e+00, %52
  %90 = fmul float 1.000000e+00, %53
  %91 = fadd float %89, %90
  %92 = fmul float 1.000000e+00, %54
  %93 = fadd float %91, %92
  %94 = fmul float 1.000000e+00, %55
  %95 = fadd float %93, %94
  %96 = fadd float %95, %50
  %97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00)
  %98 = fmul float %80, %80
  %99 = fmul float %81, %81
  %100 = fadd float %99, %98
  %101 = fmul float %82, %82
  %102 = fadd float %100, %101
  %103 = call float @llvm.AMDGPU.rsq(float %102)
  %104 = fmul float %80, %103
  %105 = fmul float %81, %103
  %106 = fmul float %82, %103
  %107 = fmul float %104, %.
  %108 = fmul float %105, %.
  %109 = fmul float %106, %.
  %110 = fmul float 1.000000e+00, %56
  %111 = fmul float 1.000000e+00, %57
  %112 = fadd float %110, %111
  %113 = fmul float 1.000000e+00, %58
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %59
  %116 = fadd float %114, %115
  %117 = fadd float %116, %51
  %118 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00)
  %119 = call float @llvm.AMDGPU.lrp(float %118, float %70, float 1.000000e+00)
  %120 = call float @llvm.AMDGPU.lrp(float %118, float %71, float 1.000000e+00)
  %121 = call float @llvm.AMDGPU.lrp(float %118, float %72, float 1.000000e+00)
  %122 = call float @llvm.AMDGPU.lrp(float %118, float %73, float 1.000000e+00)
  %123 = fmul float %107, %26
  %124 = fmul float %108, %27
  %125 = fadd float %124, %123
  %126 = fmul float %109, %28
  %127 = fadd float %125, %126
  %128 = fadd float %127, %41
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %32, float %35)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %33, float %36)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %34, float %37)
  %133 = fsub float -0.000000e+00, %128
  %134 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00)
  %135 = call float @llvm.AMDGPU.lrp(float %134, float %38, float %35)
  %136 = call float @llvm.AMDGPU.lrp(float %134, float %39, float %36)
  %137 = call float @llvm.AMDGPU.lrp(float %134, float %40, float %37)
  %138 = fcmp ult float %128, 0.000000e+00
  %139 = select i1 %138, float 1.000000e+00, float 0.000000e+00
  %140 = fsub float -0.000000e+00, %139
  %141 = fptosi float %140 to i32
  %142 = bitcast i32 %141 to float
  %143 = bitcast float %142 to i32
  %144 = icmp ne i32 %143, 0
  %temp28.0 = select i1 %144, float %135, float %130
  %temp29.0 = select i1 %144, float %136, float %131
  %temp30.0 = select i1 %144, float %137, float %132
  %145 = fmul float %74, %74
  %146 = fmul float %75, %75
  %147 = fadd float %146, %145
  %148 = fmul float %76, %76
  %149 = fadd float %147, %148
  %150 = call float @llvm.AMDGPU.rsq(float %149)
  %151 = fmul float %74, %150
  %152 = fmul float %75, %150
  %153 = fmul float %76, %150
  %154 = fsub float -0.000000e+00, %151
  %155 = fadd float %29, %154
  %156 = fsub float -0.000000e+00, %152
  %157 = fadd float %30, %156
  %158 = fsub float -0.000000e+00, %153
  %159 = fadd float %31, %158
  %160 = fmul float %155, %155
  %161 = fmul float %157, %157
  %162 = fadd float %161, %160
  %163 = fmul float %159, %159
  %164 = fadd float %162, %163
  %165 = call float @llvm.AMDGPU.rsq(float %164)
  %166 = fmul float %155, %165
  %167 = fmul float %157, %165
  %168 = fmul float %159, %165
  %169 = fmul float %166, %107
  %170 = fmul float %167, %108
  %171 = fadd float %170, %169
  %172 = fmul float %168, %109
  %173 = fadd float %171, %172
  %174 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00)
  %175 = call float @llvm.pow.f32(float %174, float %49)
  %176 = fcmp uge float %128, 0x3F847AE140000000
  %177 = select i1 %176, float 1.000000e+00, float 0.000000e+00
  %178 = fsub float -0.000000e+00, %177
  %179 = fptosi float %178 to i32
  %180 = bitcast i32 %179 to float
  %181 = bitcast float %180 to i32
  %182 = and i32 %181, 1065353216
  %183 = bitcast i32 %182 to float
  %184 = fmul float %175, %183
  %185 = fmul float %42, %184
  %186 = fmul float %43, %184
  %187 = fmul float %44, %184
  %188 = insertelement <4 x float> undef, float %107, i32 0
  %189 = insertelement <4 x float> %188, float %108, i32 1
  %190 = insertelement <4 x float> %189, float %109, i32 2
  %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3
  %192 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %191)
  %193 = extractelement <4 x float> %192, i32 0
  %194 = extractelement <4 x float> %192, i32 1
  %195 = extractelement <4 x float> %192, i32 2
  %196 = extractelement <4 x float> %192, i32 3
  %197 = call float @fabs(float %195)
  %198 = fdiv float 1.000000e+00, %197
  %199 = fmul float %193, %198
  %200 = fadd float %199, 1.500000e+00
  %201 = fmul float %194, %198
  %202 = fadd float %201, 1.500000e+00
  %203 = bitcast float %202 to i32
  %204 = bitcast float %200 to i32
  %205 = bitcast float %196 to i32
  %206 = insertelement <4 x i32> undef, i32 %203, i32 0
  %207 = insertelement <4 x i32> %206, i32 %204, i32 1
  %208 = insertelement <4 x i32> %207, i32 %205, i32 2
  %209 = insertelement <4 x i32> %208, i32 undef, i32 3
  %210 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %209, <32 x i8> %65, <16 x i8> %67, i32 4)
  %211 = extractelement <4 x float> %210, i32 0
  %212 = extractelement <4 x float> %210, i32 1
  %213 = extractelement <4 x float> %210, i32 2
  %214 = fmul float %211, 4.000000e+00
  %215 = fadd float %214, %temp28.0
  %216 = fmul float %212, 4.000000e+00
  %217 = fadd float %216, %temp29.0
  %218 = fmul float %213, 4.000000e+00
  %219 = fadd float %218, %temp30.0
  %220 = bitcast float %78 to i32
  %221 = bitcast float %79 to i32
  %222 = insertelement <2 x i32> undef, i32 %220, i32 0
  %223 = insertelement <2 x i32> %222, i32 %221, i32 1
  %224 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %61, <16 x i8> %63, i32 2)
  %225 = extractelement <4 x float> %224, i32 0
  %226 = extractelement <4 x float> %224, i32 1
  %227 = extractelement <4 x float> %224, i32 2
  %228 = extractelement <4 x float> %224, i32 3
  %229 = fmul float %225, %45
  %230 = fmul float %226, %46
  %231 = fmul float %227, %47
  %232 = fmul float %228, %48
  %233 = call float @llvm.AMDGPU.lrp(float %232, float %229, float %215)
  %234 = call float @llvm.AMDGPU.lrp(float %232, float %230, float %217)
  %235 = call float @llvm.AMDGPU.lrp(float %232, float %231, float %219)
  %236 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %185)
  %237 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %186)
  %238 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %187)
  %239 = fmul float %119, %233
  %240 = fmul float %120, %234
  %241 = fmul float %121, %235
  %242 = fmul float %236, %97
  %243 = fadd float %242, %239
  %244 = fmul float %237, %97
  %245 = fadd float %244, %240
  %246 = fmul float %238, %97
  %247 = fadd float %246, %241
  %248 = fcmp uge float %77, %25
  %249 = select i1 %248, float %77, float %25
  %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00)
  %251 = call float @llvm.AMDGPU.lrp(float %250, float %243, float %22)
  %252 = call float @llvm.AMDGPU.lrp(float %250, float %245, float %23)
  %253 = call float @llvm.AMDGPU.lrp(float %250, float %247, float %24)
  %254 = call i32 @llvm.SI.packf16(float %251, float %252)
  %255 = bitcast i32 %254 to float
  %256 = call i32 @llvm.SI.packf16(float %253, float %122)
  %257 = bitcast i32 %256 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %255, float %257, float %255, float %257)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100b00
c8110b01
c80c0a00
c80d0a01
100a0703
d2820006
04160904
c8140c00
c8150c01
d2820006
041a0b05
7e0c5b06
10080d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000002
0021e6f2
10180504
10060d03
10160503
10060d05
101a0503
7e1c0280
d28a0003
0436190b
d28c0002
0436190b
d28e0004
0436190b
d2880005
0436190b
d206010a
02010104
7e14550a
7e1e02ff
3fc00000
d2820004
043e1502
d2820003
043e1503
c0840304
c0c60508
bf8c007f
f0800700
00430203
c0840100
bf8c0070
c200090c
bf8c007f
100a1600
c200090d
bf8c007f
d2820005
0414010c
c200090e
bf8c007f
d2820005
0414010d
c200091f
bf8c007f
062a0a00
d2060805
02010115
080c0af2
c2000919
bf8c007f
100e0c00
c2008915
bf8c007f
d2820009
041c0305
d2060007
22010115
d2060807
02010107
08100ef2
10141000
c200091d
bf8c007f
d282000a
04280107
d0020000
02010115
d200000f
0001e480
d206000f
2201010f
7e1e110f
d10a0000
0201010f
d2000009
00021509
d2820013
0425ed03
c8280900
c8290901
c8240800
c8250801
c0860300
c0c80500
bf8c007f
f0800f00
00640f09
c2010927
bf8c0070
10122402
081412f2
1026270a
c2010925
bf8c007f
10282002
d2820016
044e2909
c85c0100
c85d0101
c2010934
c2018935
bf8c007f
7e260203
d2060013
02022602
c2010936
bf8c007f
06262602
c2010937
bf8c007f
06262602
c201092d
bf8c007f
06262602
d2060813
02010113
082826f2
d2820017
04522f13
102c2d17
c85c0500
c85d0501
c8600400
c8610401
10323118
d282001a
04662f17
c8640600
c8650601
d282001a
046a3319
7e345b1a
102e3517
c2010911
bf8c007f
082e2e02
10303518
c2010910
bf8c007f
08303002
10363118
d282001b
046e2f17
10323519
c2010912
bf8c007f
08323202
d282001a
046e3319
7e345b1a
102e3517
10303518
10301718
d2820017
04621917
10303519
d282000b
045e1b18
d206080b
0201010b
7e164f0b
c2010928
bf8c007f
0e161602
7e164b0b
7e1802ff
3c23d70a
d00c0002
02021915
d200000c
0009e480
d206000c
2201010c
7e18110c
361818f2
1016190b
c2010921
bf8c007f
10181602
1018190a
d282000d
04310109
c2010930
c2018931
bf8c007f
7e180203
d206000c
02021802
c2010932
bf8c007f
06181802
c2010933
bf8c007f
06181802
c201092c
bf8c007f
06181802
d206080c
0201010c
d2820015
045a190d
c8340700
c8350701
c201090b
bf8c007f
d00c0004
0200050d
7e1c0202
d200000d
00121b0e
d206080d
0201010d
081c1af2
c2010909
bf8c007f
102c1c02
d2820015
045a2b0d
c2010918
bf8c007f
102c0c02
c2018914
bf8c007f
d2820016
04580705
102e1002
c201091c
bf8c007f
d2820017
045c0507
d2000016
00022f16
d2820016
0459ed02
102c2d0a
c2010924
bf8c007f
102e1e02
d2820016
045a2f09
c85c0000
c85d0001
d2820017
04522f13
102c2d17
c2010920
bf8c007f
102e1602
102e2f0a
d2820017
045d0109
d2820016
045a1917
c2010908
bf8c007f
102e1c02
d2820016
045e2d0d
5e2a2b16
c201091a
bf8c007f
100c0c02
c2018916
bf8c007f
d2820005
04180705
100c1002
c201091e
bf8c007f
d2820006
04180507
d2000005
00020d05
d2820002
0415ed04
1004050a
c2000926
bf8c007f
10062200
d2820002
040a0709
c80c0200
c80d0201
d2820003
04520713
10040503
c2000922
bf8c007f
10061600
1006070a
d2820003
040d0109
d2820002
040a1903
c200090a
bf8c007f
10061c00
d2820002
040e050d
c80c0300
c80d0301
d2820000
04520713
5e000102
f8001c0f
00150015
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..15]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[13].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 10: RSQ TEMP[3].x, TEMP[3].xxxx
 11: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 12: MUL TEMP[3], CONST[0], TEMP[0].xxxx
 13: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
 14: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 15: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 16: MOV TEMP[3].w, IMM[0].yyyy
 17: MOV TEMP[3].xyz, TEMP[0].xyzx
 18: MOV TEMP[4].w, IMM[0].yyyy
 19: MOV TEMP[4].xyz, TEMP[0].xyzx
 20: DP4 TEMP[3].x, CONST[14], TEMP[3]
 21: DP4 TEMP[4].x, CONST[15], TEMP[4]
 22: MOV TEMP[3].y, TEMP[4].xxxx
 23: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[11].xyzz
 24: MAD TEMP[4].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 25: MOV TEMP[0].w, TEMP[4].xxxx
 26: MOV TEMP[4].zw, TEMP[2].yyxy
 27: MOV TEMP[2].x, TEMP[2].zzzz
 28: MOV TEMP[4].xy, TEMP[3].xyxx
 29: MOV OUT[1], CONST[8]
 30: MOV OUT[4], TEMP[2]
 31: MOV OUT[2], TEMP[0]
 32: MOV OUT[0], TEMP[1]
 33: MOV OUT[3], TEMP[4]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = fmul float %66, %46
  %76 = fadd float %75, %43
  %77 = fmul float %67, %47
  %78 = fadd float %77, %44
  %79 = fmul float %68, %48
  %80 = fadd float %79, %45
  %81 = fmul float %23, %76
  %82 = fmul float %24, %76
  %83 = fmul float %25, %76
  %84 = fmul float %26, %76
  %85 = fmul float %27, %78
  %86 = fadd float %85, %81
  %87 = fmul float %28, %78
  %88 = fadd float %87, %82
  %89 = fmul float %29, %78
  %90 = fadd float %89, %83
  %91 = fmul float %30, %78
  %92 = fadd float %91, %84
  %93 = fmul float %31, %80
  %94 = fadd float %93, %86
  %95 = fmul float %32, %80
  %96 = fadd float %95, %88
  %97 = fmul float %33, %80
  %98 = fadd float %97, %90
  %99 = fmul float %34, %80
  %100 = fadd float %99, %92
  %101 = fadd float %94, %35
  %102 = fadd float %96, %36
  %103 = fadd float %98, %37
  %104 = fadd float %100, %38
  %105 = fmul float %72, %54
  %106 = fmul float %73, %54
  %107 = fmul float %74, %54
  %108 = fmul float %11, %105
  %109 = fmul float %12, %105
  %110 = fmul float %13, %105
  %111 = fmul float %14, %106
  %112 = fadd float %111, %108
  %113 = fmul float %15, %106
  %114 = fadd float %113, %109
  %115 = fmul float %16, %106
  %116 = fadd float %115, %110
  %117 = fmul float %17, %107
  %118 = fadd float %117, %112
  %119 = fmul float %18, %107
  %120 = fadd float %119, %114
  %121 = fmul float %19, %107
  %122 = fadd float %121, %116
  %123 = fmul float %118, %118
  %124 = fmul float %120, %120
  %125 = fadd float %124, %123
  %126 = fmul float %122, %122
  %127 = fadd float %125, %126
  %128 = call float @llvm.AMDGPU.rsq(float %127)
  %129 = fmul float %118, %128
  %130 = fmul float %120, %128
  %131 = fmul float %122, %128
  %132 = fmul float %11, %76
  %133 = fmul float %12, %76
  %134 = fmul float %13, %76
  %135 = fmul float %14, %78
  %136 = fadd float %135, %132
  %137 = fmul float %15, %78
  %138 = fadd float %137, %133
  %139 = fmul float %16, %78
  %140 = fadd float %139, %134
  %141 = fmul float %17, %80
  %142 = fadd float %141, %136
  %143 = fmul float %18, %80
  %144 = fadd float %143, %138
  %145 = fmul float %19, %80
  %146 = fadd float %145, %140
  %147 = fadd float %142, %20
  %148 = fadd float %144, %21
  %149 = fadd float %146, %22
  %150 = fmul float %55, %147
  %151 = fmul float %56, %148
  %152 = fadd float %150, %151
  %153 = fmul float %57, %149
  %154 = fadd float %152, %153
  %155 = fmul float %58, 1.000000e+00
  %156 = fadd float %154, %155
  %157 = fmul float %59, %147
  %158 = fmul float %60, %148
  %159 = fadd float %157, %158
  %160 = fmul float %61, %149
  %161 = fadd float %159, %160
  %162 = fmul float %62, 1.000000e+00
  %163 = fadd float %161, %162
  %164 = fsub float -0.000000e+00, %49
  %165 = fadd float %147, %164
  %166 = fsub float -0.000000e+00, %50
  %167 = fadd float %148, %166
  %168 = fsub float -0.000000e+00, %51
  %169 = fadd float %149, %168
  %170 = fmul float %103, %52
  %171 = fadd float %170, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %165, float %167, float %169, float %171)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %156, float %163, float %129, float %130)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %131, float %130, float %131, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %101, float %102, float %103, float %104)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c2020129
c2028125
bf8c0070
7e020205
d2820001
04040904
c2020128
c2028124
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012a
c2028126
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820008
04140904
c2020102
bf8c007f
100a0404
c2028106
bf8c007f
d2820005
04160205
c204010a
bf8c007f
d2820005
04160608
c204810e
bf8c007f
060a0a09
c204812e
bf8c007f
0a120a09
c2048101
bf8c007f
100c0409
c2058105
bf8c007f
d2820006
041a020b
c2050109
bf8c007f
d2820006
041a060a
c206010d
bf8c007f
060c0c0c
c206012d
bf8c007f
0a140c0c
c2060100
bf8c007f
100e040c
c2068104
bf8c007f
d2820007
041e020d
c2070108
bf8c007f
d2820007
041e060e
c207810c
bf8c007f
060e0e0f
c207812c
bf8c007f
0a160e0f
f800021f
08090a0b
c0880704
bf8c000f
e00c2000
80040900
c2030137
bf8c0070
10001406
10101206
101a1009
d282000d
0436000b
10121606
d282000a
0436120a
1016100c
d282000b
042e000d
d282000b
042e120e
1018170b
d282000c
0432150a
10101004
d2820000
04220005
d2820000
04021208
d2820008
04320100
7e125b08
1010130a
1014130b
c202013d
bf8c007f
10160c04
c202013c
bf8c007f
d282000b
042e0e04
c202013e
bf8c007f
d282000b
042e0a04
c202013f
bf8c007f
06161604
c2020139
bf8c007f
100c0c04
c2020138
bf8c007f
d2820006
041a0e04
c202013a
bf8c007f
d2820005
041a0a04
c202013b
bf8c007f
060a0a04
f800022f
080a0b05
10001300
bf8c070f
7e0a0280
f800023f
05000800
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0], LOCAL
  0: MOV TEMP[0].xy, IN[1].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MUL TEMP[0], IN[0], TEMP[0]
  3: MOV OUT[0], TEMP[0]
  4: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %30 = bitcast float %28 to i32
  %31 = bitcast float %29 to i32
  %32 = insertelement <2 x i32> undef, i32 %30, i32 0
  %33 = insertelement <2 x i32> %32, i32 %31, i32 1
  %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %33, <32 x i8> %21, <16 x i8> %23, i32 2)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %24, %35
  %40 = fmul float %25, %36
  %41 = fmul float %26, %37
  %42 = fmul float %27, %38
  %43 = fcmp ugt float %42, 0.000000e+00
  %44 = sext i1 %43 to i32
  %45 = trunc i32 %44 to i1
  %46 = select i1 %45, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %46)
  %47 = call i32 @llvm.SI.packf16(float %39, float %40)
  %48 = bitcast i32 %47 to float
  %49 = call i32 @llvm.SI.packf16(float %41, float %42)
  %50 = bitcast i32 %49 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0500
c80d0501
c8080400
c8090401
c0800300
c0c40500
bf8c007f
f0800f00
00020202
c8180300
c8190301
bf8c0770
100c0b06
d0080000
02010106
d2000007
0001e4f3
7c260e80
c81c0200
c81d0201
100e0907
5e0c0d07
c81c0100
c81d0101
100e0707
c8200000
c8210001
10000508
5e000f00
f8001c0f
06000600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    2.0000,     3.0000,     4.0000,     5.0000}
IMM[1] FLT32 {    6.0000,     7.0000,     0.1250,     1.0000}
  0: MOV TEMP[0].xy, IN[0].xyyy
  1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
  2: ADD TEMP[1].xy, IN[0].xyyy, IN[0].zwww
  3: MOV TEMP[1].xy, TEMP[1].xyyy
  4: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
  5: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xyzz
  6: MAD TEMP[2].xy, IN[0].zwww, IMM[0].xxxx, IN[0].xyyy
  7: MOV TEMP[2].xy, TEMP[2].xyyy
  8: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
  9: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 10: MAD TEMP[2].xy, IN[0].zwww, IMM[0].yyyy, IN[0].xyyy
 11: MOV TEMP[2].xy, TEMP[2].xyyy
 12: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
 13: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 14: MAD TEMP[2].xy, IN[0].zwww, IMM[0].zzzz, IN[0].xyyy
 15: MOV TEMP[2].xy, TEMP[2].xyyy
 16: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
 17: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 18: MAD TEMP[2].xy, IN[0].zwww, IMM[0].wwww, IN[0].xyyy
 19: MOV TEMP[2].xy, TEMP[2].xyyy
 20: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
 21: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 22: MAD TEMP[2].xy, IN[0].zwww, IMM[1].xxxx, IN[0].xyyy
 23: MOV TEMP[2].xy, TEMP[2].xyyy
 24: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
 25: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 26: MAD TEMP[2].xy, IN[0].zwww, IMM[1].yyyy, IN[0].xyyy
 27: MOV TEMP[2].xy, TEMP[2].xyyy
 28: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
 29: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
 30: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[1].zzzz
 31: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[1].xxxx
 32: MAD TEMP[0].xyz, TEMP[0].xyzz, CONST[1].yyyy, TEMP[1].xyzz
 33: MOV TEMP[1].w, IMM[1].wwww
 34: MOV TEMP[1].xyz, TEMP[0].xyzx
 35: MOV OUT[0], TEMP[1]
 36: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !0
  %26 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0
  %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %32 = bitcast float %28 to i32
  %33 = bitcast float %29 to i32
  %34 = insertelement <2 x i32> undef, i32 %32, i32 0
  %35 = insertelement <2 x i32> %34, i32 %33, i32 1
  %36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %25, <16 x i8> %27, i32 2)
  %37 = extractelement <4 x float> %36, i32 0
  %38 = extractelement <4 x float> %36, i32 1
  %39 = extractelement <4 x float> %36, i32 2
  %40 = fadd float %28, %30
  %41 = fadd float %29, %31
  %42 = bitcast float %40 to i32
  %43 = bitcast float %41 to i32
  %44 = insertelement <2 x i32> undef, i32 %42, i32 0
  %45 = insertelement <2 x i32> %44, i32 %43, i32 1
  %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %25, <16 x i8> %27, i32 2)
  %47 = extractelement <4 x float> %46, i32 0
  %48 = extractelement <4 x float> %46, i32 1
  %49 = extractelement <4 x float> %46, i32 2
  %50 = fadd float %37, %47
  %51 = fadd float %38, %48
  %52 = fadd float %39, %49
  %53 = fmul float %30, 2.000000e+00
  %54 = fadd float %53, %28
  %55 = fmul float %31, 2.000000e+00
  %56 = fadd float %55, %29
  %57 = bitcast float %54 to i32
  %58 = bitcast float %56 to i32
  %59 = insertelement <2 x i32> undef, i32 %57, i32 0
  %60 = insertelement <2 x i32> %59, i32 %58, i32 1
  %61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %25, <16 x i8> %27, i32 2)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = fadd float %50, %62
  %66 = fadd float %51, %63
  %67 = fadd float %52, %64
  %68 = fmul float %30, 3.000000e+00
  %69 = fadd float %68, %28
  %70 = fmul float %31, 3.000000e+00
  %71 = fadd float %70, %29
  %72 = bitcast float %69 to i32
  %73 = bitcast float %71 to i32
  %74 = insertelement <2 x i32> undef, i32 %72, i32 0
  %75 = insertelement <2 x i32> %74, i32 %73, i32 1
  %76 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %25, <16 x i8> %27, i32 2)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = fadd float %65, %77
  %81 = fadd float %66, %78
  %82 = fadd float %67, %79
  %83 = fmul float %30, 4.000000e+00
  %84 = fadd float %83, %28
  %85 = fmul float %31, 4.000000e+00
  %86 = fadd float %85, %29
  %87 = bitcast float %84 to i32
  %88 = bitcast float %86 to i32
  %89 = insertelement <2 x i32> undef, i32 %87, i32 0
  %90 = insertelement <2 x i32> %89, i32 %88, i32 1
  %91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %90, <32 x i8> %25, <16 x i8> %27, i32 2)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = extractelement <4 x float> %91, i32 1
  %94 = extractelement <4 x float> %91, i32 2
  %95 = fadd float %80, %92
  %96 = fadd float %81, %93
  %97 = fadd float %82, %94
  %98 = fmul float %30, 5.000000e+00
  %99 = fadd float %98, %28
  %100 = fmul float %31, 5.000000e+00
  %101 = fadd float %100, %29
  %102 = bitcast float %99 to i32
  %103 = bitcast float %101 to i32
  %104 = insertelement <2 x i32> undef, i32 %102, i32 0
  %105 = insertelement <2 x i32> %104, i32 %103, i32 1
  %106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %105, <32 x i8> %25, <16 x i8> %27, i32 2)
  %107 = extractelement <4 x float> %106, i32 0
  %108 = extractelement <4 x float> %106, i32 1
  %109 = extractelement <4 x float> %106, i32 2
  %110 = fadd float %95, %107
  %111 = fadd float %96, %108
  %112 = fadd float %97, %109
  %113 = fmul float %30, 6.000000e+00
  %114 = fadd float %113, %28
  %115 = fmul float %31, 6.000000e+00
  %116 = fadd float %115, %29
  %117 = bitcast float %114 to i32
  %118 = bitcast float %116 to i32
  %119 = insertelement <2 x i32> undef, i32 %117, i32 0
  %120 = insertelement <2 x i32> %119, i32 %118, i32 1
  %121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %25, <16 x i8> %27, i32 2)
  %122 = extractelement <4 x float> %121, i32 0
  %123 = extractelement <4 x float> %121, i32 1
  %124 = extractelement <4 x float> %121, i32 2
  %125 = fadd float %110, %122
  %126 = fadd float %111, %123
  %127 = fadd float %112, %124
  %128 = fmul float %30, 7.000000e+00
  %129 = fadd float %128, %28
  %130 = fmul float %31, 7.000000e+00
  %131 = fadd float %130, %29
  %132 = bitcast float %129 to i32
  %133 = bitcast float %131 to i32
  %134 = insertelement <2 x i32> undef, i32 %132, i32 0
  %135 = insertelement <2 x i32> %134, i32 %133, i32 1
  %136 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %25, <16 x i8> %27, i32 2)
  %137 = extractelement <4 x float> %136, i32 0
  %138 = extractelement <4 x float> %136, i32 1
  %139 = extractelement <4 x float> %136, i32 2
  %140 = fadd float %125, %137
  %141 = fadd float %126, %138
  %142 = fadd float %127, %139
  %143 = fmul float %140, 1.250000e-01
  %144 = fmul float %141, 1.250000e-01
  %145 = fmul float %142, 1.250000e-01
  %146 = fmul float %143, %22
  %147 = fmul float %144, %22
  %148 = fmul float %145, %22
  %149 = fmul float %37, %23
  %150 = fadd float %149, %146
  %151 = fmul float %38, %23
  %152 = fadd float %151, %147
  %153 = fmul float %39, %23
  %154 = fadd float %153, %148
  %155 = call i32 @llvm.SI.packf16(float %150, float %152)
  %156 = bitcast i32 %155 to float
  %157 = call i32 @llvm.SI.packf16(float %154, float 1.000000e+00)
  %158 = bitcast i32 %157 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %156, float %158, float %156, float %158)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180300
c8190301
c8200100
c8210101
06060d08
c8300200
c8310201
c81c0000
c81d0001
06041907
c0840300
c0c60500
bf8c007f
f0800700
00430302
f0800700
00430007
bf8c0770
061a0901
06120d06
06141109
0616190c
06120f0b
f0800700
00430909
bf8c0770
0620150d
7e1e02ff
40400000
d282000e
04221f06
d282000d
041e1f0c
f0800700
00430d0d
bf8c0770
06261d10
d2820011
0421ed06
d2820010
041ded0c
f0800700
00431010
bf8c0770
062c2313
7e2a02ff
40a00000
d2820014
04222b06
d2820013
041e2b0c
f0800700
00431313
bf8c0770
06322916
7e3002ff
40c00000
d2820017
04223106
d2820016
041e310c
f0800700
00431616
bf8c0770
06322f19
7e3802ff
40e00000
d282001b
04223906
d282001a
041e390c
f0800700
0043061a
bf8c0770
06180f19
101818ff
3e000000
c0820100
bf8c007f
c2000504
bf8c007f
10181800
c2008505
bf8c007f
d282000c
04300301
06320700
06321319
06321b19
06322119
06322719
06322d19
06320d19
103232ff
3e000000
10323200
d2820019
04640300
5e181919
06060b02
06061703
06061f03
06062503
06062b03
06063103
06061103
100606ff
3e000000
10060600
d2820000
040c0302
d25e0000
0201e500
f8001c0f
000c000c
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     2.0000,    -1.0000,     0.0000}
  0: MOV TEMP[0].w, IMM[0].xxxx
  1: MOV TEMP[0].xyz, IN[0].xyzx
  2: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
  3: MAD TEMP[2].xy, IMM[0].yyyy, TEMP[1].xyyy, IMM[0].zzzz
  4: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1].xxxx
  5: MOV TEMP[1].zw, TEMP[2].yyxy
  6: MOV OUT[1], TEMP[1]
  7: MOV OUT[0], TEMP[0]
  8: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %17 = load <16 x i8> addrspace(2)* %16, !tbaa !0
  %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %17, i32 0, i32 %5)
  %19 = extractelement <4 x float> %18, i32 0
  %20 = extractelement <4 x float> %18, i32 1
  %21 = extractelement <4 x float> %18, i32 2
  %22 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %5)
  %25 = extractelement <4 x float> %24, i32 0
  %26 = extractelement <4 x float> %24, i32 1
  %27 = fmul float %25, %11
  %28 = fadd float %27, %13
  %29 = fmul float %26, %12
  %30 = fadd float %29, %14
  %31 = fmul float 2.000000e+00, %28
  %32 = fadd float %31, -1.000000e+00
  %33 = fmul float 2.000000e+00, %30
  %34 = fadd float %33, -1.000000e+00
  %35 = fmul float %32, %15
  %36 = fmul float %34, %15
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %28, float %30, float %35, float %36)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %19, float %20, float %21, float 1.000000e+00)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020100
c0800100
bf8c0070
c2020101
c2028103
bf8c007f
7e0a0205
d2820005
04140902
060c0b05
060c0cf3
c2020104
bf8c007f
100c0c04
c2028100
c2000102
bf8c007f
7e0e0200
d2820001
041c0b01
06040301
060404f3
10040404
f800020f
06020501
c0800700
bf8c000f
e00c2000
80000000
7e0802f2
bf8c0770
f80008cf
04020100
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2..14]
DCL TEMP[0]
DCL TEMP[1..8], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[4].xxxx
  2: MOV TEMP[1].xy, IN[3].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP4 TEMP[3].x, IMM[0].yyyy, CONST[12]
  9: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[11].xxxx
 10: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 11: RSQ TEMP[4].x, TEMP[4].xxxx
 12: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 14: DP4 TEMP[2].x, IMM[0].yyyy, CONST[13]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[11].yyyy
 16: LRP TEMP[2], TEMP[2].xxxx, IN[1], IMM[0].yyyy
 17: MOV TEMP[4].w, TEMP[2]
 18: DP3 TEMP[5].x, TEMP[1].xyzz, CONST[3].xyzz
 19: ADD TEMP[5].x, TEMP[5].xxxx, CONST[7].wwww
 20: MOV_SAT TEMP[6].x, TEMP[5].xxxx
 21: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[5].xyzz, CONST[6].xyzz
 22: MOV_SAT TEMP[7].x, -TEMP[5].xxxx
 23: LRP TEMP[7].xyz, TEMP[7].xxxx, CONST[7].xyzz, CONST[6].xyzz
 24: SLT TEMP[8].x, TEMP[5].xxxx, IMM[0].zzzz
 25: F2I TEMP[8].x, -TEMP[8]
 26: UIF TEMP[8].xxxx :1
 27:   MOV TEMP[7].xyz, TEMP[7].xyzx
 28: ELSE :1
 29:   MOV TEMP[7].xyz, TEMP[6].xyzx
 30: ENDIF
 31: DP3 TEMP[6].x, IN[2].xyzz, IN[2].xyzz
 32: RSQ TEMP[6].x, TEMP[6].xxxx
 33: MUL TEMP[6].xyz, IN[2].xyzz, TEMP[6].xxxx
 34: ADD TEMP[6].xyz, CONST[4].xyzz, -TEMP[6].xyzz
 35: DP3 TEMP[8].x, TEMP[6].xyzz, TEMP[6].xyzz
 36: RSQ TEMP[8].x, TEMP[8].xxxx
 37: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xxxx
 38: DP3_SAT TEMP[6].x, TEMP[6].xyzz, TEMP[1].xyzz
 39: POW TEMP[6].x, TEMP[6].xxxx, CONST[10].xxxx
 40: SGE TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 41: F2I TEMP[5].x, -TEMP[5]
 42: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
 43: MUL TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx
 44: MUL TEMP[5].xyz, CONST[8].xyzz, TEMP[5].xxxx
 45: MOV TEMP[1].xyz, TEMP[1].xyzz
 46: TEX TEMP[1].xyz, TEMP[1], SAMP[1], CUBE
 47: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[7].xyzz
 48: MOV TEMP[6].xy, IN[3].xyyy
 49: TEX TEMP[6], TEMP[6], SAMP[0], 2D
 50: MUL TEMP[6], TEMP[6], CONST[9]
 51: LRP TEMP[1].xyz, TEMP[6].wwww, TEMP[6].xyzz, TEMP[1].xyzz
 52: LRP TEMP[5].xyz, TEMP[6].wwww, IMM[0].zzzz, TEMP[5].xyzz
 53: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz
 54: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[1].xyzz
 55: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 56: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 57: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[4].xyzz, CONST[2].xyzz
 58: MOV OUT[0], TEMP[4]
 59: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 204)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = bitcast float %83 to i32
  %88 = icmp ne i32 %87, 0
  %. = select i1 %88, float -1.000000e+00, float 1.000000e+00
  %89 = fmul float 1.000000e+00, %52
  %90 = fmul float 1.000000e+00, %53
  %91 = fadd float %89, %90
  %92 = fmul float 1.000000e+00, %54
  %93 = fadd float %91, %92
  %94 = fmul float 1.000000e+00, %55
  %95 = fadd float %93, %94
  %96 = fadd float %95, %50
  %97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00)
  %98 = fmul float %80, %80
  %99 = fmul float %81, %81
  %100 = fadd float %99, %98
  %101 = fmul float %82, %82
  %102 = fadd float %100, %101
  %103 = call float @llvm.AMDGPU.rsq(float %102)
  %104 = fmul float %80, %103
  %105 = fmul float %81, %103
  %106 = fmul float %82, %103
  %107 = fmul float %104, %.
  %108 = fmul float %105, %.
  %109 = fmul float %106, %.
  %110 = fmul float 1.000000e+00, %56
  %111 = fmul float 1.000000e+00, %57
  %112 = fadd float %110, %111
  %113 = fmul float 1.000000e+00, %58
  %114 = fadd float %112, %113
  %115 = fmul float 1.000000e+00, %59
  %116 = fadd float %114, %115
  %117 = fadd float %116, %51
  %118 = call float @llvm.AMDIL.clamp.(float %117, float 0.000000e+00, float 1.000000e+00)
  %119 = call float @llvm.AMDGPU.lrp(float %118, float %70, float 1.000000e+00)
  %120 = call float @llvm.AMDGPU.lrp(float %118, float %71, float 1.000000e+00)
  %121 = call float @llvm.AMDGPU.lrp(float %118, float %72, float 1.000000e+00)
  %122 = call float @llvm.AMDGPU.lrp(float %118, float %73, float 1.000000e+00)
  %123 = fmul float %107, %26
  %124 = fmul float %108, %27
  %125 = fadd float %124, %123
  %126 = fmul float %109, %28
  %127 = fadd float %125, %126
  %128 = fadd float %127, %41
  %129 = call float @llvm.AMDIL.clamp.(float %128, float 0.000000e+00, float 1.000000e+00)
  %130 = call float @llvm.AMDGPU.lrp(float %129, float %32, float %35)
  %131 = call float @llvm.AMDGPU.lrp(float %129, float %33, float %36)
  %132 = call float @llvm.AMDGPU.lrp(float %129, float %34, float %37)
  %133 = fsub float -0.000000e+00, %128
  %134 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00)
  %135 = call float @llvm.AMDGPU.lrp(float %134, float %38, float %35)
  %136 = call float @llvm.AMDGPU.lrp(float %134, float %39, float %36)
  %137 = call float @llvm.AMDGPU.lrp(float %134, float %40, float %37)
  %138 = fcmp ult float %128, 0.000000e+00
  %139 = select i1 %138, float 1.000000e+00, float 0.000000e+00
  %140 = fsub float -0.000000e+00, %139
  %141 = fptosi float %140 to i32
  %142 = bitcast i32 %141 to float
  %143 = bitcast float %142 to i32
  %144 = icmp ne i32 %143, 0
  %temp28.0 = select i1 %144, float %135, float %130
  %temp29.0 = select i1 %144, float %136, float %131
  %temp30.0 = select i1 %144, float %137, float %132
  %145 = fmul float %74, %74
  %146 = fmul float %75, %75
  %147 = fadd float %146, %145
  %148 = fmul float %76, %76
  %149 = fadd float %147, %148
  %150 = call float @llvm.AMDGPU.rsq(float %149)
  %151 = fmul float %74, %150
  %152 = fmul float %75, %150
  %153 = fmul float %76, %150
  %154 = fsub float -0.000000e+00, %151
  %155 = fadd float %29, %154
  %156 = fsub float -0.000000e+00, %152
  %157 = fadd float %30, %156
  %158 = fsub float -0.000000e+00, %153
  %159 = fadd float %31, %158
  %160 = fmul float %155, %155
  %161 = fmul float %157, %157
  %162 = fadd float %161, %160
  %163 = fmul float %159, %159
  %164 = fadd float %162, %163
  %165 = call float @llvm.AMDGPU.rsq(float %164)
  %166 = fmul float %155, %165
  %167 = fmul float %157, %165
  %168 = fmul float %159, %165
  %169 = fmul float %166, %107
  %170 = fmul float %167, %108
  %171 = fadd float %170, %169
  %172 = fmul float %168, %109
  %173 = fadd float %171, %172
  %174 = call float @llvm.AMDIL.clamp.(float %173, float 0.000000e+00, float 1.000000e+00)
  %175 = call float @llvm.pow.f32(float %174, float %49)
  %176 = fcmp uge float %128, 0x3F847AE140000000
  %177 = select i1 %176, float 1.000000e+00, float 0.000000e+00
  %178 = fsub float -0.000000e+00, %177
  %179 = fptosi float %178 to i32
  %180 = bitcast i32 %179 to float
  %181 = bitcast float %180 to i32
  %182 = and i32 %181, 1065353216
  %183 = bitcast i32 %182 to float
  %184 = fmul float %175, %183
  %185 = fmul float %42, %184
  %186 = fmul float %43, %184
  %187 = fmul float %44, %184
  %188 = insertelement <4 x float> undef, float %107, i32 0
  %189 = insertelement <4 x float> %188, float %108, i32 1
  %190 = insertelement <4 x float> %189, float %109, i32 2
  %191 = insertelement <4 x float> %190, float 0.000000e+00, i32 3
  %192 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %191)
  %193 = extractelement <4 x float> %192, i32 0
  %194 = extractelement <4 x float> %192, i32 1
  %195 = extractelement <4 x float> %192, i32 2
  %196 = extractelement <4 x float> %192, i32 3
  %197 = call float @fabs(float %195)
  %198 = fdiv float 1.000000e+00, %197
  %199 = fmul float %193, %198
  %200 = fadd float %199, 1.500000e+00
  %201 = fmul float %194, %198
  %202 = fadd float %201, 1.500000e+00
  %203 = bitcast float %202 to i32
  %204 = bitcast float %200 to i32
  %205 = bitcast float %196 to i32
  %206 = insertelement <4 x i32> undef, i32 %203, i32 0
  %207 = insertelement <4 x i32> %206, i32 %204, i32 1
  %208 = insertelement <4 x i32> %207, i32 %205, i32 2
  %209 = insertelement <4 x i32> %208, i32 undef, i32 3
  %210 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %209, <32 x i8> %65, <16 x i8> %67, i32 4)
  %211 = extractelement <4 x float> %210, i32 0
  %212 = extractelement <4 x float> %210, i32 1
  %213 = extractelement <4 x float> %210, i32 2
  %214 = fmul float %211, 4.000000e+00
  %215 = fadd float %214, %temp28.0
  %216 = fmul float %212, 4.000000e+00
  %217 = fadd float %216, %temp29.0
  %218 = fmul float %213, 4.000000e+00
  %219 = fadd float %218, %temp30.0
  %220 = bitcast float %78 to i32
  %221 = bitcast float %79 to i32
  %222 = insertelement <2 x i32> undef, i32 %220, i32 0
  %223 = insertelement <2 x i32> %222, i32 %221, i32 1
  %224 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %61, <16 x i8> %63, i32 2)
  %225 = extractelement <4 x float> %224, i32 0
  %226 = extractelement <4 x float> %224, i32 1
  %227 = extractelement <4 x float> %224, i32 2
  %228 = extractelement <4 x float> %224, i32 3
  %229 = fmul float %225, %45
  %230 = fmul float %226, %46
  %231 = fmul float %227, %47
  %232 = fmul float %228, %48
  %233 = call float @llvm.AMDGPU.lrp(float %232, float %229, float %215)
  %234 = call float @llvm.AMDGPU.lrp(float %232, float %230, float %217)
  %235 = call float @llvm.AMDGPU.lrp(float %232, float %231, float %219)
  %236 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %185)
  %237 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %186)
  %238 = call float @llvm.AMDGPU.lrp(float %232, float 0.000000e+00, float %187)
  %239 = fmul float %119, %233
  %240 = fmul float %120, %234
  %241 = fmul float %121, %235
  %242 = fmul float %236, %97
  %243 = fadd float %242, %239
  %244 = fmul float %237, %97
  %245 = fadd float %244, %240
  %246 = fmul float %238, %97
  %247 = fadd float %246, %241
  %248 = fcmp uge float %77, %25
  %249 = select i1 %248, float %77, float %25
  %250 = call float @llvm.AMDIL.clamp.(float %249, float 0.000000e+00, float 1.000000e+00)
  %251 = call float @llvm.AMDGPU.lrp(float %250, float %243, float %22)
  %252 = call float @llvm.AMDGPU.lrp(float %250, float %245, float %23)
  %253 = call float @llvm.AMDGPU.lrp(float %250, float %247, float %24)
  %254 = call i32 @llvm.SI.packf16(float %251, float %252)
  %255 = bitcast i32 %254 to float
  %256 = call i32 @llvm.SI.packf16(float %253, float %122)
  %257 = bitcast i32 %256 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %255, float %257, float %255, float %257)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100b00
c8110b01
c80c0a00
c80d0a01
100a0703
d2820006
04160904
c8140c00
c8150c01
d2820006
041a0b05
7e0c5b06
10080d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000002
0021e6f2
10180504
10060d03
10160503
10060d05
101a0503
7e1c0280
d28a0003
0436190b
d28c0002
0436190b
d28e0004
0436190b
d2880005
0436190b
d206010a
02010104
7e14550a
7e1e02ff
3fc00000
d2820004
043e1502
d2820003
043e1503
c0840304
c0c60508
bf8c007f
f0800700
00430203
c0840100
bf8c0070
c200090c
bf8c007f
100a1600
c200090d
bf8c007f
d2820005
0414010c
c200090e
bf8c007f
d2820005
0414010d
c200091f
bf8c007f
062a0a00
d2060805
02010115
080c0af2
c2000919
bf8c007f
100e0c00
c2008915
bf8c007f
d2820009
041c0305
d2060007
22010115
d2060807
02010107
08100ef2
10141000
c200091d
bf8c007f
d282000a
04280107
d0020000
02010115
d200000f
0001e480
d206000f
2201010f
7e1e110f
d10a0000
0201010f
d2000009
00021509
d2820013
0425ed03
c8280900
c8290901
c8240800
c8250801
c0860300
c0c80500
bf8c007f
f0800f00
00640f09
c2010927
bf8c0070
10122402
081412f2
1026270a
c2010925
bf8c007f
10282002
d2820016
044e2909
c85c0100
c85d0101
c2010934
c2018935
bf8c007f
7e260203
d2060013
02022602
c2010936
bf8c007f
06262602
c2010937
bf8c007f
06262602
c201092d
bf8c007f
06262602
d2060813
02010113
082826f2
d2820017
04522f13
102c2d17
c85c0500
c85d0501
c8600400
c8610401
10323118
d282001a
04662f17
c8640600
c8650601
d282001a
046a3319
7e345b1a
102e3517
c2010911
bf8c007f
082e2e02
10303518
c2010910
bf8c007f
08303002
10363118
d282001b
046e2f17
10323519
c2010912
bf8c007f
08323202
d282001a
046e3319
7e345b1a
102e3517
10303518
10301718
d2820017
04621917
10303519
d282000b
045e1b18
d206080b
0201010b
7e164f0b
c2010928
bf8c007f
0e161602
7e164b0b
7e1802ff
3c23d70a
d00c0002
02021915
d200000c
0009e480
d206000c
2201010c
7e18110c
361818f2
1016190b
c2010921
bf8c007f
10181602
1018190a
d282000d
04310109
c2010930
c2018931
bf8c007f
7e180203
d206000c
02021802
c2010932
bf8c007f
06181802
c2010933
bf8c007f
06181802
c201092c
bf8c007f
06181802
d206080c
0201010c
d2820015
045a190d
c8340700
c8350701
c201090b
bf8c007f
d00c0004
0200050d
7e1c0202
d200000d
00121b0e
d206080d
0201010d
081c1af2
c2010909
bf8c007f
102c1c02
d2820015
045a2b0d
c2010918
bf8c007f
102c0c02
c2018914
bf8c007f
d2820016
04580705
102e1002
c201091c
bf8c007f
d2820017
045c0507
d2000016
00022f16
d2820016
0459ed02
102c2d0a
c2010924
bf8c007f
102e1e02
d2820016
045a2f09
c85c0000
c85d0001
d2820017
04522f13
102c2d17
c2010920
bf8c007f
102e1602
102e2f0a
d2820017
045d0109
d2820016
045a1917
c2010908
bf8c007f
102e1c02
d2820016
045e2d0d
5e2a2b16
c201091a
bf8c007f
100c0c02
c2018916
bf8c007f
d2820005
04180705
100c1002
c201091e
bf8c007f
d2820006
04180507
d2000005
00020d05
d2820002
0415ed04
1004050a
c2000926
bf8c007f
10062200
d2820002
040a0709
c80c0200
c80d0201
d2820003
04520713
10040503
c2000922
bf8c007f
10061600
1006070a
d2820003
040d0109
d2820002
040a1903
c200090a
bf8c007f
10061c00
d2820002
040e050d
c80c0300
c80d0301
d2820000
04520713
5e000102
f8001c0f
00150015
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..207]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+16], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+16], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+16], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+16], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MUL TEMP[2].xyz, TEMP[3].xyzz, CONST[13].wwww
 57: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 58: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 59: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 60: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 61: RSQ TEMP[3].x, TEMP[3].xxxx
 62: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 63: MUL TEMP[3], CONST[0], TEMP[5].xxxx
 64: MAD TEMP[3], CONST[1], TEMP[6].xxxx, TEMP[3]
 65: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[3]
 66: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 67: MOV TEMP[3].w, IMM[0].xxxx
 68: MOV TEMP[3].xyz, TEMP[1].xyzx
 69: MOV TEMP[4].w, IMM[0].xxxx
 70: MOV TEMP[4].xyz, TEMP[1].xyzx
 71: DP4 TEMP[3].x, CONST[14], TEMP[3]
 72: DP4 TEMP[4].x, CONST[15], TEMP[4]
 73: MOV TEMP[3].y, TEMP[4].xxxx
 74: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[11].xyzz
 75: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].xxxx, CONST[12].yyyy
 76: MOV TEMP[1].w, TEMP[4].xxxx
 77: MOV TEMP[4].zw, TEMP[2].yyxy
 78: MOV TEMP[2].x, TEMP[2].zzzz
 79: MOV TEMP[4].xy, TEMP[3].xyxx
 80: MOV OUT[1], CONST[8]
 81: MOV OUT[4], TEMP[2]
 82: MOV OUT[2], TEMP[1]
 83: MOV OUT[0], TEMP[0]
 84: MOV OUT[3], TEMP[4]
 85: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0
  %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %5)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = extractelement <4 x float> %71, i32 3
  %76 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %77 = load <16 x i8> addrspace(2)* %76, !tbaa !0
  %78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %77, i32 0, i32 %5)
  %79 = extractelement <4 x float> %78, i32 0
  %80 = extractelement <4 x float> %78, i32 1
  %81 = extractelement <4 x float> %78, i32 2
  %82 = extractelement <4 x float> %78, i32 3
  %83 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
  %85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %5)
  %86 = extractelement <4 x float> %85, i32 0
  %87 = extractelement <4 x float> %85, i32 1
  %88 = extractelement <4 x float> %85, i32 2
  %89 = fptosi float %79 to i32
  %90 = fptosi float %80 to i32
  %91 = fptosi float %81 to i32
  %92 = fptosi float %82 to i32
  %93 = bitcast i32 %89 to float
  %94 = bitcast i32 %90 to float
  %95 = bitcast i32 %91 to float
  %96 = bitcast i32 %92 to float
  %97 = fmul float %66, %46
  %98 = fadd float %97, %43
  %99 = fmul float %67, %47
  %100 = fadd float %99, %44
  %101 = fmul float %68, %48
  %102 = fadd float %101, %45
  %103 = bitcast float %96 to i32
  %104 = mul i32 3, %103
  %105 = bitcast i32 %104 to float
  %106 = bitcast float %95 to i32
  %107 = mul i32 3, %106
  %108 = bitcast i32 %107 to float
  %109 = bitcast float %94 to i32
  %110 = mul i32 3, %109
  %111 = bitcast i32 %110 to float
  %112 = bitcast float %93 to i32
  %113 = mul i32 3, %112
  %114 = bitcast i32 %113 to float
  %115 = bitcast float %114 to i32
  %116 = shl i32 %115, 4
  %117 = add i32 %116, 256
  %118 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %117)
  %119 = fmul float %118, %72
  %120 = shl i32 %115, 4
  %121 = add i32 %120, 260
  %122 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %121)
  %123 = fmul float %122, %72
  %124 = shl i32 %115, 4
  %125 = add i32 %124, 264
  %126 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %125)
  %127 = fmul float %126, %72
  %128 = shl i32 %115, 4
  %129 = add i32 %128, 268
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %72
  %132 = bitcast float %111 to i32
  %133 = shl i32 %132, 4
  %134 = add i32 %133, 256
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %73
  %137 = fadd float %136, %119
  %138 = shl i32 %132, 4
  %139 = add i32 %138, 260
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %73
  %142 = fadd float %141, %123
  %143 = shl i32 %132, 4
  %144 = add i32 %143, 264
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %73
  %147 = fadd float %146, %127
  %148 = shl i32 %132, 4
  %149 = add i32 %148, 268
  %150 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %149)
  %151 = fmul float %150, %73
  %152 = fadd float %151, %131
  %153 = bitcast float %108 to i32
  %154 = shl i32 %153, 4
  %155 = add i32 %154, 256
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %74
  %158 = fadd float %157, %137
  %159 = shl i32 %153, 4
  %160 = add i32 %159, 260
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %74
  %163 = fadd float %162, %142
  %164 = shl i32 %153, 4
  %165 = add i32 %164, 264
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %74
  %168 = fadd float %167, %147
  %169 = shl i32 %153, 4
  %170 = add i32 %169, 268
  %171 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %170)
  %172 = fmul float %171, %74
  %173 = fadd float %172, %152
  %174 = bitcast float %105 to i32
  %175 = shl i32 %174, 4
  %176 = add i32 %175, 256
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %75
  %179 = fadd float %178, %158
  %180 = shl i32 %174, 4
  %181 = add i32 %180, 260
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %75
  %184 = fadd float %183, %163
  %185 = shl i32 %174, 4
  %186 = add i32 %185, 264
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %75
  %189 = fadd float %188, %168
  %190 = shl i32 %174, 4
  %191 = add i32 %190, 268
  %192 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %191)
  %193 = fmul float %192, %75
  %194 = fadd float %193, %173
  %195 = bitcast float %96 to i32
  %196 = mul i32 3, %195
  %197 = add i32 %196, 1
  %198 = bitcast i32 %197 to float
  %199 = bitcast float %95 to i32
  %200 = mul i32 3, %199
  %201 = add i32 %200, 1
  %202 = bitcast i32 %201 to float
  %203 = bitcast float %94 to i32
  %204 = mul i32 3, %203
  %205 = add i32 %204, 1
  %206 = bitcast i32 %205 to float
  %207 = bitcast float %93 to i32
  %208 = mul i32 3, %207
  %209 = add i32 %208, 1
  %210 = bitcast i32 %209 to float
  %211 = bitcast float %210 to i32
  %212 = shl i32 %211, 4
  %213 = add i32 %212, 256
  %214 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %213)
  %215 = fmul float %214, %72
  %216 = shl i32 %211, 4
  %217 = add i32 %216, 260
  %218 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %217)
  %219 = fmul float %218, %72
  %220 = shl i32 %211, 4
  %221 = add i32 %220, 264
  %222 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %221)
  %223 = fmul float %222, %72
  %224 = shl i32 %211, 4
  %225 = add i32 %224, 268
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %72
  %228 = bitcast float %206 to i32
  %229 = shl i32 %228, 4
  %230 = add i32 %229, 256
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %73
  %233 = fadd float %232, %215
  %234 = shl i32 %228, 4
  %235 = add i32 %234, 260
  %236 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %235)
  %237 = fmul float %236, %73
  %238 = fadd float %237, %219
  %239 = shl i32 %228, 4
  %240 = add i32 %239, 264
  %241 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %240)
  %242 = fmul float %241, %73
  %243 = fadd float %242, %223
  %244 = shl i32 %228, 4
  %245 = add i32 %244, 268
  %246 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %245)
  %247 = fmul float %246, %73
  %248 = fadd float %247, %227
  %249 = bitcast float %202 to i32
  %250 = shl i32 %249, 4
  %251 = add i32 %250, 256
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %74
  %254 = fadd float %253, %233
  %255 = shl i32 %249, 4
  %256 = add i32 %255, 260
  %257 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %256)
  %258 = fmul float %257, %74
  %259 = fadd float %258, %238
  %260 = shl i32 %249, 4
  %261 = add i32 %260, 264
  %262 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %261)
  %263 = fmul float %262, %74
  %264 = fadd float %263, %243
  %265 = shl i32 %249, 4
  %266 = add i32 %265, 268
  %267 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %266)
  %268 = fmul float %267, %74
  %269 = fadd float %268, %248
  %270 = bitcast float %198 to i32
  %271 = shl i32 %270, 4
  %272 = add i32 %271, 256
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %75
  %275 = fadd float %274, %254
  %276 = shl i32 %270, 4
  %277 = add i32 %276, 260
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = fmul float %278, %75
  %280 = fadd float %279, %259
  %281 = shl i32 %270, 4
  %282 = add i32 %281, 264
  %283 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %282)
  %284 = fmul float %283, %75
  %285 = fadd float %284, %264
  %286 = shl i32 %270, 4
  %287 = add i32 %286, 268
  %288 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %287)
  %289 = fmul float %288, %75
  %290 = fadd float %289, %269
  %291 = bitcast float %96 to i32
  %292 = mul i32 3, %291
  %293 = add i32 %292, 2
  %294 = bitcast i32 %293 to float
  %295 = bitcast float %95 to i32
  %296 = mul i32 3, %295
  %297 = add i32 %296, 2
  %298 = bitcast i32 %297 to float
  %299 = bitcast float %94 to i32
  %300 = mul i32 3, %299
  %301 = add i32 %300, 2
  %302 = bitcast i32 %301 to float
  %303 = bitcast float %93 to i32
  %304 = mul i32 3, %303
  %305 = add i32 %304, 2
  %306 = bitcast i32 %305 to float
  %307 = bitcast float %306 to i32
  %308 = shl i32 %307, 4
  %309 = add i32 %308, 256
  %310 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %309)
  %311 = fmul float %310, %72
  %312 = shl i32 %307, 4
  %313 = add i32 %312, 260
  %314 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %313)
  %315 = fmul float %314, %72
  %316 = shl i32 %307, 4
  %317 = add i32 %316, 264
  %318 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %317)
  %319 = fmul float %318, %72
  %320 = shl i32 %307, 4
  %321 = add i32 %320, 268
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %72
  %324 = bitcast float %302 to i32
  %325 = shl i32 %324, 4
  %326 = add i32 %325, 256
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %73
  %329 = fadd float %328, %311
  %330 = shl i32 %324, 4
  %331 = add i32 %330, 260
  %332 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %331)
  %333 = fmul float %332, %73
  %334 = fadd float %333, %315
  %335 = shl i32 %324, 4
  %336 = add i32 %335, 264
  %337 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %336)
  %338 = fmul float %337, %73
  %339 = fadd float %338, %319
  %340 = shl i32 %324, 4
  %341 = add i32 %340, 268
  %342 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %341)
  %343 = fmul float %342, %73
  %344 = fadd float %343, %323
  %345 = bitcast float %298 to i32
  %346 = shl i32 %345, 4
  %347 = add i32 %346, 256
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %74
  %350 = fadd float %349, %329
  %351 = shl i32 %345, 4
  %352 = add i32 %351, 260
  %353 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %352)
  %354 = fmul float %353, %74
  %355 = fadd float %354, %334
  %356 = shl i32 %345, 4
  %357 = add i32 %356, 264
  %358 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %357)
  %359 = fmul float %358, %74
  %360 = fadd float %359, %339
  %361 = shl i32 %345, 4
  %362 = add i32 %361, 268
  %363 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %362)
  %364 = fmul float %363, %74
  %365 = fadd float %364, %344
  %366 = bitcast float %294 to i32
  %367 = shl i32 %366, 4
  %368 = add i32 %367, 256
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %75
  %371 = fadd float %370, %350
  %372 = shl i32 %366, 4
  %373 = add i32 %372, 260
  %374 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %373)
  %375 = fmul float %374, %75
  %376 = fadd float %375, %355
  %377 = shl i32 %366, 4
  %378 = add i32 %377, 264
  %379 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %378)
  %380 = fmul float %379, %75
  %381 = fadd float %380, %360
  %382 = shl i32 %366, 4
  %383 = add i32 %382, 268
  %384 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %383)
  %385 = fmul float %384, %75
  %386 = fadd float %385, %365
  %387 = fmul float %98, %179
  %388 = fmul float %100, %184
  %389 = fadd float %387, %388
  %390 = fmul float %102, %189
  %391 = fadd float %389, %390
  %392 = fmul float 1.000000e+00, %194
  %393 = fadd float %391, %392
  %394 = fmul float %98, %275
  %395 = fmul float %100, %280
  %396 = fadd float %394, %395
  %397 = fmul float %102, %285
  %398 = fadd float %396, %397
  %399 = fmul float 1.000000e+00, %290
  %400 = fadd float %398, %399
  %401 = fmul float %98, %371
  %402 = fmul float %100, %376
  %403 = fadd float %401, %402
  %404 = fmul float %102, %381
  %405 = fadd float %403, %404
  %406 = fmul float 1.000000e+00, %386
  %407 = fadd float %405, %406
  %408 = fmul float %86, %179
  %409 = fmul float %87, %184
  %410 = fadd float %408, %409
  %411 = fmul float %88, %189
  %412 = fadd float %410, %411
  %413 = fmul float 1.000000e+00, %194
  %414 = fadd float %412, %413
  %415 = fmul float %86, %275
  %416 = fmul float %87, %280
  %417 = fadd float %415, %416
  %418 = fmul float %88, %285
  %419 = fadd float %417, %418
  %420 = fmul float 1.000000e+00, %290
  %421 = fadd float %419, %420
  %422 = fmul float %86, %371
  %423 = fmul float %87, %376
  %424 = fadd float %422, %423
  %425 = fmul float %88, %381
  %426 = fadd float %424, %425
  %427 = fmul float 1.000000e+00, %386
  %428 = fadd float %426, %427
  %429 = fmul float %23, %393
  %430 = fmul float %24, %393
  %431 = fmul float %25, %393
  %432 = fmul float %26, %393
  %433 = fmul float %27, %400
  %434 = fadd float %433, %429
  %435 = fmul float %28, %400
  %436 = fadd float %435, %430
  %437 = fmul float %29, %400
  %438 = fadd float %437, %431
  %439 = fmul float %30, %400
  %440 = fadd float %439, %432
  %441 = fmul float %31, %407
  %442 = fadd float %441, %434
  %443 = fmul float %32, %407
  %444 = fadd float %443, %436
  %445 = fmul float %33, %407
  %446 = fadd float %445, %438
  %447 = fmul float %34, %407
  %448 = fadd float %447, %440
  %449 = fadd float %442, %35
  %450 = fadd float %444, %36
  %451 = fadd float %446, %37
  %452 = fadd float %448, %38
  %453 = fmul float %414, %54
  %454 = fmul float %421, %54
  %455 = fmul float %428, %54
  %456 = fmul float %11, %453
  %457 = fmul float %12, %453
  %458 = fmul float %13, %453
  %459 = fmul float %14, %454
  %460 = fadd float %459, %456
  %461 = fmul float %15, %454
  %462 = fadd float %461, %457
  %463 = fmul float %16, %454
  %464 = fadd float %463, %458
  %465 = fmul float %17, %455
  %466 = fadd float %465, %460
  %467 = fmul float %18, %455
  %468 = fadd float %467, %462
  %469 = fmul float %19, %455
  %470 = fadd float %469, %464
  %471 = fmul float %466, %466
  %472 = fmul float %468, %468
  %473 = fadd float %472, %471
  %474 = fmul float %470, %470
  %475 = fadd float %473, %474
  %476 = call float @llvm.AMDGPU.rsq(float %475)
  %477 = fmul float %466, %476
  %478 = fmul float %468, %476
  %479 = fmul float %470, %476
  %480 = fmul float %11, %393
  %481 = fmul float %12, %393
  %482 = fmul float %13, %393
  %483 = fmul float %14, %400
  %484 = fadd float %483, %480
  %485 = fmul float %15, %400
  %486 = fadd float %485, %481
  %487 = fmul float %16, %400
  %488 = fadd float %487, %482
  %489 = fmul float %17, %407
  %490 = fadd float %489, %484
  %491 = fmul float %18, %407
  %492 = fadd float %491, %486
  %493 = fmul float %19, %407
  %494 = fadd float %493, %488
  %495 = fadd float %490, %20
  %496 = fadd float %492, %21
  %497 = fadd float %494, %22
  %498 = fmul float %55, %495
  %499 = fmul float %56, %496
  %500 = fadd float %498, %499
  %501 = fmul float %57, %497
  %502 = fadd float %500, %501
  %503 = fmul float %58, 1.000000e+00
  %504 = fadd float %502, %503
  %505 = fmul float %59, %495
  %506 = fmul float %60, %496
  %507 = fadd float %505, %506
  %508 = fmul float %61, %497
  %509 = fadd float %507, %508
  %510 = fmul float %62, 1.000000e+00
  %511 = fadd float %509, %510
  %512 = fsub float -0.000000e+00, %49
  %513 = fadd float %495, %512
  %514 = fsub float -0.000000e+00, %50
  %515 = fadd float %496, %514
  %516 = fsub float -0.000000e+00, %51
  %517 = fadd float %497, %516
  %518 = fmul float %451, %52
  %519 = fadd float %518, %53
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %513, float %515, float %517, float %519)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %504, float %511, float %477, float %478)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %479, float %478, float %479, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %449, float %450, float %451, float %452)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840708
bf8c000f
e00c2000
80021100
bf8c0770
7e021111
d2d6000d
02010701
34041a84
4a0204ff
00000100
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020400
bf8c0770
10020901
7e061112
d2d6000e
02010703
34121c84
4a0612ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060b03
7e061113
d2d60010
02010703
34142084
4a0614ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060d03
7e061114
d2d60011
02010703
34162284
4a0616ff
00000100
e0301000
80000303
bf8c0770
d2820001
04060f03
4a0604ff
00000104
e0301000
80000303
bf8c0770
10060903
4a1012ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0b08
4a1014ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0d08
4a1016ff
00000104
e0301000
80000808
bf8c0770
d2820003
040e0f08
c0840700
bf8c007f
e00c2000
80021500
c2020129
c2028125
bf8c0070
7e100205
d2820012
04200916
10100712
c2020128
c2028124
bf8c007f
7e180205
d2820014
04300915
d282000c
04220314
4a1004ff
00000108
e0301000
80000808
bf8c0770
10100908
4a1e12ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220b0f
4a1e14ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220d0f
4a1e16ff
00000108
e0301000
80000f0f
bf8c0770
d2820008
04220f0f
c202012a
c2028126
bf8c007f
7e1e0205
d2820015
043c0917
d282000c
04321115
4a0404ff
0000010c
e0301000
80000202
bf8c0770
10040902
4a1212ff
0000010c
e0301000
80000909
bf8c0770
d2820002
040a0b09
4a1214ff
0000010c
e0301000
80000909
bf8c0770
d2820002
040a0d09
4a1216ff
0000010c
e0301000
80000909
bf8c0770
d2820009
040a0f09
0604130c
c2020112
bf8c007f
102c0404
4a141a81
34141484
4a1614ff
00000100
e0301000
80000b0b
bf8c0770
1016090b
4a181c81
34261884
4a1826ff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0b0c
4a182081
342e1884
4a182eff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0d0c
4a182281
34301884
4a1830ff
00000100
e0301000
80000c0c
bf8c0770
d282000b
042e0f0c
4a1814ff
00000104
e0301000
80000c0c
bf8c0770
1018090c
4a1e26ff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320b0f
4a1e2eff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320d0f
4a1e30ff
00000104
e0301000
80000f0f
bf8c0770
d282000c
04320f0f
101e1912
d2820019
043e1714
4a1e14ff
00000108
e0301000
80000f0f
bf8c0770
101e090f
4a3426ff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0b1a
4a342eff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0d1a
4a3430ff
00000108
e0301000
80001a1a
bf8c0770
d282000f
043e0f1a
d2820019
04661f15
4a1414ff
0000010c
e0301000
80000a0a
bf8c0770
1014090a
4a2626ff
0000010c
e0301000
80001313
bf8c0770
d282000a
042a0b13
4a262eff
0000010c
e0301000
80001313
bf8c0770
d282000a
042a0d13
4a2630ff
0000010c
e0301000
80001313
bf8c0770
d2820013
042a0f13
06142719
c2020116
bf8c007f
d2820016
045a1404
4a1a1a82
342e1a84
4a1a2eff
00000100
e0301000
80000d0d
bf8c0770
101a090d
4a1c1c82
34301c84
4a1c30ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360b0e
4a1c2082
34321c84
4a1c32ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360d0e
4a1c2282
34221c84
4a1c22ff
00000100
e0301000
80000e0e
bf8c0770
d282000d
04360f0e
4a1c2eff
00000104
e0301000
80000e0e
bf8c0770
101c090e
4a2030ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0b10
4a2032ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0d10
4a2022ff
00000104
e0301000
80001010
bf8c0770
d282000e
043a0f10
10201d12
d2820012
04421b14
4a202eff
00000108
e0301000
80001010
bf8c0770
10200910
4a2830ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420b14
4a2832ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420d14
4a2822ff
00000108
e0301000
80001414
bf8c0770
d2820010
04420f14
d2820012
044a2115
4a282eff
0000010c
e0301000
80001414
bf8c0770
10280914
4a2a30ff
0000010c
e0301000
80001515
bf8c0770
d2820014
04520b15
4a2a32ff
0000010c
e0301000
80001515
bf8c0770
d2820014
04520d15
4a2222ff
0000010c
e0301000
80001111
bf8c0770
d2820006
04520f11
06080d12
c202011a
bf8c007f
d2820005
045a0804
c202011e
bf8c007f
060a0a04
c2020130
c2028131
bf8c007f
7e0e0205
d2820007
041c0905
c2020102
bf8c007f
10220404
c2028106
bf8c007f
d2820011
04461405
c204010a
bf8c007f
d2820011
04460808
c204810e
bf8c007f
06222209
c204812e
bf8c007f
0a282209
c2070101
bf8c007f
1024040e
c2050105
bf8c007f
d2820012
044a140a
c2048109
bf8c007f
d2820012
044a0809
c205810d
bf8c007f
0624240b
c205812d
bf8c007f
0a2c240b
c2058100
bf8c007f
102a040b
c2060104
bf8c007f
d2820015
0456140c
c2068108
bf8c007f
d2820015
0456080d
c207810c
bf8c007f
062a2a0f
c207812c
bf8c007f
0a2e2a0f
f800021f
07141617
c207813d
bf8c000f
100e240f
c207813c
bf8c007f
d2820007
041e2a0f
c207813e
bf8c007f
d2820007
041e220f
c207813f
bf8c007f
060e0e0f
c2078139
bf8c007f
1024240f
c2078138
bf8c007f
d2820012
044a2a0f
c207813a
bf8c007f
d2820011
044a220f
c207813b
bf8c007f
0622220f
c088070c
bf8c007f
e00c2000
80041400
bf8c0770
10000715
d2820000
04020314
d2820000
04021116
06001300
c2030137
bf8c007f
10000006
1006000e
10021915
d2820001
04061714
d2820001
04061f16
06022701
10020206
d2820003
040e020a
10101d15
d2820008
04221b14
d2820008
04222116
060c0d08
100c0c06
d2820003
040e0c09
1010000b
d2820008
0422020c
d2820008
04220c0d
10121108
d2820009
04260703
10000004
d2820000
04020205
d2820000
04020c08
d2820001
04260100
7e0c5b01
10020d03
10060d08
f800022f
01030711
10000d00
bf8c070f
7e0602f2
f800023f
03000100
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020804
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10020404
c2020115
bf8c007f
d2820001
04061404
c2020119
bf8c007f
d2820001
04060804
c202011d
bf8c007f
06020204
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820002
040a1404
c2020118
bf8c007f
d2820002
040a0804
c200011c
bf8c007f
06040400
f80008cf
00050102
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xyz, IMM[0].xxxx
  1: MOV TEMP[1].xy, IN[0].xyyy
  2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D
  3: MOV TEMP[0].w, TEMP[1].wwww
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 3
  %32 = fcmp ugt float %31, 0x3FD99999A0000000
  %33 = sext i1 %32 to i32
  %34 = trunc i32 %33 to i1
  %35 = select i1 %34, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %35)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float %31)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800800
00010002
7e0202ff
3ecccccd
bf8c0770
d0080000
02020300
d2000001
0001e4f3
7c260280
7e0202f2
f800180f
00010101
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..198]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[6].xyzz, CONST[5].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+7], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+7], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+7], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+7], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+7], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+7], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+7], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+7], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+7], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+7], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+7], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+7], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[0], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[1], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[0], TEMP[0], CONST[3]
 46: MAD TEMP[1].xy, IN[3].xyyy, CONST[4].xyyy, CONST[4].zwww
 47: MOV OUT[1], TEMP[1]
 48: MOV OUT[0], TEMP[0]
 49: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %37 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0
  %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %5)
  %40 = extractelement <4 x float> %39, i32 0
  %41 = extractelement <4 x float> %39, i32 1
  %42 = extractelement <4 x float> %39, i32 2
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = extractelement <4 x float> %45, i32 3
  %50 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %5)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0
  %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %5)
  %60 = extractelement <4 x float> %59, i32 0
  %61 = extractelement <4 x float> %59, i32 1
  %62 = fptosi float %53 to i32
  %63 = fptosi float %54 to i32
  %64 = fptosi float %55 to i32
  %65 = fptosi float %56 to i32
  %66 = bitcast i32 %62 to float
  %67 = bitcast i32 %63 to float
  %68 = bitcast i32 %64 to float
  %69 = bitcast i32 %65 to float
  %70 = fmul float %40, %34
  %71 = fadd float %70, %31
  %72 = fmul float %41, %35
  %73 = fadd float %72, %32
  %74 = fmul float %42, %36
  %75 = fadd float %74, %33
  %76 = bitcast float %69 to i32
  %77 = mul i32 3, %76
  %78 = bitcast i32 %77 to float
  %79 = bitcast float %68 to i32
  %80 = mul i32 3, %79
  %81 = bitcast i32 %80 to float
  %82 = bitcast float %67 to i32
  %83 = mul i32 3, %82
  %84 = bitcast i32 %83 to float
  %85 = bitcast float %66 to i32
  %86 = mul i32 3, %85
  %87 = bitcast i32 %86 to float
  %88 = bitcast float %87 to i32
  %89 = shl i32 %88, 4
  %90 = add i32 %89, 112
  %91 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %90)
  %92 = fmul float %91, %46
  %93 = shl i32 %88, 4
  %94 = add i32 %93, 116
  %95 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %94)
  %96 = fmul float %95, %46
  %97 = shl i32 %88, 4
  %98 = add i32 %97, 120
  %99 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %98)
  %100 = fmul float %99, %46
  %101 = shl i32 %88, 4
  %102 = add i32 %101, 124
  %103 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %102)
  %104 = fmul float %103, %46
  %105 = bitcast float %84 to i32
  %106 = shl i32 %105, 4
  %107 = add i32 %106, 112
  %108 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %107)
  %109 = fmul float %108, %47
  %110 = fadd float %109, %92
  %111 = shl i32 %105, 4
  %112 = add i32 %111, 116
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %47
  %115 = fadd float %114, %96
  %116 = shl i32 %105, 4
  %117 = add i32 %116, 120
  %118 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %117)
  %119 = fmul float %118, %47
  %120 = fadd float %119, %100
  %121 = shl i32 %105, 4
  %122 = add i32 %121, 124
  %123 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %122)
  %124 = fmul float %123, %47
  %125 = fadd float %124, %104
  %126 = bitcast float %81 to i32
  %127 = shl i32 %126, 4
  %128 = add i32 %127, 112
  %129 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %128)
  %130 = fmul float %129, %48
  %131 = fadd float %130, %110
  %132 = shl i32 %126, 4
  %133 = add i32 %132, 116
  %134 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %133)
  %135 = fmul float %134, %48
  %136 = fadd float %135, %115
  %137 = shl i32 %126, 4
  %138 = add i32 %137, 120
  %139 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %138)
  %140 = fmul float %139, %48
  %141 = fadd float %140, %120
  %142 = shl i32 %126, 4
  %143 = add i32 %142, 124
  %144 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %143)
  %145 = fmul float %144, %48
  %146 = fadd float %145, %125
  %147 = bitcast float %78 to i32
  %148 = shl i32 %147, 4
  %149 = add i32 %148, 112
  %150 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %149)
  %151 = fmul float %150, %49
  %152 = fadd float %151, %131
  %153 = shl i32 %147, 4
  %154 = add i32 %153, 116
  %155 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %154)
  %156 = fmul float %155, %49
  %157 = fadd float %156, %136
  %158 = shl i32 %147, 4
  %159 = add i32 %158, 120
  %160 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %159)
  %161 = fmul float %160, %49
  %162 = fadd float %161, %141
  %163 = shl i32 %147, 4
  %164 = add i32 %163, 124
  %165 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %164)
  %166 = fmul float %165, %49
  %167 = fadd float %166, %146
  %168 = fmul float %71, %152
  %169 = fmul float %73, %157
  %170 = fadd float %168, %169
  %171 = fmul float %75, %162
  %172 = fadd float %170, %171
  %173 = fmul float 1.000000e+00, %167
  %174 = fadd float %172, %173
  %175 = bitcast float %69 to i32
  %176 = mul i32 3, %175
  %177 = add i32 %176, 1
  %178 = bitcast i32 %177 to float
  %179 = bitcast float %68 to i32
  %180 = mul i32 3, %179
  %181 = add i32 %180, 1
  %182 = bitcast i32 %181 to float
  %183 = bitcast float %67 to i32
  %184 = mul i32 3, %183
  %185 = add i32 %184, 1
  %186 = bitcast i32 %185 to float
  %187 = bitcast float %66 to i32
  %188 = mul i32 3, %187
  %189 = add i32 %188, 1
  %190 = bitcast i32 %189 to float
  %191 = bitcast float %190 to i32
  %192 = shl i32 %191, 4
  %193 = add i32 %192, 112
  %194 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %193)
  %195 = fmul float %194, %46
  %196 = shl i32 %191, 4
  %197 = add i32 %196, 116
  %198 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %197)
  %199 = fmul float %198, %46
  %200 = shl i32 %191, 4
  %201 = add i32 %200, 120
  %202 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %201)
  %203 = fmul float %202, %46
  %204 = shl i32 %191, 4
  %205 = add i32 %204, 124
  %206 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %205)
  %207 = fmul float %206, %46
  %208 = bitcast float %186 to i32
  %209 = shl i32 %208, 4
  %210 = add i32 %209, 112
  %211 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %210)
  %212 = fmul float %211, %47
  %213 = fadd float %212, %195
  %214 = shl i32 %208, 4
  %215 = add i32 %214, 116
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %47
  %218 = fadd float %217, %199
  %219 = shl i32 %208, 4
  %220 = add i32 %219, 120
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %47
  %223 = fadd float %222, %203
  %224 = shl i32 %208, 4
  %225 = add i32 %224, 124
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %47
  %228 = fadd float %227, %207
  %229 = bitcast float %182 to i32
  %230 = shl i32 %229, 4
  %231 = add i32 %230, 112
  %232 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %231)
  %233 = fmul float %232, %48
  %234 = fadd float %233, %213
  %235 = shl i32 %229, 4
  %236 = add i32 %235, 116
  %237 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %236)
  %238 = fmul float %237, %48
  %239 = fadd float %238, %218
  %240 = shl i32 %229, 4
  %241 = add i32 %240, 120
  %242 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %241)
  %243 = fmul float %242, %48
  %244 = fadd float %243, %223
  %245 = shl i32 %229, 4
  %246 = add i32 %245, 124
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %48
  %249 = fadd float %248, %228
  %250 = bitcast float %178 to i32
  %251 = shl i32 %250, 4
  %252 = add i32 %251, 112
  %253 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %252)
  %254 = fmul float %253, %49
  %255 = fadd float %254, %234
  %256 = shl i32 %250, 4
  %257 = add i32 %256, 116
  %258 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %257)
  %259 = fmul float %258, %49
  %260 = fadd float %259, %239
  %261 = shl i32 %250, 4
  %262 = add i32 %261, 120
  %263 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %262)
  %264 = fmul float %263, %49
  %265 = fadd float %264, %244
  %266 = shl i32 %250, 4
  %267 = add i32 %266, 124
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %49
  %270 = fadd float %269, %249
  %271 = fmul float %71, %255
  %272 = fmul float %73, %260
  %273 = fadd float %271, %272
  %274 = fmul float %75, %265
  %275 = fadd float %273, %274
  %276 = fmul float 1.000000e+00, %270
  %277 = fadd float %275, %276
  %278 = bitcast float %69 to i32
  %279 = mul i32 3, %278
  %280 = add i32 %279, 2
  %281 = bitcast i32 %280 to float
  %282 = bitcast float %68 to i32
  %283 = mul i32 3, %282
  %284 = add i32 %283, 2
  %285 = bitcast i32 %284 to float
  %286 = bitcast float %67 to i32
  %287 = mul i32 3, %286
  %288 = add i32 %287, 2
  %289 = bitcast i32 %288 to float
  %290 = bitcast float %66 to i32
  %291 = mul i32 3, %290
  %292 = add i32 %291, 2
  %293 = bitcast i32 %292 to float
  %294 = bitcast float %293 to i32
  %295 = shl i32 %294, 4
  %296 = add i32 %295, 112
  %297 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %296)
  %298 = fmul float %297, %46
  %299 = shl i32 %294, 4
  %300 = add i32 %299, 116
  %301 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %300)
  %302 = fmul float %301, %46
  %303 = shl i32 %294, 4
  %304 = add i32 %303, 120
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = fmul float %305, %46
  %307 = shl i32 %294, 4
  %308 = add i32 %307, 124
  %309 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %308)
  %310 = fmul float %309, %46
  %311 = bitcast float %289 to i32
  %312 = shl i32 %311, 4
  %313 = add i32 %312, 112
  %314 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %313)
  %315 = fmul float %314, %47
  %316 = fadd float %315, %298
  %317 = shl i32 %311, 4
  %318 = add i32 %317, 116
  %319 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %318)
  %320 = fmul float %319, %47
  %321 = fadd float %320, %302
  %322 = shl i32 %311, 4
  %323 = add i32 %322, 120
  %324 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %323)
  %325 = fmul float %324, %47
  %326 = fadd float %325, %306
  %327 = shl i32 %311, 4
  %328 = add i32 %327, 124
  %329 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %328)
  %330 = fmul float %329, %47
  %331 = fadd float %330, %310
  %332 = bitcast float %285 to i32
  %333 = shl i32 %332, 4
  %334 = add i32 %333, 112
  %335 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %334)
  %336 = fmul float %335, %48
  %337 = fadd float %336, %316
  %338 = shl i32 %332, 4
  %339 = add i32 %338, 116
  %340 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %339)
  %341 = fmul float %340, %48
  %342 = fadd float %341, %321
  %343 = shl i32 %332, 4
  %344 = add i32 %343, 120
  %345 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %344)
  %346 = fmul float %345, %48
  %347 = fadd float %346, %326
  %348 = shl i32 %332, 4
  %349 = add i32 %348, 124
  %350 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %349)
  %351 = fmul float %350, %48
  %352 = fadd float %351, %331
  %353 = bitcast float %281 to i32
  %354 = shl i32 %353, 4
  %355 = add i32 %354, 112
  %356 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %355)
  %357 = fmul float %356, %49
  %358 = fadd float %357, %337
  %359 = shl i32 %353, 4
  %360 = add i32 %359, 116
  %361 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %360)
  %362 = fmul float %361, %49
  %363 = fadd float %362, %342
  %364 = shl i32 %353, 4
  %365 = add i32 %364, 120
  %366 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %365)
  %367 = fmul float %366, %49
  %368 = fadd float %367, %347
  %369 = shl i32 %353, 4
  %370 = add i32 %369, 124
  %371 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %370)
  %372 = fmul float %371, %49
  %373 = fadd float %372, %352
  %374 = fmul float %71, %358
  %375 = fmul float %73, %363
  %376 = fadd float %374, %375
  %377 = fmul float %75, %368
  %378 = fadd float %376, %377
  %379 = fmul float 1.000000e+00, %373
  %380 = fadd float %378, %379
  %381 = fmul float %11, %174
  %382 = fmul float %12, %174
  %383 = fmul float %13, %174
  %384 = fmul float %14, %174
  %385 = fmul float %15, %277
  %386 = fadd float %385, %381
  %387 = fmul float %16, %277
  %388 = fadd float %387, %382
  %389 = fmul float %17, %277
  %390 = fadd float %389, %383
  %391 = fmul float %18, %277
  %392 = fadd float %391, %384
  %393 = fmul float %19, %380
  %394 = fadd float %393, %386
  %395 = fmul float %20, %380
  %396 = fadd float %395, %388
  %397 = fmul float %21, %380
  %398 = fadd float %397, %390
  %399 = fmul float %22, %380
  %400 = fadd float %399, %392
  %401 = fadd float %394, %23
  %402 = fadd float %396, %24
  %403 = fadd float %398, %25
  %404 = fadd float %400, %26
  %405 = fmul float %60, %27
  %406 = fadd float %405, %29
  %407 = fmul float %61, %28
  %408 = fadd float %407, %30
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %406, float %408, float %390, float %392)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %401, float %402, float %403, float %404)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020f00
bf8c0770
7e02110f
d2d60005
02010701
34120a84
4a0212ff
00000070
c0800100
bf8c007f
e0301000
80000601
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
100e0306
7e0c1110
d2d60006
02010706
341a0c84
4a101aff
00000070
e0301000
80000808
bf8c0770
d2820008
041e0508
7e0e1111
d2d60007
02010707
341c0e84
4a141cff
00000070
e0301000
80000a0a
bf8c0770
d282000a
0422070a
7e101112
d2d60008
02010708
341e1084
4a161eff
00000070
e0301000
80000b0b
bf8c0770
d282000c
042a090b
4a1412ff
00000074
e0301000
80000a0a
bf8c0770
1014030a
4a161aff
00000074
e0301000
80000b0b
bf8c0770
d282000a
042a050b
4a161cff
00000074
e0301000
80000b0b
bf8c0770
d282000a
042a070b
4a161eff
00000074
e0301000
80000b0b
bf8c0770
d282000b
042a090b
c0840700
bf8c007f
e00c2000
80021000
c2020119
c2028115
bf8c0070
7e140205
d282000a
04280911
1028170a
c2020118
c2028114
bf8c007f
7e160205
d282000b
042c0910
d2820014
0452190b
4a1812ff
00000078
e0301000
80000c0c
bf8c0770
1018030c
4a2a1aff
00000078
e0301000
80001515
bf8c0770
d282000c
04320515
4a2a1cff
00000078
e0301000
80001515
bf8c0770
d282000c
04320715
4a2a1eff
00000078
e0301000
80001515
bf8c0770
d2820015
04320915
c202011a
c2028116
bf8c007f
7e180205
d282000c
04300912
d2820010
04522b0c
4a1212ff
0000007c
e0301000
80000909
bf8c0770
10120309
4a1a1aff
0000007c
e0301000
80000d0d
bf8c0770
d2820009
0426050d
4a1a1cff
0000007c
e0301000
80000d0d
bf8c0770
d2820009
0426070d
4a1a1eff
0000007c
e0301000
80000d0d
bf8c0770
d2820009
0426090d
06121310
c2020103
bf8c007f
101c1204
4a1a0a81
341a1a84
4a1e1aff
00000070
e0301000
80000f0f
bf8c0770
1020030f
4a1e0c81
341e1e84
4a221eff
00000070
e0301000
80001111
bf8c0770
d2820011
04420511
4a200e81
34202084
4a2420ff
00000070
e0301000
80001212
bf8c0770
d2820012
04460712
4a221081
34222284
4a2622ff
00000070
e0301000
80001313
bf8c0770
d2820012
044a0913
4a261aff
00000074
e0301000
80001313
bf8c0770
10260313
4a281eff
00000074
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
00000074
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
00000074
e0301000
80001414
bf8c0770
d2820013
044e0914
1026270a
d2820012
044e250b
4a261aff
00000078
e0301000
80001313
bf8c0770
10260313
4a281eff
00000078
e0301000
80001414
bf8c0770
d2820013
044e0514
4a2820ff
00000078
e0301000
80001414
bf8c0770
d2820013
044e0714
4a2822ff
00000078
e0301000
80001414
bf8c0770
d2820013
044e0914
d2820012
044a270c
4a1a1aff
0000007c
e0301000
80000d0d
bf8c0770
101a030d
4a1e1eff
0000007c
e0301000
80000f0f
bf8c0770
d282000d
0436050f
4a1e20ff
0000007c
e0301000
80000f0f
bf8c0770
d282000d
0436070f
4a1e22ff
0000007c
e0301000
80000f0f
bf8c0770
d282000d
0436090f
061a1b12
c2020107
bf8c007f
d282000e
043a1a04
c2020102
bf8c007f
101e1204
c2020106
bf8c007f
d282000f
043e1a04
c082070c
bf8c007f
e00c2000
80011000
c2020111
c2028113
bf8c0070
7e000205
d2820000
04000911
c2020110
c2028112
bf8c007f
7e280205
d2820010
04500910
f800020f
0e0f0010
bf8c070f
4a000a82
34000084
4a0a00ff
00000070
e0301000
80000505
bf8c0770
10200305
4a0a0c82
340a0a84
4a0c0aff
00000070
e0301000
80000606
bf8c0770
d2820010
04420506
4a0c0e82
340c0c84
4a0e0cff
00000070
e0301000
80000707
bf8c0770
d2820010
04420707
4a0e1082
340e0e84
4a100eff
00000070
e0301000
80000808
bf8c0770
d2820008
04420908
4a2000ff
00000074
e0301000
80001010
bf8c0770
10200310
4a220aff
00000074
e0301000
80001111
bf8c0770
d2820010
04420511
4a220cff
00000074
e0301000
80001111
bf8c0770
d2820010
04420711
4a220eff
00000074
e0301000
80001111
bf8c0770
d2820010
04420911
1014210a
d2820008
042a110b
4a1400ff
00000078
e0301000
80000a0a
bf8c0770
1014030a
4a160aff
00000078
e0301000
80000b0b
bf8c0770
d282000a
042a050b
4a160cff
00000078
e0301000
80000b0b
bf8c0770
d282000a
042a070b
4a160eff
00000078
e0301000
80000b0b
bf8c0770
d282000a
042a090b
d2820008
0422150c
4a0000ff
0000007c
e0301000
80000000
bf8c0770
10000300
4a0a0aff
0000007c
e0301000
80000505
bf8c0770
d2820000
04020505
4a0a0cff
0000007c
e0301000
80000505
bf8c0770
d2820000
04020705
4a0a0eff
0000007c
e0301000
80000505
bf8c0770
d2820000
04020905
06000108
c202010b
bf8c007f
d2820001
043a0004
c202010f
bf8c007f
06020204
c202010a
bf8c007f
d2820002
043e0004
c202010e
bf8c007f
06040404
c2020101
bf8c007f
10061204
c2020105
bf8c007f
d2820003
040e1a04
c2020109
bf8c007f
d2820003
040e0004
c202010d
bf8c007f
06060604
c2020100
bf8c007f
10081204
c2020104
bf8c007f
d2820004
04121a04
c2020108
bf8c007f
d2820000
04120004
c200010c
bf8c007f
06000000
f80008cf
01020300
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xyz, IMM[0].xxxx
  1: MOV TEMP[1].xy, IN[0].xyyy
  2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D
  3: MOV TEMP[0].w, TEMP[1].wwww
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 3
  %32 = fcmp ugt float %31, 0x3F80101020000000
  %33 = sext i1 %32 to i32
  %34 = trunc i32 %33 to i1
  %35 = select i1 %34, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %35)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float %31)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800800
00010002
7e0202ff
3c008081
bf8c0770
d0080000
02020300
d2000001
0001e4f3
7c260280
7e0202f2
f800180f
00010101
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0].xyz, IMM[0].xxxx
  1: MOV TEMP[1].xy, IN[0].xyyy
  2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D
  3: MOV TEMP[0].w, TEMP[1].wwww
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %21 = load <32 x i8> addrspace(2)* %20, !tbaa !0
  %22 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0
  %24 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %25 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %26 = bitcast float %24 to i32
  %27 = bitcast float %25 to i32
  %28 = insertelement <2 x i32> undef, i32 %26, i32 0
  %29 = insertelement <2 x i32> %28, i32 %27, i32 1
  %30 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %29, <32 x i8> %21, <16 x i8> %23, i32 2)
  %31 = extractelement <4 x float> %30, i32 3
  %32 = fcmp ugt float %31, 0x3FDADADAE0000000
  %33 = sext i1 %32 to i32
  %34 = trunc i32 %33 to i1
  %35 = select i1 %34, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %35)
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float %31)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kill(float)

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0100
c80d0101
c8080000
c8090001
c0800300
c0c20500
bf8c007f
f0800800
00010002
7e0202ff
3ed6d6d7
bf8c0770
d0080000
02020300
d2000001
0001e4f3
7c260280
7e0202f2
f800180f
00010101
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MOV OUT[0], IMM[0].xxxx
  1: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00)
  ret void
}

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
SI CODE:
7e0002f2
f800180f
00000000
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL CONST[0..197]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[5].xyzz, CONST[4].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+6], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+6], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+6], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+6], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+6], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+6], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+6], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+6], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+6], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+6], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+6], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+6], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[0], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[1], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[0], TEMP[0], CONST[3]
 46: MOV OUT[0], TEMP[0]
 47: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %33 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0
  %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %5)
  %36 = extractelement <4 x float> %35, i32 0
  %37 = extractelement <4 x float> %35, i32 1
  %38 = extractelement <4 x float> %35, i32 2
  %39 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0
  %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %5)
  %42 = extractelement <4 x float> %41, i32 0
  %43 = extractelement <4 x float> %41, i32 1
  %44 = extractelement <4 x float> %41, i32 2
  %45 = extractelement <4 x float> %41, i32 3
  %46 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %5)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = extractelement <4 x float> %48, i32 1
  %51 = extractelement <4 x float> %48, i32 2
  %52 = extractelement <4 x float> %48, i32 3
  %53 = fptosi float %49 to i32
  %54 = fptosi float %50 to i32
  %55 = fptosi float %51 to i32
  %56 = fptosi float %52 to i32
  %57 = bitcast i32 %53 to float
  %58 = bitcast i32 %54 to float
  %59 = bitcast i32 %55 to float
  %60 = bitcast i32 %56 to float
  %61 = fmul float %36, %30
  %62 = fadd float %61, %27
  %63 = fmul float %37, %31
  %64 = fadd float %63, %28
  %65 = fmul float %38, %32
  %66 = fadd float %65, %29
  %67 = bitcast float %60 to i32
  %68 = mul i32 3, %67
  %69 = bitcast i32 %68 to float
  %70 = bitcast float %59 to i32
  %71 = mul i32 3, %70
  %72 = bitcast i32 %71 to float
  %73 = bitcast float %58 to i32
  %74 = mul i32 3, %73
  %75 = bitcast i32 %74 to float
  %76 = bitcast float %57 to i32
  %77 = mul i32 3, %76
  %78 = bitcast i32 %77 to float
  %79 = bitcast float %78 to i32
  %80 = shl i32 %79, 4
  %81 = add i32 %80, 96
  %82 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %81)
  %83 = fmul float %82, %42
  %84 = shl i32 %79, 4
  %85 = add i32 %84, 100
  %86 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %85)
  %87 = fmul float %86, %42
  %88 = shl i32 %79, 4
  %89 = add i32 %88, 104
  %90 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %89)
  %91 = fmul float %90, %42
  %92 = shl i32 %79, 4
  %93 = add i32 %92, 108
  %94 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %93)
  %95 = fmul float %94, %42
  %96 = bitcast float %75 to i32
  %97 = shl i32 %96, 4
  %98 = add i32 %97, 96
  %99 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %98)
  %100 = fmul float %99, %43
  %101 = fadd float %100, %83
  %102 = shl i32 %96, 4
  %103 = add i32 %102, 100
  %104 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %103)
  %105 = fmul float %104, %43
  %106 = fadd float %105, %87
  %107 = shl i32 %96, 4
  %108 = add i32 %107, 104
  %109 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %108)
  %110 = fmul float %109, %43
  %111 = fadd float %110, %91
  %112 = shl i32 %96, 4
  %113 = add i32 %112, 108
  %114 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %113)
  %115 = fmul float %114, %43
  %116 = fadd float %115, %95
  %117 = bitcast float %72 to i32
  %118 = shl i32 %117, 4
  %119 = add i32 %118, 96
  %120 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %119)
  %121 = fmul float %120, %44
  %122 = fadd float %121, %101
  %123 = shl i32 %117, 4
  %124 = add i32 %123, 100
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %44
  %127 = fadd float %126, %106
  %128 = shl i32 %117, 4
  %129 = add i32 %128, 104
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %44
  %132 = fadd float %131, %111
  %133 = shl i32 %117, 4
  %134 = add i32 %133, 108
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %44
  %137 = fadd float %136, %116
  %138 = bitcast float %69 to i32
  %139 = shl i32 %138, 4
  %140 = add i32 %139, 96
  %141 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %140)
  %142 = fmul float %141, %45
  %143 = fadd float %142, %122
  %144 = shl i32 %138, 4
  %145 = add i32 %144, 100
  %146 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %145)
  %147 = fmul float %146, %45
  %148 = fadd float %147, %127
  %149 = shl i32 %138, 4
  %150 = add i32 %149, 104
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %45
  %153 = fadd float %152, %132
  %154 = shl i32 %138, 4
  %155 = add i32 %154, 108
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %45
  %158 = fadd float %157, %137
  %159 = fmul float %62, %143
  %160 = fmul float %64, %148
  %161 = fadd float %159, %160
  %162 = fmul float %66, %153
  %163 = fadd float %161, %162
  %164 = fmul float 1.000000e+00, %158
  %165 = fadd float %163, %164
  %166 = bitcast float %60 to i32
  %167 = mul i32 3, %166
  %168 = add i32 %167, 1
  %169 = bitcast i32 %168 to float
  %170 = bitcast float %59 to i32
  %171 = mul i32 3, %170
  %172 = add i32 %171, 1
  %173 = bitcast i32 %172 to float
  %174 = bitcast float %58 to i32
  %175 = mul i32 3, %174
  %176 = add i32 %175, 1
  %177 = bitcast i32 %176 to float
  %178 = bitcast float %57 to i32
  %179 = mul i32 3, %178
  %180 = add i32 %179, 1
  %181 = bitcast i32 %180 to float
  %182 = bitcast float %181 to i32
  %183 = shl i32 %182, 4
  %184 = add i32 %183, 96
  %185 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %184)
  %186 = fmul float %185, %42
  %187 = shl i32 %182, 4
  %188 = add i32 %187, 100
  %189 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %188)
  %190 = fmul float %189, %42
  %191 = shl i32 %182, 4
  %192 = add i32 %191, 104
  %193 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %192)
  %194 = fmul float %193, %42
  %195 = shl i32 %182, 4
  %196 = add i32 %195, 108
  %197 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %196)
  %198 = fmul float %197, %42
  %199 = bitcast float %177 to i32
  %200 = shl i32 %199, 4
  %201 = add i32 %200, 96
  %202 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %201)
  %203 = fmul float %202, %43
  %204 = fadd float %203, %186
  %205 = shl i32 %199, 4
  %206 = add i32 %205, 100
  %207 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %206)
  %208 = fmul float %207, %43
  %209 = fadd float %208, %190
  %210 = shl i32 %199, 4
  %211 = add i32 %210, 104
  %212 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %211)
  %213 = fmul float %212, %43
  %214 = fadd float %213, %194
  %215 = shl i32 %199, 4
  %216 = add i32 %215, 108
  %217 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %216)
  %218 = fmul float %217, %43
  %219 = fadd float %218, %198
  %220 = bitcast float %173 to i32
  %221 = shl i32 %220, 4
  %222 = add i32 %221, 96
  %223 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %222)
  %224 = fmul float %223, %44
  %225 = fadd float %224, %204
  %226 = shl i32 %220, 4
  %227 = add i32 %226, 100
  %228 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %227)
  %229 = fmul float %228, %44
  %230 = fadd float %229, %209
  %231 = shl i32 %220, 4
  %232 = add i32 %231, 104
  %233 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %232)
  %234 = fmul float %233, %44
  %235 = fadd float %234, %214
  %236 = shl i32 %220, 4
  %237 = add i32 %236, 108
  %238 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %237)
  %239 = fmul float %238, %44
  %240 = fadd float %239, %219
  %241 = bitcast float %169 to i32
  %242 = shl i32 %241, 4
  %243 = add i32 %242, 96
  %244 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %243)
  %245 = fmul float %244, %45
  %246 = fadd float %245, %225
  %247 = shl i32 %241, 4
  %248 = add i32 %247, 100
  %249 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %248)
  %250 = fmul float %249, %45
  %251 = fadd float %250, %230
  %252 = shl i32 %241, 4
  %253 = add i32 %252, 104
  %254 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %253)
  %255 = fmul float %254, %45
  %256 = fadd float %255, %235
  %257 = shl i32 %241, 4
  %258 = add i32 %257, 108
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %45
  %261 = fadd float %260, %240
  %262 = fmul float %62, %246
  %263 = fmul float %64, %251
  %264 = fadd float %262, %263
  %265 = fmul float %66, %256
  %266 = fadd float %264, %265
  %267 = fmul float 1.000000e+00, %261
  %268 = fadd float %266, %267
  %269 = bitcast float %60 to i32
  %270 = mul i32 3, %269
  %271 = add i32 %270, 2
  %272 = bitcast i32 %271 to float
  %273 = bitcast float %59 to i32
  %274 = mul i32 3, %273
  %275 = add i32 %274, 2
  %276 = bitcast i32 %275 to float
  %277 = bitcast float %58 to i32
  %278 = mul i32 3, %277
  %279 = add i32 %278, 2
  %280 = bitcast i32 %279 to float
  %281 = bitcast float %57 to i32
  %282 = mul i32 3, %281
  %283 = add i32 %282, 2
  %284 = bitcast i32 %283 to float
  %285 = bitcast float %284 to i32
  %286 = shl i32 %285, 4
  %287 = add i32 %286, 96
  %288 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %287)
  %289 = fmul float %288, %42
  %290 = shl i32 %285, 4
  %291 = add i32 %290, 100
  %292 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %291)
  %293 = fmul float %292, %42
  %294 = shl i32 %285, 4
  %295 = add i32 %294, 104
  %296 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %295)
  %297 = fmul float %296, %42
  %298 = shl i32 %285, 4
  %299 = add i32 %298, 108
  %300 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %299)
  %301 = fmul float %300, %42
  %302 = bitcast float %280 to i32
  %303 = shl i32 %302, 4
  %304 = add i32 %303, 96
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = fmul float %305, %43
  %307 = fadd float %306, %289
  %308 = shl i32 %302, 4
  %309 = add i32 %308, 100
  %310 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %309)
  %311 = fmul float %310, %43
  %312 = fadd float %311, %293
  %313 = shl i32 %302, 4
  %314 = add i32 %313, 104
  %315 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %314)
  %316 = fmul float %315, %43
  %317 = fadd float %316, %297
  %318 = shl i32 %302, 4
  %319 = add i32 %318, 108
  %320 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %319)
  %321 = fmul float %320, %43
  %322 = fadd float %321, %301
  %323 = bitcast float %276 to i32
  %324 = shl i32 %323, 4
  %325 = add i32 %324, 96
  %326 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %325)
  %327 = fmul float %326, %44
  %328 = fadd float %327, %307
  %329 = shl i32 %323, 4
  %330 = add i32 %329, 100
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %44
  %333 = fadd float %332, %312
  %334 = shl i32 %323, 4
  %335 = add i32 %334, 104
  %336 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %335)
  %337 = fmul float %336, %44
  %338 = fadd float %337, %317
  %339 = shl i32 %323, 4
  %340 = add i32 %339, 108
  %341 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %340)
  %342 = fmul float %341, %44
  %343 = fadd float %342, %322
  %344 = bitcast float %272 to i32
  %345 = shl i32 %344, 4
  %346 = add i32 %345, 96
  %347 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %346)
  %348 = fmul float %347, %45
  %349 = fadd float %348, %328
  %350 = shl i32 %344, 4
  %351 = add i32 %350, 100
  %352 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %351)
  %353 = fmul float %352, %45
  %354 = fadd float %353, %333
  %355 = shl i32 %344, 4
  %356 = add i32 %355, 104
  %357 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %356)
  %358 = fmul float %357, %45
  %359 = fadd float %358, %338
  %360 = shl i32 %344, 4
  %361 = add i32 %360, 108
  %362 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %361)
  %363 = fmul float %362, %45
  %364 = fadd float %363, %343
  %365 = fmul float %62, %349
  %366 = fmul float %64, %354
  %367 = fadd float %365, %366
  %368 = fmul float %66, %359
  %369 = fadd float %367, %368
  %370 = fmul float 1.000000e+00, %364
  %371 = fadd float %369, %370
  %372 = fmul float %11, %165
  %373 = fmul float %12, %165
  %374 = fmul float %13, %165
  %375 = fmul float %14, %165
  %376 = fmul float %15, %268
  %377 = fadd float %376, %372
  %378 = fmul float %16, %268
  %379 = fadd float %378, %373
  %380 = fmul float %17, %268
  %381 = fadd float %380, %374
  %382 = fmul float %18, %268
  %383 = fadd float %382, %375
  %384 = fmul float %19, %371
  %385 = fadd float %384, %377
  %386 = fmul float %20, %371
  %387 = fadd float %386, %379
  %388 = fmul float %21, %371
  %389 = fadd float %388, %381
  %390 = fmul float %22, %371
  %391 = fadd float %390, %383
  %392 = fadd float %385, %23
  %393 = fadd float %387, %24
  %394 = fadd float %389, %25
  %395 = fadd float %391, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %392, float %393, float %394, float %395)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020800
bf8c0770
7e021108
d2d60005
02010701
34180a84
4a0218ff
00000060
c0800100
bf8c007f
e0301000
80000601
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
100e0306
7e0c1109
d2d60006
02010706
341a0c84
4a1c1aff
00000060
e0301000
80000e0e
bf8c0770
d282000f
041e050e
7e0e110a
d2d60007
02010707
341c0e84
4a201cff
00000060
e0301000
80001010
bf8c0770
d2820010
043e0710
7e10110b
d2d60008
02010708
341e1084
4a121eff
00000060
e0301000
80000909
bf8c0770
d282000b
04420909
4a1218ff
00000064
e0301000
80000909
bf8c0770
10120309
4a141aff
00000064
e0301000
80000a0a
bf8c0770
d2820009
0426050a
4a141cff
00000064
e0301000
80000a0a
bf8c0770
d2820009
0426070a
4a141eff
00000064
e0301000
80000a0a
bf8c0770
d282000a
0426090a
c0820700
bf8c007f
e00c2000
80011000
c2020115
c2028111
bf8c0070
7e000205
d2820009
04000911
10001509
c2020114
c2028110
bf8c007f
7e140205
d282000a
04280910
d2820000
0402170a
4a1618ff
00000068
e0301000
80000b0b
bf8c0770
1016030b
4a281aff
00000068
e0301000
80001414
bf8c0770
d282000b
042e0514
4a281cff
00000068
e0301000
80001414
bf8c0770
d282000b
042e0714
4a281eff
00000068
e0301000
80001414
bf8c0770
d2820014
042e0914
c2020116
c2028112
bf8c007f
7e160205
d282000b
042c0912
d2820000
0402290b
4a1818ff
0000006c
e0301000
80000c0c
bf8c0770
1018030c
4a1a1aff
0000006c
e0301000
80000d0d
bf8c0770
d282000c
0432050d
4a1a1cff
0000006c
e0301000
80000d0d
bf8c0770
d282000c
0432070d
4a1a1eff
0000006c
e0301000
80000d0d
bf8c0770
d282000c
0432090d
06001900
c2020103
bf8c007f
101a0004
4a180a81
34181884
4a1c18ff
00000060
e0301000
80000e0e
bf8c0770
101e030e
4a1c0c81
341c1c84
4a201cff
00000060
e0301000
80001010
bf8c0770
d2820010
043e0510
4a1e0e81
341e1e84
4a221eff
00000060
e0301000
80001111
bf8c0770
d2820011
04420711
4a201081
34202084
4a2420ff
00000060
e0301000
80001212
bf8c0770
d2820011
04460912
4a2418ff
00000064
e0301000
80001212
bf8c0770
10240312
4a261cff
00000064
e0301000
80001313
bf8c0770
d2820012
044a0513
4a261eff
00000064
e0301000
80001313
bf8c0770
d2820012
044a0713
4a2620ff
00000064
e0301000
80001313
bf8c0770
d2820012
044a0913
10242509
d2820011
044a230a
4a2418ff
00000068
e0301000
80001212
bf8c0770
10240312
4a261cff
00000068
e0301000
80001313
bf8c0770
d2820012
044a0513
4a261eff
00000068
e0301000
80001313
bf8c0770
d2820012
044a0713
4a2620ff
00000068
e0301000
80001313
bf8c0770
d2820012
044a0913
d2820011
0446250b
4a1818ff
0000006c
e0301000
80000c0c
bf8c0770
1018030c
4a1c1cff
0000006c
e0301000
80000e0e
bf8c0770
d282000c
0432050e
4a1c1eff
0000006c
e0301000
80000e0e
bf8c0770
d282000c
0432070e
4a1c20ff
0000006c
e0301000
80000e0e
bf8c0770
d282000c
0432090e
06181911
c2020107
bf8c007f
d282000d
04361804
4a0a0a82
340a0a84
4a1c0aff
00000060
e0301000
80000e0e
bf8c0770
101c030e
4a0c0c82
340c0c84
4a1e0cff
00000060
e0301000
80000f0f
bf8c0770
d282000e
043a050f
4a0e0e82
340e0e84
4a1e0eff
00000060
e0301000
80000f0f
bf8c0770
d282000e
043a070f
4a101082
34101084
4a1e10ff
00000060
e0301000
80000f0f
bf8c0770
d282000e
043a090f
4a1e0aff
00000064
e0301000
80000f0f
bf8c0770
101e030f
4a200cff
00000064
e0301000
80001010
bf8c0770
d282000f
043e0510
4a200eff
00000064
e0301000
80001010
bf8c0770
d282000f
043e0710
4a2010ff
00000064
e0301000
80001010
bf8c0770
d282000f
043e0910
10121f09
d2820009
04261d0a
4a140aff
00000068
e0301000
80000a0a
bf8c0770
1014030a
4a1c0cff
00000068
e0301000
80000e0e
bf8c0770
d282000a
042a050e
4a1c0eff
00000068
e0301000
80000e0e
bf8c0770
d282000a
042a070e
4a1c10ff
00000068
e0301000
80000e0e
bf8c0770
d282000a
042a090e
d2820009
0426150b
4a0a0aff
0000006c
e0301000
80000505
bf8c0770
100a0305
4a0c0cff
0000006c
e0301000
80000606
bf8c0770
d2820005
04160506
4a0c0eff
0000006c
e0301000
80000606
bf8c0770
d2820005
04160706
4a0c10ff
0000006c
e0301000
80000606
bf8c0770
d2820001
04160906
06020309
c202010b
bf8c007f
d2820002
04360204
c202010f
bf8c007f
06040404
c2020102
bf8c007f
10060004
c2020106
bf8c007f
d2820003
040e1804
c202010a
bf8c007f
d2820003
040e0204
c202010e
bf8c007f
06060604
c2020101
bf8c007f
10080004
c2020105
bf8c007f
d2820004
04121804
c2020109
bf8c007f
d2820004
04120204
c202010d
bf8c007f
06080804
c2020100
bf8c007f
10000004
c2020104
bf8c007f
d2820000
04021804
c2020108
bf8c007f
d2820000
04020204
c200010c
bf8c007f
06000000
f80008cf
02030400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[2..8]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[3].xxxx, IMM[1].yyyy
 40: MOV TEMP[3].xy, IN[1].xyyy
 41: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
 42: MAD TEMP[3].x, TEMP[3].wwww, CONST[2].yyyy, CONST[2].zzzz
 43: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 44: F2I TEMP[3].x, -TEMP[3]
 45: UIF TEMP[3].xxxx :1
 46:   KILL
 47: ENDIF
 48: MOV TEMP[1].x, TEMP[1].xxxx
 49: MOV TEMP[1].yzw, TEMP[2].yxyz
 50: MOV OUT[0], TEMP[1]
 51: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %26 = load <32 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %30 = load <32 x i8> addrspace(2)* %29, !tbaa !0
  %31 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = fcmp ugt float %16, 0.000000e+00
  %34 = select i1 %33, float 1.000000e+00, float 0.000000e+00
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.AMDIL.clamp.(float %34, float 0.000000e+00, float 1.000000e+00)
  %47 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %48 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %49 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %50 = bitcast float %46 to i32
  %51 = icmp ne i32 %50, 0
  %. = select i1 %51, float -1.000000e+00, float 1.000000e+00
  %52 = bitcast float %35 to i32
  %53 = bitcast float %36 to i32
  %54 = insertelement <2 x i32> undef, i32 %52, i32 0
  %55 = insertelement <2 x i32> %54, i32 %53, i32 1
  %56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %55, <32 x i8> %30, <16 x i8> %32, i32 2)
  %57 = extractelement <4 x float> %56, i32 1
  %58 = extractelement <4 x float> %56, i32 3
  %59 = fmul float 2.000000e+00, %57
  %60 = fadd float %59, -1.000000e+00
  %61 = fmul float 2.000000e+00, %58
  %62 = fadd float %61, -1.000000e+00
  %63 = fmul float %37, %37
  %64 = fmul float %38, %38
  %65 = fadd float %64, %63
  %66 = fmul float %39, %39
  %67 = fadd float %65, %66
  %68 = call float @llvm.AMDGPU.rsq(float %67)
  %69 = fmul float %37, %68
  %70 = fmul float %38, %68
  %71 = fmul float %39, %68
  %72 = fmul float %60, %60
  %73 = fmul float %62, %62
  %74 = fadd float %72, %73
  %75 = fsub float -0.000000e+00, %74
  %76 = fadd float 1.000000e+00, %75
  %77 = fcmp uge float 0.000000e+00, %76
  %78 = select i1 %77, float 0.000000e+00, float %76
  %79 = call float @llvm.AMDGPU.rsq(float %78)
  %80 = fmul float %79, %78
  %81 = fsub float -0.000000e+00, %78
  %82 = call float @llvm.AMDGPU.cndlt(float %81, float %80, float 0.000000e+00)
  %83 = fmul float %69, %82
  %84 = fmul float %70, %82
  %85 = fmul float %71, %82
  %86 = fmul float %40, %40
  %87 = fmul float %41, %41
  %88 = fadd float %87, %86
  %89 = fmul float %42, %42
  %90 = fadd float %88, %89
  %91 = call float @llvm.AMDGPU.rsq(float %90)
  %92 = fmul float %40, %91
  %93 = fmul float %41, %91
  %94 = fmul float %42, %91
  %95 = fmul float %43, %43
  %96 = fmul float %44, %44
  %97 = fadd float %96, %95
  %98 = fmul float %45, %45
  %99 = fadd float %97, %98
  %100 = call float @llvm.AMDGPU.rsq(float %99)
  %101 = fmul float %43, %100
  %102 = fmul float %44, %100
  %103 = fmul float %45, %100
  %104 = fmul float %101, %62
  %105 = fmul float %102, %62
  %106 = fmul float %103, %62
  %107 = fmul float %92, %60
  %108 = fadd float %107, %104
  %109 = fmul float %93, %60
  %110 = fadd float %109, %105
  %111 = fmul float %94, %60
  %112 = fadd float %111, %106
  %113 = fmul float %83, %.
  %114 = fadd float %113, %108
  %115 = fmul float %84, %.
  %116 = fadd float %115, %110
  %117 = fmul float %85, %.
  %118 = fadd float %117, %112
  %119 = fmul float %114, 5.000000e-01
  %120 = fadd float %119, 5.000000e-01
  %121 = fmul float %116, 5.000000e-01
  %122 = fadd float %121, 5.000000e-01
  %123 = fcmp uge float %118, 0.000000e+00
  %124 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %125 = fsub float -0.000000e+00, %124
  %126 = fptosi float %125 to i32
  %127 = bitcast i32 %126 to float
  %128 = bitcast float %127 to i32
  %129 = icmp ne i32 %128, 0
  %temp4.0 = select i1 %129, float 1.000000e+00, float 0.000000e+00
  %130 = fmul float %24, 9.765625e-04
  %131 = bitcast float %35 to i32
  %132 = bitcast float %36 to i32
  %133 = insertelement <2 x i32> undef, i32 %131, i32 0
  %134 = insertelement <2 x i32> %133, i32 %132, i32 1
  %135 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %134, <32 x i8> %26, <16 x i8> %28, i32 2)
  %136 = extractelement <4 x float> %135, i32 3
  %137 = fmul float %136, %22
  %138 = fadd float %137, %23
  %139 = fcmp ult float %138, 0.000000e+00
  %140 = select i1 %139, float 1.000000e+00, float 0.000000e+00
  %141 = fsub float -0.000000e+00, %140
  %142 = fptosi float %141 to i32
  %143 = bitcast i32 %142 to float
  %144 = bitcast float %143 to i32
  %145 = icmp ne i32 %144, 0
  br i1 %145, label %IF28, label %ENDIF27

IF28:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF27

ENDIF27:                                          ; preds = %main_body, %IF28
  %146 = call i32 @llvm.SI.packf16(float %130, float %120)
  %147 = bitcast i32 %146 to float
  %148 = call i32 @llvm.SI.packf16(float %122, float %temp4.0)
  %149 = bitcast i32 %148 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %147, float %149, float %147, float %149)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840304
c0c60508
bf8c007f
f0800a00
00430603
bf8c0770
060a0d06
060a0af3
060c0f07
060c0cf3
100e0d06
d2820007
041e0b05
080e0ef2
d0060008
02010107
d2000007
00210107
7e105b07
10100f08
d2060007
22010107
d0080008
02020e80
d2000007
00221080
c82c0300
c82d0301
c8240200
c8250201
10101309
d282000a
0422170b
c8200400
c8210401
d282000a
042a1108
7e145b0a
1016150b
10160f0b
c83c0900
c83d0901
c8340800
c8350801
10181b0d
d282000e
04321f0f
c8300a00
c8310a01
d282000e
043a190c
7e1c5b0e
101e1d0f
10240d0f
c84c0600
c84d0601
c8400500
c8410501
101e2110
d2820011
043e2713
c83c0700
c83d0701
d2820000
04461f0f
7e225b00
10002313
d2820000
044a0b00
d0080006
02010102
d2000001
0019e480
d2060801
02010101
d10a0006
02010101
d2000002
0019e6f2
d2820000
0402050b
d2820000
03c1e100
10021509
10020f01
10121d0d
10120d09
10162310
d2820009
04260b0b
d2820001
04260501
d2820001
03c1e101
10101508
100e0f08
10101d0c
100c0d08
1010230f
d2820005
041a0b08
d2820002
04160507
d00c0006
02010102
d2000002
0019e480
d2060002
22010102
7e041102
d10a0006
02010102
d2000002
0019e480
c0840300
c0c60500
bf8c007f
f0800800
00430303
c0800100
bf8c0070
c2020109
c202810a
bf8c007f
7e080205
d2820003
04100903
d0020004
02010103
d2000003
0011e480
d2060003
22010103
7e061103
d10a0004
02010103
c200010c
7e0602ff
3a800000
bf8c007f
10060600
be802404
8980007e
7e0802f3
7c260880
88fe007e
5e000500
5e020303
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..203]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: MOV TEMP[3].w, IMM[0].xxxx
  6: MOV TEMP[3].xyz, IN[5].xyzx
  7: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww
  8: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz
  9: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy
 10: UMUL TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx
 11: UARL ADDR[0].x, TEMP[7].xxxx
 12: MUL TEMP[7], CONST[ADDR[0].x+12], IN[1].xxxx
 13: UARL ADDR[0].x, TEMP[6].xxxx
 14: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[7]
 15: UARL ADDR[0].x, TEMP[5].xxxx
 16: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[6]
 17: UARL ADDR[0].x, TEMP[4].xxxx
 18: UARL ADDR[0].x, TEMP[4].xxxx
 19: MAD TEMP[4], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[5]
 20: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 21: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 22: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 23: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 24: UARL ADDR[0].x, TEMP[8].xxxx
 25: MUL TEMP[8], CONST[ADDR[0].x+12], IN[1].xxxx
 26: UARL ADDR[0].x, TEMP[7].xxxx
 27: MAD TEMP[7], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[8]
 28: UARL ADDR[0].x, TEMP[6].xxxx
 29: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[7]
 30: UARL ADDR[0].x, TEMP[5].xxxx
 31: UARL ADDR[0].x, TEMP[5].xxxx
 32: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[6]
 33: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 34: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 35: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 36: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 37: UARL ADDR[0].x, TEMP[0].xxxx
 38: MUL TEMP[0], CONST[ADDR[0].x+12], IN[1].xxxx
 39: UARL ADDR[0].x, TEMP[8].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[0]
 41: UARL ADDR[0].x, TEMP[7].xxxx
 42: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[0]
 43: UARL ADDR[0].x, TEMP[6].xxxx
 44: UARL ADDR[0].x, TEMP[6].xxxx
 45: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[0]
 46: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 47: DP4 TEMP[7].x, TEMP[1], TEMP[5]
 48: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 49: DP4 TEMP[8].x, TEMP[2], TEMP[4]
 50: DP4 TEMP[9].x, TEMP[2], TEMP[5]
 51: MOV TEMP[8].y, TEMP[9].xxxx
 52: DP4 TEMP[2].x, TEMP[2], TEMP[0]
 53: MOV TEMP[8].z, TEMP[2].xxxx
 54: DP4 TEMP[2].x, TEMP[3], TEMP[4]
 55: DP4 TEMP[4].x, TEMP[3], TEMP[5]
 56: MOV TEMP[2].y, TEMP[4].xxxx
 57: DP4 TEMP[0].x, TEMP[3], TEMP[0]
 58: MOV TEMP[2].z, TEMP[0].xxxx
 59: MUL TEMP[0], CONST[4], TEMP[6].xxxx
 60: MAD TEMP[0], CONST[5], TEMP[7].xxxx, TEMP[0]
 61: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 62: ADD TEMP[0], TEMP[0], CONST[7]
 63: MUL TEMP[1].xyz, TEMP[8].xyzz, CONST[9].wwww
 64: MUL TEMP[3], CONST[0], TEMP[1].xxxx
 65: MAD TEMP[3], CONST[1], TEMP[1].yyyy, TEMP[3]
 66: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[3]
 67: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
 68: RSQ TEMP[3].x, TEMP[3].xxxx
 69: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 70: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[9].wwww
 71: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 72: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 73: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 74: MAD TEMP[3].xy, IN[4].xyyy, CONST[8].xyyy, CONST[8].zwww
 75: MOV TEMP[3].zw, TEMP[1].yyxy
 76: MOV TEMP[4].x, TEMP[1].zzzz
 77: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 78: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 79: MOV TEMP[4].yzw, TEMP[1].yxyz
 80: MOV TEMP[1].xyz, TEMP[2].xyzx
 81: MOV OUT[1], TEMP[3]
 82: MOV OUT[3], TEMP[1]
 83: MOV OUT[2], TEMP[4]
 84: MOV OUT[0], TEMP[0]
 85: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = fptosi float %63 to i32
  %85 = fptosi float %64 to i32
  %86 = fptosi float %65 to i32
  %87 = fptosi float %66 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %50, %44
  %93 = fadd float %92, %41
  %94 = fmul float %51, %45
  %95 = fadd float %94, %42
  %96 = fmul float %52, %46
  %97 = fadd float %96, %43
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 192
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %56
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 196
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %56
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 200
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %56
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 204
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %56
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 192
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %57
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 196
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %57
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 200
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %57
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 204
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %57
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 192
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %58
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 196
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %58
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 200
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %58
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 204
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %58
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 192
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %59
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 196
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %59
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 200
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %59
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 204
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %59
  %189 = fadd float %188, %168
  %190 = bitcast float %91 to i32
  %191 = mul i32 3, %190
  %192 = add i32 %191, 1
  %193 = bitcast i32 %192 to float
  %194 = bitcast float %90 to i32
  %195 = mul i32 3, %194
  %196 = add i32 %195, 1
  %197 = bitcast i32 %196 to float
  %198 = bitcast float %89 to i32
  %199 = mul i32 3, %198
  %200 = add i32 %199, 1
  %201 = bitcast i32 %200 to float
  %202 = bitcast float %88 to i32
  %203 = mul i32 3, %202
  %204 = add i32 %203, 1
  %205 = bitcast i32 %204 to float
  %206 = bitcast float %205 to i32
  %207 = shl i32 %206, 4
  %208 = add i32 %207, 192
  %209 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %208)
  %210 = fmul float %209, %56
  %211 = shl i32 %206, 4
  %212 = add i32 %211, 196
  %213 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %212)
  %214 = fmul float %213, %56
  %215 = shl i32 %206, 4
  %216 = add i32 %215, 200
  %217 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %216)
  %218 = fmul float %217, %56
  %219 = shl i32 %206, 4
  %220 = add i32 %219, 204
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %56
  %223 = bitcast float %201 to i32
  %224 = shl i32 %223, 4
  %225 = add i32 %224, 192
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %57
  %228 = fadd float %227, %210
  %229 = shl i32 %223, 4
  %230 = add i32 %229, 196
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %57
  %233 = fadd float %232, %214
  %234 = shl i32 %223, 4
  %235 = add i32 %234, 200
  %236 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %235)
  %237 = fmul float %236, %57
  %238 = fadd float %237, %218
  %239 = shl i32 %223, 4
  %240 = add i32 %239, 204
  %241 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %240)
  %242 = fmul float %241, %57
  %243 = fadd float %242, %222
  %244 = bitcast float %197 to i32
  %245 = shl i32 %244, 4
  %246 = add i32 %245, 192
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %58
  %249 = fadd float %248, %228
  %250 = shl i32 %244, 4
  %251 = add i32 %250, 196
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %58
  %254 = fadd float %253, %233
  %255 = shl i32 %244, 4
  %256 = add i32 %255, 200
  %257 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %256)
  %258 = fmul float %257, %58
  %259 = fadd float %258, %238
  %260 = shl i32 %244, 4
  %261 = add i32 %260, 204
  %262 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %261)
  %263 = fmul float %262, %58
  %264 = fadd float %263, %243
  %265 = bitcast float %193 to i32
  %266 = shl i32 %265, 4
  %267 = add i32 %266, 192
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %59
  %270 = fadd float %269, %249
  %271 = shl i32 %265, 4
  %272 = add i32 %271, 196
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %59
  %275 = fadd float %274, %254
  %276 = shl i32 %265, 4
  %277 = add i32 %276, 200
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = fmul float %278, %59
  %280 = fadd float %279, %259
  %281 = shl i32 %265, 4
  %282 = add i32 %281, 204
  %283 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %282)
  %284 = fmul float %283, %59
  %285 = fadd float %284, %264
  %286 = bitcast float %91 to i32
  %287 = mul i32 3, %286
  %288 = add i32 %287, 2
  %289 = bitcast i32 %288 to float
  %290 = bitcast float %90 to i32
  %291 = mul i32 3, %290
  %292 = add i32 %291, 2
  %293 = bitcast i32 %292 to float
  %294 = bitcast float %89 to i32
  %295 = mul i32 3, %294
  %296 = add i32 %295, 2
  %297 = bitcast i32 %296 to float
  %298 = bitcast float %88 to i32
  %299 = mul i32 3, %298
  %300 = add i32 %299, 2
  %301 = bitcast i32 %300 to float
  %302 = bitcast float %301 to i32
  %303 = shl i32 %302, 4
  %304 = add i32 %303, 192
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = fmul float %305, %56
  %307 = shl i32 %302, 4
  %308 = add i32 %307, 196
  %309 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %308)
  %310 = fmul float %309, %56
  %311 = shl i32 %302, 4
  %312 = add i32 %311, 200
  %313 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %312)
  %314 = fmul float %313, %56
  %315 = shl i32 %302, 4
  %316 = add i32 %315, 204
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %56
  %319 = bitcast float %297 to i32
  %320 = shl i32 %319, 4
  %321 = add i32 %320, 192
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %57
  %324 = fadd float %323, %306
  %325 = shl i32 %319, 4
  %326 = add i32 %325, 196
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %57
  %329 = fadd float %328, %310
  %330 = shl i32 %319, 4
  %331 = add i32 %330, 200
  %332 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %331)
  %333 = fmul float %332, %57
  %334 = fadd float %333, %314
  %335 = shl i32 %319, 4
  %336 = add i32 %335, 204
  %337 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %336)
  %338 = fmul float %337, %57
  %339 = fadd float %338, %318
  %340 = bitcast float %293 to i32
  %341 = shl i32 %340, 4
  %342 = add i32 %341, 192
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %58
  %345 = fadd float %344, %324
  %346 = shl i32 %340, 4
  %347 = add i32 %346, 196
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %58
  %350 = fadd float %349, %329
  %351 = shl i32 %340, 4
  %352 = add i32 %351, 200
  %353 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %352)
  %354 = fmul float %353, %58
  %355 = fadd float %354, %334
  %356 = shl i32 %340, 4
  %357 = add i32 %356, 204
  %358 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %357)
  %359 = fmul float %358, %58
  %360 = fadd float %359, %339
  %361 = bitcast float %289 to i32
  %362 = shl i32 %361, 4
  %363 = add i32 %362, 192
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %59
  %366 = fadd float %365, %345
  %367 = shl i32 %361, 4
  %368 = add i32 %367, 196
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %59
  %371 = fadd float %370, %350
  %372 = shl i32 %361, 4
  %373 = add i32 %372, 200
  %374 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %373)
  %375 = fmul float %374, %59
  %376 = fadd float %375, %355
  %377 = shl i32 %361, 4
  %378 = add i32 %377, 204
  %379 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %378)
  %380 = fmul float %379, %59
  %381 = fadd float %380, %360
  %382 = fmul float %93, %174
  %383 = fmul float %95, %179
  %384 = fadd float %382, %383
  %385 = fmul float %97, %184
  %386 = fadd float %384, %385
  %387 = fmul float 1.000000e+00, %189
  %388 = fadd float %386, %387
  %389 = fmul float %93, %270
  %390 = fmul float %95, %275
  %391 = fadd float %389, %390
  %392 = fmul float %97, %280
  %393 = fadd float %391, %392
  %394 = fmul float 1.000000e+00, %285
  %395 = fadd float %393, %394
  %396 = fmul float %93, %366
  %397 = fmul float %95, %371
  %398 = fadd float %396, %397
  %399 = fmul float %97, %376
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %381
  %402 = fadd float %400, %401
  %403 = fmul float %70, %174
  %404 = fmul float %71, %179
  %405 = fadd float %403, %404
  %406 = fmul float %72, %184
  %407 = fadd float %405, %406
  %408 = fmul float 1.000000e+00, %189
  %409 = fadd float %407, %408
  %410 = fmul float %70, %270
  %411 = fmul float %71, %275
  %412 = fadd float %410, %411
  %413 = fmul float %72, %280
  %414 = fadd float %412, %413
  %415 = fmul float 1.000000e+00, %285
  %416 = fadd float %414, %415
  %417 = fmul float %70, %366
  %418 = fmul float %71, %371
  %419 = fadd float %417, %418
  %420 = fmul float %72, %376
  %421 = fadd float %419, %420
  %422 = fmul float 1.000000e+00, %381
  %423 = fadd float %421, %422
  %424 = fmul float %81, %174
  %425 = fmul float %82, %179
  %426 = fadd float %424, %425
  %427 = fmul float %83, %184
  %428 = fadd float %426, %427
  %429 = fmul float 1.000000e+00, %189
  %430 = fadd float %428, %429
  %431 = fmul float %81, %270
  %432 = fmul float %82, %275
  %433 = fadd float %431, %432
  %434 = fmul float %83, %280
  %435 = fadd float %433, %434
  %436 = fmul float 1.000000e+00, %285
  %437 = fadd float %435, %436
  %438 = fmul float %81, %366
  %439 = fmul float %82, %371
  %440 = fadd float %438, %439
  %441 = fmul float %83, %376
  %442 = fadd float %440, %441
  %443 = fmul float 1.000000e+00, %381
  %444 = fadd float %442, %443
  %445 = fmul float %20, %388
  %446 = fmul float %21, %388
  %447 = fmul float %22, %388
  %448 = fmul float %23, %388
  %449 = fmul float %24, %395
  %450 = fadd float %449, %445
  %451 = fmul float %25, %395
  %452 = fadd float %451, %446
  %453 = fmul float %26, %395
  %454 = fadd float %453, %447
  %455 = fmul float %27, %395
  %456 = fadd float %455, %448
  %457 = fmul float %28, %402
  %458 = fadd float %457, %450
  %459 = fmul float %29, %402
  %460 = fadd float %459, %452
  %461 = fmul float %30, %402
  %462 = fadd float %461, %454
  %463 = fmul float %31, %402
  %464 = fadd float %463, %456
  %465 = fadd float %458, %32
  %466 = fadd float %460, %33
  %467 = fadd float %462, %34
  %468 = fadd float %464, %35
  %469 = fmul float %409, %40
  %470 = fmul float %416, %40
  %471 = fmul float %423, %40
  %472 = fmul float %11, %469
  %473 = fmul float %12, %469
  %474 = fmul float %13, %469
  %475 = fmul float %14, %470
  %476 = fadd float %475, %472
  %477 = fmul float %15, %470
  %478 = fadd float %477, %473
  %479 = fmul float %16, %470
  %480 = fadd float %479, %474
  %481 = fmul float %17, %471
  %482 = fadd float %481, %476
  %483 = fmul float %18, %471
  %484 = fadd float %483, %478
  %485 = fmul float %19, %471
  %486 = fadd float %485, %480
  %487 = fmul float %482, %482
  %488 = fmul float %484, %484
  %489 = fadd float %488, %487
  %490 = fmul float %486, %486
  %491 = fadd float %489, %490
  %492 = call float @llvm.AMDGPU.rsq(float %491)
  %493 = fmul float %482, %492
  %494 = fmul float %484, %492
  %495 = fmul float %486, %492
  %496 = fmul float %430, %40
  %497 = fmul float %437, %40
  %498 = fmul float %444, %40
  %499 = fmul float %11, %496
  %500 = fmul float %12, %496
  %501 = fmul float %13, %496
  %502 = fmul float %14, %497
  %503 = fadd float %502, %499
  %504 = fmul float %15, %497
  %505 = fadd float %504, %500
  %506 = fmul float %16, %497
  %507 = fadd float %506, %501
  %508 = fmul float %17, %498
  %509 = fadd float %508, %503
  %510 = fmul float %18, %498
  %511 = fadd float %510, %505
  %512 = fmul float %19, %498
  %513 = fadd float %512, %507
  %514 = fmul float %76, %36
  %515 = fadd float %514, %38
  %516 = fmul float %77, %37
  %517 = fadd float %516, %39
  %518 = fmul float %513, %494
  %519 = fmul float %509, %495
  %520 = fmul float %511, %493
  %521 = fsub float -0.000000e+00, %518
  %522 = fmul float %511, %495
  %523 = fadd float %522, %521
  %524 = fsub float -0.000000e+00, %519
  %525 = fmul float %513, %493
  %526 = fadd float %525, %524
  %527 = fsub float -0.000000e+00, %520
  %528 = fmul float %509, %494
  %529 = fadd float %528, %527
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %515, float %517, float %493, float %494)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %495, float %523, float %526, float %529)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %509, float %511, float %513, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %465, float %466, float %467, float %468)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020e00
bf8c0770
7e02110e
d2d6000d
02010701
34081a84
4a0208ff
000000c0
c0800100
bf8c007f
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020600
bf8c0770
10020d01
7e04110f
d2d60012
02010702
340a2484
4a040aff
000000c0
e0301000
80000202
bf8c0770
d2820001
04060f02
7e041110
d2d60014
02010702
34142884
4a0414ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061102
7e041111
d2d60015
02010702
34162a84
4a0416ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061302
4a0408ff
000000c4
e0301000
80000202
bf8c0770
10040d02
4a060aff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a0f03
4a0614ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1103
4a0616ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1303
c084070c
bf8c007f
e00c2000
80020e00
bf8c0770
1006050f
d282000c
040e030e
4a0608ff
000000c8
e0301000
80000303
bf8c0770
10060d03
4a260aff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e0f13
4a2614ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1113
4a2616ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1313
d282000c
04320710
4a0808ff
000000cc
e0301000
80000404
bf8c0770
10080d04
4a0a0aff
000000cc
e0301000
80000505
bf8c0770
d2820004
04120f05
4a0a14ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121105
4a0a16ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121305
060a090c
c2020127
bf8c007f
10260a04
c2028101
bf8c007f
102e2605
4a0a1a81
34180a84
4a0a18ff
000000c0
e0301000
80000505
bf8c0770
100a0d05
4a142481
342c1484
4a142cff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
04160f0a
4a142881
34301484
4a1430ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416110a
4a142a81
34321484
4a1432ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416130a
4a1418ff
000000c4
e0301000
80000a0a
bf8c0770
10140d0a
4a162cff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a0f0b
4a1630ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a110b
4a1632ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a130b
1016150f
d282001a
042e0b0e
4a1618ff
000000c8
e0301000
80000b0b
bf8c0770
10160d0b
4a362cff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e0f1b
4a3630ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e111b
4a3632ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e131b
d282001a
046a1710
4a1818ff
000000cc
e0301000
80000c0c
bf8c0770
10180d0c
4a2c2cff
000000cc
e0301000
80001616
bf8c0770
d282000c
04320f16
4a2c30ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321116
4a2c32ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321316
062c191a
102c2c04
c2040105
bf8c007f
d2820017
045e2c08
4a1a1a82
34301a84
4a1a30ff
000000c0
e0301000
80000d0d
bf8c0770
101a0d0d
4a242482
34322484
4a2432ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04360f12
4a242882
34342484
4a2434ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361112
4a242a82
342a2484
4a242aff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361312
4a2430ff
000000c4
e0301000
80001212
bf8c0770
10240d12
4a2832ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a0f14
4a2834ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1114
4a282aff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1314
1028250f
d282001b
04521b0e
4a2830ff
000000c8
e0301000
80001414
bf8c0770
10280d14
4a3832ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
04520f1c
4a3834ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452111c
4a382aff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452131c
d282000e
046e2910
4a1e30ff
000000cc
e0301000
80000f0f
bf8c0770
101e0d0f
4a2032ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e0f10
4a2034ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e1110
4a202aff
000000cc
e0301000
80001010
bf8c0770
d2820006
043e1310
060e0d0e
10100e04
c2048109
bf8c007f
d2820007
045e1009
c2050100
bf8c007f
1012260a
c2058104
bf8c007f
d2820009
04262c0b
c2068108
bf8c007f
d282000f
0426100d
10121f0f
d282000e
04260f07
c2060102
bf8c007f
1012260c
c2070106
bf8c007f
d2820009
04262c0e
c207810a
bf8c007f
d2820009
0426100f
d2820008
043a1309
7e1c5b08
100e1d07
10101d0f
c0880710
bf8c007f
e00c2000
80041500
c2080121
c2088123
bf8c0070
7e1e0211
d282000f
043c2116
c2080120
c2088122
bf8c007f
7e200211
d2820010
04402115
f800020f
07080f10
c0880714
bf8c000f
e00c2000
80041500
bf8c0770
101e0516
d282000f
043e0315
d282000f
043e0717
061e090f
101e1e04
10201e05
10221516
d2820011
04460b15
d2820011
04461717
06221911
10222204
d2820010
04422208
10262516
d2820013
044e1b15
d2820013
044e2917
06260d13
102a2604
d2820010
04422a09
102c1110
10261e0a
d2820013
044e220b
d2820013
044e2a0d
102e0f13
082c2d17
10121d09
102e1313
101c1e0c
d282000e
043a220e
d282000e
043a2a0f
1010110e
08102f08
100e0f0e
101e1310
080e0f0f
f800021f
16080709
bf8c070f
7e0e02f2
f800022f
070e1013
c0820700
bf8c000f
e00c2000
80010e00
c202012d
c2028129
bf8c0070
7e000205
d2820007
0400090f
10000507
c202012c
c2028128
bf8c007f
7e040205
d2820002
0408090e
d2820000
04020302
c202012e
c202812a
bf8c007f
7e020205
d2820008
04040910
d2820000
04020708
06000900
c2020113
bf8c007f
10060004
10021507
d2820001
04060b02
d2820001
04061708
06021901
c2020117
bf8c007f
d2820003
040e0204
10082507
d2820002
04121b02
d2820002
040a2908
06040d02
c202011b
bf8c007f
d2820003
040e0404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080004
c2020116
bf8c007f
d2820004
04120204
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0004
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04020204
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..202]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+11], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+11], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+11], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[8].wwww
 57: MUL TEMP[2], CONST[0], TEMP[1].xxxx
 58: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
 59: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
 60: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 61: RSQ TEMP[2].x, TEMP[2].xxxx
 62: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 63: MOV OUT[1], TEMP[1]
 64: MOV OUT[0], TEMP[0]
 65: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = extractelement <4 x float> %51, i32 3
  %56 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %5)
  %59 = extractelement <4 x float> %58, i32 0
  %60 = extractelement <4 x float> %58, i32 1
  %61 = extractelement <4 x float> %58, i32 2
  %62 = extractelement <4 x float> %58, i32 3
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = fptosi float %59 to i32
  %70 = fptosi float %60 to i32
  %71 = fptosi float %61 to i32
  %72 = fptosi float %62 to i32
  %73 = bitcast i32 %69 to float
  %74 = bitcast i32 %70 to float
  %75 = bitcast i32 %71 to float
  %76 = bitcast i32 %72 to float
  %77 = fmul float %46, %40
  %78 = fadd float %77, %37
  %79 = fmul float %47, %41
  %80 = fadd float %79, %38
  %81 = fmul float %48, %42
  %82 = fadd float %81, %39
  %83 = bitcast float %76 to i32
  %84 = mul i32 3, %83
  %85 = bitcast i32 %84 to float
  %86 = bitcast float %75 to i32
  %87 = mul i32 3, %86
  %88 = bitcast i32 %87 to float
  %89 = bitcast float %74 to i32
  %90 = mul i32 3, %89
  %91 = bitcast i32 %90 to float
  %92 = bitcast float %73 to i32
  %93 = mul i32 3, %92
  %94 = bitcast i32 %93 to float
  %95 = bitcast float %94 to i32
  %96 = shl i32 %95, 4
  %97 = add i32 %96, 176
  %98 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %97)
  %99 = fmul float %98, %52
  %100 = shl i32 %95, 4
  %101 = add i32 %100, 180
  %102 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %101)
  %103 = fmul float %102, %52
  %104 = shl i32 %95, 4
  %105 = add i32 %104, 184
  %106 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %105)
  %107 = fmul float %106, %52
  %108 = shl i32 %95, 4
  %109 = add i32 %108, 188
  %110 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %109)
  %111 = fmul float %110, %52
  %112 = bitcast float %91 to i32
  %113 = shl i32 %112, 4
  %114 = add i32 %113, 176
  %115 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %114)
  %116 = fmul float %115, %53
  %117 = fadd float %116, %99
  %118 = shl i32 %112, 4
  %119 = add i32 %118, 180
  %120 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %119)
  %121 = fmul float %120, %53
  %122 = fadd float %121, %103
  %123 = shl i32 %112, 4
  %124 = add i32 %123, 184
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %53
  %127 = fadd float %126, %107
  %128 = shl i32 %112, 4
  %129 = add i32 %128, 188
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %53
  %132 = fadd float %131, %111
  %133 = bitcast float %88 to i32
  %134 = shl i32 %133, 4
  %135 = add i32 %134, 176
  %136 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %135)
  %137 = fmul float %136, %54
  %138 = fadd float %137, %117
  %139 = shl i32 %133, 4
  %140 = add i32 %139, 180
  %141 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %140)
  %142 = fmul float %141, %54
  %143 = fadd float %142, %122
  %144 = shl i32 %133, 4
  %145 = add i32 %144, 184
  %146 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %145)
  %147 = fmul float %146, %54
  %148 = fadd float %147, %127
  %149 = shl i32 %133, 4
  %150 = add i32 %149, 188
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %54
  %153 = fadd float %152, %132
  %154 = bitcast float %85 to i32
  %155 = shl i32 %154, 4
  %156 = add i32 %155, 176
  %157 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %156)
  %158 = fmul float %157, %55
  %159 = fadd float %158, %138
  %160 = shl i32 %154, 4
  %161 = add i32 %160, 180
  %162 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %161)
  %163 = fmul float %162, %55
  %164 = fadd float %163, %143
  %165 = shl i32 %154, 4
  %166 = add i32 %165, 184
  %167 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %166)
  %168 = fmul float %167, %55
  %169 = fadd float %168, %148
  %170 = shl i32 %154, 4
  %171 = add i32 %170, 188
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %55
  %174 = fadd float %173, %153
  %175 = bitcast float %76 to i32
  %176 = mul i32 3, %175
  %177 = add i32 %176, 1
  %178 = bitcast i32 %177 to float
  %179 = bitcast float %75 to i32
  %180 = mul i32 3, %179
  %181 = add i32 %180, 1
  %182 = bitcast i32 %181 to float
  %183 = bitcast float %74 to i32
  %184 = mul i32 3, %183
  %185 = add i32 %184, 1
  %186 = bitcast i32 %185 to float
  %187 = bitcast float %73 to i32
  %188 = mul i32 3, %187
  %189 = add i32 %188, 1
  %190 = bitcast i32 %189 to float
  %191 = bitcast float %190 to i32
  %192 = shl i32 %191, 4
  %193 = add i32 %192, 176
  %194 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %193)
  %195 = fmul float %194, %52
  %196 = shl i32 %191, 4
  %197 = add i32 %196, 180
  %198 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %197)
  %199 = fmul float %198, %52
  %200 = shl i32 %191, 4
  %201 = add i32 %200, 184
  %202 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %201)
  %203 = fmul float %202, %52
  %204 = shl i32 %191, 4
  %205 = add i32 %204, 188
  %206 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %205)
  %207 = fmul float %206, %52
  %208 = bitcast float %186 to i32
  %209 = shl i32 %208, 4
  %210 = add i32 %209, 176
  %211 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %210)
  %212 = fmul float %211, %53
  %213 = fadd float %212, %195
  %214 = shl i32 %208, 4
  %215 = add i32 %214, 180
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %53
  %218 = fadd float %217, %199
  %219 = shl i32 %208, 4
  %220 = add i32 %219, 184
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %53
  %223 = fadd float %222, %203
  %224 = shl i32 %208, 4
  %225 = add i32 %224, 188
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %53
  %228 = fadd float %227, %207
  %229 = bitcast float %182 to i32
  %230 = shl i32 %229, 4
  %231 = add i32 %230, 176
  %232 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %231)
  %233 = fmul float %232, %54
  %234 = fadd float %233, %213
  %235 = shl i32 %229, 4
  %236 = add i32 %235, 180
  %237 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %236)
  %238 = fmul float %237, %54
  %239 = fadd float %238, %218
  %240 = shl i32 %229, 4
  %241 = add i32 %240, 184
  %242 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %241)
  %243 = fmul float %242, %54
  %244 = fadd float %243, %223
  %245 = shl i32 %229, 4
  %246 = add i32 %245, 188
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %54
  %249 = fadd float %248, %228
  %250 = bitcast float %178 to i32
  %251 = shl i32 %250, 4
  %252 = add i32 %251, 176
  %253 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %252)
  %254 = fmul float %253, %55
  %255 = fadd float %254, %234
  %256 = shl i32 %250, 4
  %257 = add i32 %256, 180
  %258 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %257)
  %259 = fmul float %258, %55
  %260 = fadd float %259, %239
  %261 = shl i32 %250, 4
  %262 = add i32 %261, 184
  %263 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %262)
  %264 = fmul float %263, %55
  %265 = fadd float %264, %244
  %266 = shl i32 %250, 4
  %267 = add i32 %266, 188
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %55
  %270 = fadd float %269, %249
  %271 = bitcast float %76 to i32
  %272 = mul i32 3, %271
  %273 = add i32 %272, 2
  %274 = bitcast i32 %273 to float
  %275 = bitcast float %75 to i32
  %276 = mul i32 3, %275
  %277 = add i32 %276, 2
  %278 = bitcast i32 %277 to float
  %279 = bitcast float %74 to i32
  %280 = mul i32 3, %279
  %281 = add i32 %280, 2
  %282 = bitcast i32 %281 to float
  %283 = bitcast float %73 to i32
  %284 = mul i32 3, %283
  %285 = add i32 %284, 2
  %286 = bitcast i32 %285 to float
  %287 = bitcast float %286 to i32
  %288 = shl i32 %287, 4
  %289 = add i32 %288, 176
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %52
  %292 = shl i32 %287, 4
  %293 = add i32 %292, 180
  %294 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %293)
  %295 = fmul float %294, %52
  %296 = shl i32 %287, 4
  %297 = add i32 %296, 184
  %298 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %297)
  %299 = fmul float %298, %52
  %300 = shl i32 %287, 4
  %301 = add i32 %300, 188
  %302 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %301)
  %303 = fmul float %302, %52
  %304 = bitcast float %282 to i32
  %305 = shl i32 %304, 4
  %306 = add i32 %305, 176
  %307 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %306)
  %308 = fmul float %307, %53
  %309 = fadd float %308, %291
  %310 = shl i32 %304, 4
  %311 = add i32 %310, 180
  %312 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %311)
  %313 = fmul float %312, %53
  %314 = fadd float %313, %295
  %315 = shl i32 %304, 4
  %316 = add i32 %315, 184
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %53
  %319 = fadd float %318, %299
  %320 = shl i32 %304, 4
  %321 = add i32 %320, 188
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %53
  %324 = fadd float %323, %303
  %325 = bitcast float %278 to i32
  %326 = shl i32 %325, 4
  %327 = add i32 %326, 176
  %328 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %327)
  %329 = fmul float %328, %54
  %330 = fadd float %329, %309
  %331 = shl i32 %325, 4
  %332 = add i32 %331, 180
  %333 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %332)
  %334 = fmul float %333, %54
  %335 = fadd float %334, %314
  %336 = shl i32 %325, 4
  %337 = add i32 %336, 184
  %338 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %337)
  %339 = fmul float %338, %54
  %340 = fadd float %339, %319
  %341 = shl i32 %325, 4
  %342 = add i32 %341, 188
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %54
  %345 = fadd float %344, %324
  %346 = bitcast float %274 to i32
  %347 = shl i32 %346, 4
  %348 = add i32 %347, 176
  %349 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %348)
  %350 = fmul float %349, %55
  %351 = fadd float %350, %330
  %352 = shl i32 %346, 4
  %353 = add i32 %352, 180
  %354 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %353)
  %355 = fmul float %354, %55
  %356 = fadd float %355, %335
  %357 = shl i32 %346, 4
  %358 = add i32 %357, 184
  %359 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %358)
  %360 = fmul float %359, %55
  %361 = fadd float %360, %340
  %362 = shl i32 %346, 4
  %363 = add i32 %362, 188
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %55
  %366 = fadd float %365, %345
  %367 = fmul float %78, %159
  %368 = fmul float %80, %164
  %369 = fadd float %367, %368
  %370 = fmul float %82, %169
  %371 = fadd float %369, %370
  %372 = fmul float 1.000000e+00, %174
  %373 = fadd float %371, %372
  %374 = fmul float %78, %255
  %375 = fmul float %80, %260
  %376 = fadd float %374, %375
  %377 = fmul float %82, %265
  %378 = fadd float %376, %377
  %379 = fmul float 1.000000e+00, %270
  %380 = fadd float %378, %379
  %381 = fmul float %78, %351
  %382 = fmul float %80, %356
  %383 = fadd float %381, %382
  %384 = fmul float %82, %361
  %385 = fadd float %383, %384
  %386 = fmul float 1.000000e+00, %366
  %387 = fadd float %385, %386
  %388 = fmul float %66, %159
  %389 = fmul float %67, %164
  %390 = fadd float %388, %389
  %391 = fmul float %68, %169
  %392 = fadd float %390, %391
  %393 = fmul float 1.000000e+00, %174
  %394 = fadd float %392, %393
  %395 = fmul float %66, %255
  %396 = fmul float %67, %260
  %397 = fadd float %395, %396
  %398 = fmul float %68, %265
  %399 = fadd float %397, %398
  %400 = fmul float 1.000000e+00, %270
  %401 = fadd float %399, %400
  %402 = fmul float %66, %351
  %403 = fmul float %67, %356
  %404 = fadd float %402, %403
  %405 = fmul float %68, %361
  %406 = fadd float %404, %405
  %407 = fmul float 1.000000e+00, %366
  %408 = fadd float %406, %407
  %409 = fmul float %20, %373
  %410 = fmul float %21, %373
  %411 = fmul float %22, %373
  %412 = fmul float %23, %373
  %413 = fmul float %24, %380
  %414 = fadd float %413, %409
  %415 = fmul float %25, %380
  %416 = fadd float %415, %410
  %417 = fmul float %26, %380
  %418 = fadd float %417, %411
  %419 = fmul float %27, %380
  %420 = fadd float %419, %412
  %421 = fmul float %28, %387
  %422 = fadd float %421, %414
  %423 = fmul float %29, %387
  %424 = fadd float %423, %416
  %425 = fmul float %30, %387
  %426 = fadd float %425, %418
  %427 = fmul float %31, %387
  %428 = fadd float %427, %420
  %429 = fadd float %422, %32
  %430 = fadd float %424, %33
  %431 = fadd float %426, %34
  %432 = fadd float %428, %35
  %433 = fmul float %394, %36
  %434 = fmul float %401, %36
  %435 = fmul float %408, %36
  %436 = fmul float %11, %433
  %437 = fmul float %12, %433
  %438 = fmul float %13, %433
  %439 = fmul float %14, %434
  %440 = fadd float %439, %436
  %441 = fmul float %15, %434
  %442 = fadd float %441, %437
  %443 = fmul float %16, %434
  %444 = fadd float %443, %438
  %445 = fmul float %17, %435
  %446 = fadd float %445, %440
  %447 = fmul float %18, %435
  %448 = fadd float %447, %442
  %449 = fmul float %19, %435
  %450 = fadd float %449, %444
  %451 = fmul float %446, %446
  %452 = fmul float %448, %448
  %453 = fadd float %452, %451
  %454 = fmul float %450, %450
  %455 = fadd float %453, %454
  %456 = call float @llvm.AMDGPU.rsq(float %455)
  %457 = fmul float %446, %456
  %458 = fmul float %448, %456
  %459 = fmul float %450, %456
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %457, float %458, float %459, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %429, float %430, float %431, float %432)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020b00
bf8c0770
7e02110b
d2d6000a
02010701
34101484
4a0210ff
000000b0
c0800100
bf8c007f
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020200
bf8c0770
10020501
7e0c110c
d2d6000f
02010706
34121e84
4a0c12ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060706
7e0c110d
d2d60012
02010706
34202484
4a0c20ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060906
7e0c110e
d2d60013
02010706
34222684
4a0c22ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060b06
4a0c10ff
000000b4
e0301000
80000606
bf8c0770
100c0506
4a0e12ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0707
4a0e20ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0907
4a0e22ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0b07
c084070c
bf8c007f
e00c2000
80020b00
bf8c0770
100e0d0c
d2820014
041e030b
4a0e10ff
000000b8
e0301000
80000707
bf8c0770
100e0507
4a2a12ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0715
4a2a20ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0915
4a2a22ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0b15
d2820014
04520f0d
4a1010ff
000000bc
e0301000
80000808
bf8c0770
10100508
4a1212ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220709
4a1220ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220909
4a1222ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220b09
06121114
c2020123
bf8c007f
10221204
c2028101
bf8c007f
102e2205
4a121481
342a1284
4a122aff
000000b0
e0301000
80000909
bf8c0770
10120509
4a201e81
342c2084
4a202cff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260710
4a202481
34302084
4a2030ff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260910
4a202681
34322084
4a2032ff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260b10
4a202aff
000000b4
e0301000
80001010
bf8c0770
10200510
4a282cff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420714
4a2830ff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420914
4a2832ff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420b14
1028210c
d282001a
0452130b
4a282aff
000000b8
e0301000
80001414
bf8c0770
10280514
4a362cff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
0452071b
4a3630ff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
0452091b
4a3632ff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
04520b1b
d282001a
046a290d
4a2a2aff
000000bc
e0301000
80001515
bf8c0770
102a0515
4a2c2cff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560716
4a2c30ff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560916
4a2c32ff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560b16
062c2b1a
102c2c04
c2028105
bf8c007f
d2820017
045e2c05
4a141482
34301484
4a1430ff
000000b0
e0301000
80000a0a
bf8c0770
1014050a
4a1e1e82
34321e84
4a1e32ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a070f
4a1e2482
34341e84
4a1e34ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a090f
4a1e2682
34261e84
4a1e26ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a0b0f
4a1e30ff
000000b4
e0301000
80000f0f
bf8c0770
101e050f
4a2432ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0712
4a2434ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0912
4a2426ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0b12
10241f0c
d282001b
044a150b
4a2430ff
000000b8
e0301000
80001212
bf8c0770
10240512
4a3832ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a071c
4a3834ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a091c
4a3826ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a0b1c
d282000b
046e250d
4a1830ff
000000bc
e0301000
80000c0c
bf8c0770
1018050c
4a1a32ff
000000bc
e0301000
80000d0d
bf8c0770
d282000c
0432070d
4a1a34ff
000000bc
e0301000
80000d0d
bf8c0770
d282000c
0432090d
4a1a26ff
000000bc
e0301000
80000d0d
bf8c0770
d2820002
04320b0d
0606050b
10080604
c2020109
bf8c007f
d2820003
045e0804
c2020100
bf8c007f
100a2204
c2020104
bf8c007f
d2820005
04162c04
c2020108
bf8c007f
d2820005
04160804
10160b05
d282000b
042e0703
c2020102
bf8c007f
10182204
c2020106
bf8c007f
d282000c
04322c04
c202010a
bf8c007f
d2820004
04320804
d282000b
042e0904
7e165b0b
10081704
10061703
100a1705
7e1602f2
f800020f
0b040305
c0820700
bf8c000f
e00c2000
80010b00
c2020129
c2028125
bf8c0070
7e000205
d2820003
0400090c
10000d03
c2020128
c2028124
bf8c007f
7e080205
d2820004
0410090b
d2820000
04020304
c202012a
c2028126
bf8c007f
7e020205
d2820005
0404090d
d2820000
04020f05
06001100
c2020113
bf8c007f
100c0004
10022103
d2820001
04061304
d2820001
04062905
06022b01
c2020117
bf8c007f
d2820006
041a0204
10061f03
d2820003
040e1504
d2820003
040e2505
06040503
c202011b
bf8c007f
d2820003
041a0404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080004
c2020116
bf8c007f
d2820004
04120204
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0004
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04020204
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..203]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: MOV TEMP[3].w, IMM[0].xxxx
  6: MOV TEMP[3].xyz, IN[5].xyzx
  7: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww
  8: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz
  9: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy
 10: UMUL TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx
 11: UARL ADDR[0].x, TEMP[7].xxxx
 12: MUL TEMP[7], CONST[ADDR[0].x+12], IN[1].xxxx
 13: UARL ADDR[0].x, TEMP[6].xxxx
 14: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[7]
 15: UARL ADDR[0].x, TEMP[5].xxxx
 16: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[6]
 17: UARL ADDR[0].x, TEMP[4].xxxx
 18: UARL ADDR[0].x, TEMP[4].xxxx
 19: MAD TEMP[4], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[5]
 20: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 21: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 22: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 23: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 24: UARL ADDR[0].x, TEMP[8].xxxx
 25: MUL TEMP[8], CONST[ADDR[0].x+12], IN[1].xxxx
 26: UARL ADDR[0].x, TEMP[7].xxxx
 27: MAD TEMP[7], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[8]
 28: UARL ADDR[0].x, TEMP[6].xxxx
 29: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[7]
 30: UARL ADDR[0].x, TEMP[5].xxxx
 31: UARL ADDR[0].x, TEMP[5].xxxx
 32: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[6]
 33: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 34: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 35: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 36: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 37: UARL ADDR[0].x, TEMP[0].xxxx
 38: MUL TEMP[0], CONST[ADDR[0].x+12], IN[1].xxxx
 39: UARL ADDR[0].x, TEMP[8].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[0]
 41: UARL ADDR[0].x, TEMP[7].xxxx
 42: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[0]
 43: UARL ADDR[0].x, TEMP[6].xxxx
 44: UARL ADDR[0].x, TEMP[6].xxxx
 45: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[0]
 46: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 47: DP4 TEMP[7].x, TEMP[1], TEMP[5]
 48: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 49: DP4 TEMP[8].x, TEMP[2], TEMP[4]
 50: DP4 TEMP[9].x, TEMP[2], TEMP[5]
 51: MOV TEMP[8].y, TEMP[9].xxxx
 52: DP4 TEMP[2].x, TEMP[2], TEMP[0]
 53: MOV TEMP[8].z, TEMP[2].xxxx
 54: DP4 TEMP[2].x, TEMP[3], TEMP[4]
 55: DP4 TEMP[4].x, TEMP[3], TEMP[5]
 56: MOV TEMP[2].y, TEMP[4].xxxx
 57: DP4 TEMP[0].x, TEMP[3], TEMP[0]
 58: MOV TEMP[2].z, TEMP[0].xxxx
 59: MUL TEMP[0], CONST[4], TEMP[6].xxxx
 60: MAD TEMP[0], CONST[5], TEMP[7].xxxx, TEMP[0]
 61: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 62: ADD TEMP[0], TEMP[0], CONST[7]
 63: MUL TEMP[1].xyz, TEMP[8].xyzz, CONST[9].wwww
 64: MUL TEMP[3], CONST[0], TEMP[1].xxxx
 65: MAD TEMP[3], CONST[1], TEMP[1].yyyy, TEMP[3]
 66: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[3]
 67: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
 68: RSQ TEMP[3].x, TEMP[3].xxxx
 69: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 70: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[9].wwww
 71: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 72: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 73: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 74: MAD TEMP[3].xy, IN[4].xyyy, CONST[8].xyyy, CONST[8].zwww
 75: MOV TEMP[3].zw, TEMP[1].yyxy
 76: MOV TEMP[4].x, TEMP[1].zzzz
 77: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 78: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 79: MOV TEMP[4].yzw, TEMP[1].yxyz
 80: MOV TEMP[1].xyz, TEMP[2].xyzx
 81: MOV OUT[1], TEMP[3]
 82: MOV OUT[3], TEMP[1]
 83: MOV OUT[2], TEMP[4]
 84: MOV OUT[0], TEMP[0]
 85: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = fptosi float %63 to i32
  %85 = fptosi float %64 to i32
  %86 = fptosi float %65 to i32
  %87 = fptosi float %66 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %50, %44
  %93 = fadd float %92, %41
  %94 = fmul float %51, %45
  %95 = fadd float %94, %42
  %96 = fmul float %52, %46
  %97 = fadd float %96, %43
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 192
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %56
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 196
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %56
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 200
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %56
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 204
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %56
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 192
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %57
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 196
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %57
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 200
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %57
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 204
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %57
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 192
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %58
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 196
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %58
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 200
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %58
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 204
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %58
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 192
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %59
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 196
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %59
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 200
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %59
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 204
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %59
  %189 = fadd float %188, %168
  %190 = bitcast float %91 to i32
  %191 = mul i32 3, %190
  %192 = add i32 %191, 1
  %193 = bitcast i32 %192 to float
  %194 = bitcast float %90 to i32
  %195 = mul i32 3, %194
  %196 = add i32 %195, 1
  %197 = bitcast i32 %196 to float
  %198 = bitcast float %89 to i32
  %199 = mul i32 3, %198
  %200 = add i32 %199, 1
  %201 = bitcast i32 %200 to float
  %202 = bitcast float %88 to i32
  %203 = mul i32 3, %202
  %204 = add i32 %203, 1
  %205 = bitcast i32 %204 to float
  %206 = bitcast float %205 to i32
  %207 = shl i32 %206, 4
  %208 = add i32 %207, 192
  %209 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %208)
  %210 = fmul float %209, %56
  %211 = shl i32 %206, 4
  %212 = add i32 %211, 196
  %213 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %212)
  %214 = fmul float %213, %56
  %215 = shl i32 %206, 4
  %216 = add i32 %215, 200
  %217 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %216)
  %218 = fmul float %217, %56
  %219 = shl i32 %206, 4
  %220 = add i32 %219, 204
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %56
  %223 = bitcast float %201 to i32
  %224 = shl i32 %223, 4
  %225 = add i32 %224, 192
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %57
  %228 = fadd float %227, %210
  %229 = shl i32 %223, 4
  %230 = add i32 %229, 196
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %57
  %233 = fadd float %232, %214
  %234 = shl i32 %223, 4
  %235 = add i32 %234, 200
  %236 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %235)
  %237 = fmul float %236, %57
  %238 = fadd float %237, %218
  %239 = shl i32 %223, 4
  %240 = add i32 %239, 204
  %241 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %240)
  %242 = fmul float %241, %57
  %243 = fadd float %242, %222
  %244 = bitcast float %197 to i32
  %245 = shl i32 %244, 4
  %246 = add i32 %245, 192
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %58
  %249 = fadd float %248, %228
  %250 = shl i32 %244, 4
  %251 = add i32 %250, 196
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %58
  %254 = fadd float %253, %233
  %255 = shl i32 %244, 4
  %256 = add i32 %255, 200
  %257 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %256)
  %258 = fmul float %257, %58
  %259 = fadd float %258, %238
  %260 = shl i32 %244, 4
  %261 = add i32 %260, 204
  %262 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %261)
  %263 = fmul float %262, %58
  %264 = fadd float %263, %243
  %265 = bitcast float %193 to i32
  %266 = shl i32 %265, 4
  %267 = add i32 %266, 192
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %59
  %270 = fadd float %269, %249
  %271 = shl i32 %265, 4
  %272 = add i32 %271, 196
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %59
  %275 = fadd float %274, %254
  %276 = shl i32 %265, 4
  %277 = add i32 %276, 200
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = fmul float %278, %59
  %280 = fadd float %279, %259
  %281 = shl i32 %265, 4
  %282 = add i32 %281, 204
  %283 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %282)
  %284 = fmul float %283, %59
  %285 = fadd float %284, %264
  %286 = bitcast float %91 to i32
  %287 = mul i32 3, %286
  %288 = add i32 %287, 2
  %289 = bitcast i32 %288 to float
  %290 = bitcast float %90 to i32
  %291 = mul i32 3, %290
  %292 = add i32 %291, 2
  %293 = bitcast i32 %292 to float
  %294 = bitcast float %89 to i32
  %295 = mul i32 3, %294
  %296 = add i32 %295, 2
  %297 = bitcast i32 %296 to float
  %298 = bitcast float %88 to i32
  %299 = mul i32 3, %298
  %300 = add i32 %299, 2
  %301 = bitcast i32 %300 to float
  %302 = bitcast float %301 to i32
  %303 = shl i32 %302, 4
  %304 = add i32 %303, 192
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = fmul float %305, %56
  %307 = shl i32 %302, 4
  %308 = add i32 %307, 196
  %309 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %308)
  %310 = fmul float %309, %56
  %311 = shl i32 %302, 4
  %312 = add i32 %311, 200
  %313 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %312)
  %314 = fmul float %313, %56
  %315 = shl i32 %302, 4
  %316 = add i32 %315, 204
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %56
  %319 = bitcast float %297 to i32
  %320 = shl i32 %319, 4
  %321 = add i32 %320, 192
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %57
  %324 = fadd float %323, %306
  %325 = shl i32 %319, 4
  %326 = add i32 %325, 196
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %57
  %329 = fadd float %328, %310
  %330 = shl i32 %319, 4
  %331 = add i32 %330, 200
  %332 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %331)
  %333 = fmul float %332, %57
  %334 = fadd float %333, %314
  %335 = shl i32 %319, 4
  %336 = add i32 %335, 204
  %337 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %336)
  %338 = fmul float %337, %57
  %339 = fadd float %338, %318
  %340 = bitcast float %293 to i32
  %341 = shl i32 %340, 4
  %342 = add i32 %341, 192
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %58
  %345 = fadd float %344, %324
  %346 = shl i32 %340, 4
  %347 = add i32 %346, 196
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %58
  %350 = fadd float %349, %329
  %351 = shl i32 %340, 4
  %352 = add i32 %351, 200
  %353 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %352)
  %354 = fmul float %353, %58
  %355 = fadd float %354, %334
  %356 = shl i32 %340, 4
  %357 = add i32 %356, 204
  %358 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %357)
  %359 = fmul float %358, %58
  %360 = fadd float %359, %339
  %361 = bitcast float %289 to i32
  %362 = shl i32 %361, 4
  %363 = add i32 %362, 192
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %59
  %366 = fadd float %365, %345
  %367 = shl i32 %361, 4
  %368 = add i32 %367, 196
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %59
  %371 = fadd float %370, %350
  %372 = shl i32 %361, 4
  %373 = add i32 %372, 200
  %374 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %373)
  %375 = fmul float %374, %59
  %376 = fadd float %375, %355
  %377 = shl i32 %361, 4
  %378 = add i32 %377, 204
  %379 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %378)
  %380 = fmul float %379, %59
  %381 = fadd float %380, %360
  %382 = fmul float %93, %174
  %383 = fmul float %95, %179
  %384 = fadd float %382, %383
  %385 = fmul float %97, %184
  %386 = fadd float %384, %385
  %387 = fmul float 1.000000e+00, %189
  %388 = fadd float %386, %387
  %389 = fmul float %93, %270
  %390 = fmul float %95, %275
  %391 = fadd float %389, %390
  %392 = fmul float %97, %280
  %393 = fadd float %391, %392
  %394 = fmul float 1.000000e+00, %285
  %395 = fadd float %393, %394
  %396 = fmul float %93, %366
  %397 = fmul float %95, %371
  %398 = fadd float %396, %397
  %399 = fmul float %97, %376
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %381
  %402 = fadd float %400, %401
  %403 = fmul float %70, %174
  %404 = fmul float %71, %179
  %405 = fadd float %403, %404
  %406 = fmul float %72, %184
  %407 = fadd float %405, %406
  %408 = fmul float 1.000000e+00, %189
  %409 = fadd float %407, %408
  %410 = fmul float %70, %270
  %411 = fmul float %71, %275
  %412 = fadd float %410, %411
  %413 = fmul float %72, %280
  %414 = fadd float %412, %413
  %415 = fmul float 1.000000e+00, %285
  %416 = fadd float %414, %415
  %417 = fmul float %70, %366
  %418 = fmul float %71, %371
  %419 = fadd float %417, %418
  %420 = fmul float %72, %376
  %421 = fadd float %419, %420
  %422 = fmul float 1.000000e+00, %381
  %423 = fadd float %421, %422
  %424 = fmul float %81, %174
  %425 = fmul float %82, %179
  %426 = fadd float %424, %425
  %427 = fmul float %83, %184
  %428 = fadd float %426, %427
  %429 = fmul float 1.000000e+00, %189
  %430 = fadd float %428, %429
  %431 = fmul float %81, %270
  %432 = fmul float %82, %275
  %433 = fadd float %431, %432
  %434 = fmul float %83, %280
  %435 = fadd float %433, %434
  %436 = fmul float 1.000000e+00, %285
  %437 = fadd float %435, %436
  %438 = fmul float %81, %366
  %439 = fmul float %82, %371
  %440 = fadd float %438, %439
  %441 = fmul float %83, %376
  %442 = fadd float %440, %441
  %443 = fmul float 1.000000e+00, %381
  %444 = fadd float %442, %443
  %445 = fmul float %20, %388
  %446 = fmul float %21, %388
  %447 = fmul float %22, %388
  %448 = fmul float %23, %388
  %449 = fmul float %24, %395
  %450 = fadd float %449, %445
  %451 = fmul float %25, %395
  %452 = fadd float %451, %446
  %453 = fmul float %26, %395
  %454 = fadd float %453, %447
  %455 = fmul float %27, %395
  %456 = fadd float %455, %448
  %457 = fmul float %28, %402
  %458 = fadd float %457, %450
  %459 = fmul float %29, %402
  %460 = fadd float %459, %452
  %461 = fmul float %30, %402
  %462 = fadd float %461, %454
  %463 = fmul float %31, %402
  %464 = fadd float %463, %456
  %465 = fadd float %458, %32
  %466 = fadd float %460, %33
  %467 = fadd float %462, %34
  %468 = fadd float %464, %35
  %469 = fmul float %409, %40
  %470 = fmul float %416, %40
  %471 = fmul float %423, %40
  %472 = fmul float %11, %469
  %473 = fmul float %12, %469
  %474 = fmul float %13, %469
  %475 = fmul float %14, %470
  %476 = fadd float %475, %472
  %477 = fmul float %15, %470
  %478 = fadd float %477, %473
  %479 = fmul float %16, %470
  %480 = fadd float %479, %474
  %481 = fmul float %17, %471
  %482 = fadd float %481, %476
  %483 = fmul float %18, %471
  %484 = fadd float %483, %478
  %485 = fmul float %19, %471
  %486 = fadd float %485, %480
  %487 = fmul float %482, %482
  %488 = fmul float %484, %484
  %489 = fadd float %488, %487
  %490 = fmul float %486, %486
  %491 = fadd float %489, %490
  %492 = call float @llvm.AMDGPU.rsq(float %491)
  %493 = fmul float %482, %492
  %494 = fmul float %484, %492
  %495 = fmul float %486, %492
  %496 = fmul float %430, %40
  %497 = fmul float %437, %40
  %498 = fmul float %444, %40
  %499 = fmul float %11, %496
  %500 = fmul float %12, %496
  %501 = fmul float %13, %496
  %502 = fmul float %14, %497
  %503 = fadd float %502, %499
  %504 = fmul float %15, %497
  %505 = fadd float %504, %500
  %506 = fmul float %16, %497
  %507 = fadd float %506, %501
  %508 = fmul float %17, %498
  %509 = fadd float %508, %503
  %510 = fmul float %18, %498
  %511 = fadd float %510, %505
  %512 = fmul float %19, %498
  %513 = fadd float %512, %507
  %514 = fmul float %76, %36
  %515 = fadd float %514, %38
  %516 = fmul float %77, %37
  %517 = fadd float %516, %39
  %518 = fmul float %513, %494
  %519 = fmul float %509, %495
  %520 = fmul float %511, %493
  %521 = fsub float -0.000000e+00, %518
  %522 = fmul float %511, %495
  %523 = fadd float %522, %521
  %524 = fsub float -0.000000e+00, %519
  %525 = fmul float %513, %493
  %526 = fadd float %525, %524
  %527 = fsub float -0.000000e+00, %520
  %528 = fmul float %509, %494
  %529 = fadd float %528, %527
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %515, float %517, float %493, float %494)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %495, float %523, float %526, float %529)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %509, float %511, float %513, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %465, float %466, float %467, float %468)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020e00
bf8c0770
7e02110e
d2d6000d
02010701
34081a84
4a0208ff
000000c0
c0800100
bf8c007f
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020600
bf8c0770
10020d01
7e04110f
d2d60012
02010702
340a2484
4a040aff
000000c0
e0301000
80000202
bf8c0770
d2820001
04060f02
7e041110
d2d60014
02010702
34142884
4a0414ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061102
7e041111
d2d60015
02010702
34162a84
4a0416ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061302
4a0408ff
000000c4
e0301000
80000202
bf8c0770
10040d02
4a060aff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a0f03
4a0614ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1103
4a0616ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1303
c084070c
bf8c007f
e00c2000
80020e00
bf8c0770
1006050f
d282000c
040e030e
4a0608ff
000000c8
e0301000
80000303
bf8c0770
10060d03
4a260aff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e0f13
4a2614ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1113
4a2616ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1313
d282000c
04320710
4a0808ff
000000cc
e0301000
80000404
bf8c0770
10080d04
4a0a0aff
000000cc
e0301000
80000505
bf8c0770
d2820004
04120f05
4a0a14ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121105
4a0a16ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121305
060a090c
c2020127
bf8c007f
10260a04
c2028101
bf8c007f
102e2605
4a0a1a81
34180a84
4a0a18ff
000000c0
e0301000
80000505
bf8c0770
100a0d05
4a142481
342c1484
4a142cff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
04160f0a
4a142881
34301484
4a1430ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416110a
4a142a81
34321484
4a1432ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416130a
4a1418ff
000000c4
e0301000
80000a0a
bf8c0770
10140d0a
4a162cff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a0f0b
4a1630ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a110b
4a1632ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a130b
1016150f
d282001a
042e0b0e
4a1618ff
000000c8
e0301000
80000b0b
bf8c0770
10160d0b
4a362cff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e0f1b
4a3630ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e111b
4a3632ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e131b
d282001a
046a1710
4a1818ff
000000cc
e0301000
80000c0c
bf8c0770
10180d0c
4a2c2cff
000000cc
e0301000
80001616
bf8c0770
d282000c
04320f16
4a2c30ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321116
4a2c32ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321316
062c191a
102c2c04
c2040105
bf8c007f
d2820017
045e2c08
4a1a1a82
34301a84
4a1a30ff
000000c0
e0301000
80000d0d
bf8c0770
101a0d0d
4a242482
34322484
4a2432ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04360f12
4a242882
34342484
4a2434ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361112
4a242a82
342a2484
4a242aff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361312
4a2430ff
000000c4
e0301000
80001212
bf8c0770
10240d12
4a2832ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a0f14
4a2834ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1114
4a282aff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1314
1028250f
d282001b
04521b0e
4a2830ff
000000c8
e0301000
80001414
bf8c0770
10280d14
4a3832ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
04520f1c
4a3834ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452111c
4a382aff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452131c
d282000e
046e2910
4a1e30ff
000000cc
e0301000
80000f0f
bf8c0770
101e0d0f
4a2032ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e0f10
4a2034ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e1110
4a202aff
000000cc
e0301000
80001010
bf8c0770
d2820006
043e1310
060e0d0e
10100e04
c2048109
bf8c007f
d2820007
045e1009
c2050100
bf8c007f
1012260a
c2058104
bf8c007f
d2820009
04262c0b
c2068108
bf8c007f
d282000f
0426100d
10121f0f
d282000e
04260f07
c2060102
bf8c007f
1012260c
c2070106
bf8c007f
d2820009
04262c0e
c207810a
bf8c007f
d2820009
0426100f
d2820008
043a1309
7e1c5b08
100e1d07
10101d0f
c0880710
bf8c007f
e00c2000
80041500
c2080121
c2088123
bf8c0070
7e1e0211
d282000f
043c2116
c2080120
c2088122
bf8c007f
7e200211
d2820010
04402115
f800020f
07080f10
c0880714
bf8c000f
e00c2000
80041500
bf8c0770
101e0516
d282000f
043e0315
d282000f
043e0717
061e090f
101e1e04
10201e05
10221516
d2820011
04460b15
d2820011
04461717
06221911
10222204
d2820010
04422208
10262516
d2820013
044e1b15
d2820013
044e2917
06260d13
102a2604
d2820010
04422a09
102c1110
10261e0a
d2820013
044e220b
d2820013
044e2a0d
102e0f13
082c2d17
10121d09
102e1313
101c1e0c
d282000e
043a220e
d282000e
043a2a0f
1010110e
08102f08
100e0f0e
101e1310
080e0f0f
f800021f
16080709
bf8c070f
7e0e02f2
f800022f
070e1013
c0820700
bf8c000f
e00c2000
80010e00
c202012d
c2028129
bf8c0070
7e000205
d2820007
0400090f
10000507
c202012c
c2028128
bf8c007f
7e040205
d2820002
0408090e
d2820000
04020302
c202012e
c202812a
bf8c007f
7e020205
d2820008
04040910
d2820000
04020708
06000900
c2020113
bf8c007f
10060004
10021507
d2820001
04060b02
d2820001
04061708
06021901
c2020117
bf8c007f
d2820003
040e0204
10082507
d2820002
04121b02
d2820002
040a2908
06040d02
c202011b
bf8c007f
d2820003
040e0404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080004
c2020116
bf8c007f
d2820004
04120204
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0004
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04020204
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..3], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
  9: RSQ TEMP[3].x, TEMP[3].xxxx
 10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 13: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 14: F2I TEMP[1].x, -TEMP[1]
 15: UIF TEMP[1].xxxx :1
 16:   MOV TEMP[1].x, IMM[0].yyyy
 17: ELSE :1
 18:   MOV TEMP[1].x, IMM[0].wwww
 19: ENDIF
 20: MOV TEMP[2].z, TEMP[1].xxxx
 21: MUL TEMP[1].x, CONST[2].xxxx, IMM[1].xxxx
 22: MOV TEMP[3].xy, IN[1].xyyy
 23: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
 24: MAD TEMP[3].x, TEMP[3].wwww, CONST[1].yyyy, CONST[1].zzzz
 25: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 26: F2I TEMP[3].x, -TEMP[3]
 27: UIF TEMP[3].xxxx :1
 28:   KILL
 29: ENDIF
 30: MOV TEMP[1].x, TEMP[1].xxxx
 31: MOV TEMP[1].yzw, TEMP[2].yxyz
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %26 = load <32 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = fcmp ugt float %16, 0.000000e+00
  %30 = select i1 %29, float 1.000000e+00, float 0.000000e+00
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.AMDIL.clamp.(float %30, float 0.000000e+00, float 1.000000e+00)
  %37 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %38 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %39 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %40 = bitcast float %36 to i32
  %41 = icmp ne i32 %40, 0
  %. = select i1 %41, float -1.000000e+00, float 1.000000e+00
  %42 = fmul float %33, %33
  %43 = fmul float %34, %34
  %44 = fadd float %43, %42
  %45 = fmul float %35, %35
  %46 = fadd float %44, %45
  %47 = call float @llvm.AMDGPU.rsq(float %46)
  %48 = fmul float %33, %47
  %49 = fmul float %34, %47
  %50 = fmul float %35, %47
  %51 = fmul float %48, %.
  %52 = fmul float %49, %.
  %53 = fmul float %50, %.
  %54 = fmul float %51, 5.000000e-01
  %55 = fadd float %54, 5.000000e-01
  %56 = fmul float %52, 5.000000e-01
  %57 = fadd float %56, 5.000000e-01
  %58 = fcmp uge float %53, 0.000000e+00
  %59 = select i1 %58, float 1.000000e+00, float 0.000000e+00
  %60 = fsub float -0.000000e+00, %59
  %61 = fptosi float %60 to i32
  %62 = bitcast i32 %61 to float
  %63 = bitcast float %62 to i32
  %64 = icmp ne i32 %63, 0
  %temp4.0 = select i1 %64, float 1.000000e+00, float 0.000000e+00
  %65 = fmul float %24, 9.765625e-04
  %66 = bitcast float %31 to i32
  %67 = bitcast float %32 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %26, <16 x i8> %28, i32 2)
  %71 = extractelement <4 x float> %70, i32 3
  %72 = fmul float %71, %22
  %73 = fadd float %72, %23
  %74 = fcmp ult float %73, 0.000000e+00
  %75 = select i1 %74, float 1.000000e+00, float 0.000000e+00
  %76 = fsub float -0.000000e+00, %75
  %77 = fptosi float %76 to i32
  %78 = bitcast i32 %77 to float
  %79 = bitcast float %78 to i32
  %80 = icmp ne i32 %79, 0
  br i1 %80, label %IF20, label %ENDIF19

IF20:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF19

ENDIF19:                                          ; preds = %main_body, %IF20
  %81 = call i32 @llvm.SI.packf16(float %65, float %55)
  %82 = bitcast i32 %81 to float
  %83 = call i32 @llvm.SI.packf16(float %57, float %temp4.0)
  %84 = bitcast i32 %83 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %82, float %84, float %82, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100300
c8110301
c80c0200
c80d0201
100a0703
d2820006
04160904
c8140400
c8150401
d2820006
041a0b05
7e0c5b06
100e0d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000004
0021e6f2
10040907
d2820002
03c1e102
10060d03
10060903
d2820003
03c1e103
100a0d05
10080905
d00c0008
02010104
d2000004
0021e480
d2060004
22010104
7e081104
d10a0008
02010104
d2000004
0021e480
c8180100
c8190101
c8140000
c8150001
c0840300
c0c60500
bf8c007f
f0800800
00430005
c0800100
bf8c0070
c2020105
c2028106
bf8c007f
7e020205
d2820000
04040900
d0020004
02010100
d2000000
0011e480
d2060000
22010100
7e001100
d10a0004
02010100
c2000108
7e0002ff
3a800000
bf8c007f
10000000
be802404
8980007e
7e0202f3
7c260280
88fe007e
5e020902
5e000700
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..11]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 13: MOV TEMP[2].zw, TEMP[1].yyxy
 14: MOV TEMP[1].x, TEMP[1].zzzz
 15: MOV OUT[1], TEMP[2]
 16: MOV OUT[2], TEMP[1]
 17: MOV OUT[0], TEMP[0]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = fmul float %50, %44
  %65 = fadd float %64, %41
  %66 = fmul float %51, %45
  %67 = fadd float %66, %42
  %68 = fmul float %52, %46
  %69 = fadd float %68, %43
  %70 = fmul float %20, %65
  %71 = fmul float %21, %65
  %72 = fmul float %22, %65
  %73 = fmul float %23, %65
  %74 = fmul float %24, %67
  %75 = fadd float %74, %70
  %76 = fmul float %25, %67
  %77 = fadd float %76, %71
  %78 = fmul float %26, %67
  %79 = fadd float %78, %72
  %80 = fmul float %27, %67
  %81 = fadd float %80, %73
  %82 = fmul float %28, %69
  %83 = fadd float %82, %75
  %84 = fmul float %29, %69
  %85 = fadd float %84, %77
  %86 = fmul float %30, %69
  %87 = fadd float %86, %79
  %88 = fmul float %31, %69
  %89 = fadd float %88, %81
  %90 = fadd float %83, %32
  %91 = fadd float %85, %33
  %92 = fadd float %87, %34
  %93 = fadd float %89, %35
  %94 = fmul float %56, %40
  %95 = fmul float %57, %40
  %96 = fmul float %58, %40
  %97 = fmul float %11, %94
  %98 = fmul float %12, %94
  %99 = fmul float %13, %94
  %100 = fmul float %14, %95
  %101 = fadd float %100, %97
  %102 = fmul float %15, %95
  %103 = fadd float %102, %98
  %104 = fmul float %16, %95
  %105 = fadd float %104, %99
  %106 = fmul float %17, %96
  %107 = fadd float %106, %101
  %108 = fmul float %18, %96
  %109 = fadd float %108, %103
  %110 = fmul float %19, %96
  %111 = fadd float %110, %105
  %112 = fmul float %107, %107
  %113 = fmul float %109, %109
  %114 = fadd float %113, %112
  %115 = fmul float %111, %111
  %116 = fadd float %114, %115
  %117 = call float @llvm.AMDGPU.rsq(float %116)
  %118 = fmul float %107, %117
  %119 = fmul float %109, %117
  %120 = fmul float %111, %117
  %121 = fmul float %62, %36
  %122 = fadd float %121, %38
  %123 = fmul float %63, %37
  %124 = fadd float %123, %39
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %122, float %124, float %118, float %119)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %119, float %120, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %91, float %92, float %93)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820002
04060804
d2820001
041a0502
7e085b01
10020903
10060905
c0840708
bf8c007f
e00c2000
80020500
c2020121
c2028123
bf8c0070
7e120205
d2820009
04240906
c2020120
c2028122
bf8c007f
7e140205
d2820005
04280905
f800020f
01030905
100e0902
c0820700
bf8c000f
e00c2000
80010300
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020113
bf8c007f
10100404
c2020117
bf8c007f
d2820008
04220004
f800021f
08070107
c202012e
c202812a
bf8c000f
7e020205
d2820001
04040905
c202011b
bf8c007f
d2820003
04220204
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080404
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120204
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160204
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820000
040a0004
c2020118
bf8c007f
d2820000
04020204
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..10]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[8].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MOV OUT[1], TEMP[1]
 13: MOV OUT[0], TEMP[0]
 14: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = fmul float %46, %40
  %56 = fadd float %55, %37
  %57 = fmul float %47, %41
  %58 = fadd float %57, %38
  %59 = fmul float %48, %42
  %60 = fadd float %59, %39
  %61 = fmul float %20, %56
  %62 = fmul float %21, %56
  %63 = fmul float %22, %56
  %64 = fmul float %23, %56
  %65 = fmul float %24, %58
  %66 = fadd float %65, %61
  %67 = fmul float %25, %58
  %68 = fadd float %67, %62
  %69 = fmul float %26, %58
  %70 = fadd float %69, %63
  %71 = fmul float %27, %58
  %72 = fadd float %71, %64
  %73 = fmul float %28, %60
  %74 = fadd float %73, %66
  %75 = fmul float %29, %60
  %76 = fadd float %75, %68
  %77 = fmul float %30, %60
  %78 = fadd float %77, %70
  %79 = fmul float %31, %60
  %80 = fadd float %79, %72
  %81 = fadd float %74, %32
  %82 = fadd float %76, %33
  %83 = fadd float %78, %34
  %84 = fadd float %80, %35
  %85 = fmul float %52, %36
  %86 = fmul float %53, %36
  %87 = fmul float %54, %36
  %88 = fmul float %11, %85
  %89 = fmul float %12, %85
  %90 = fmul float %13, %85
  %91 = fmul float %14, %86
  %92 = fadd float %91, %88
  %93 = fmul float %15, %86
  %94 = fadd float %93, %89
  %95 = fmul float %16, %86
  %96 = fadd float %95, %90
  %97 = fmul float %17, %87
  %98 = fadd float %97, %92
  %99 = fmul float %18, %87
  %100 = fadd float %99, %94
  %101 = fmul float %19, %87
  %102 = fadd float %101, %96
  %103 = fmul float %98, %98
  %104 = fmul float %100, %100
  %105 = fadd float %104, %103
  %106 = fmul float %102, %102
  %107 = fadd float %105, %106
  %108 = call float @llvm.AMDGPU.rsq(float %107)
  %109 = fmul float %98, %108
  %110 = fmul float %100, %108
  %111 = fmul float %102, %108
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %109, float %110, float %111, float %72)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020123
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820001
04060804
d2820002
041a0301
7e045b02
100c0501
100e0503
10100505
c0820700
bf8c007f
e00c2000
80010200
c2020129
c2028125
bf8c0070
7e000205
d2820000
04000903
c2020128
c2028124
bf8c007f
7e020205
d2820001
04040902
c2020113
bf8c007f
10120204
c2020117
bf8c007f
d2820009
04260004
f800020f
09060708
c202012a
c2028126
bf8c000f
7e0c0205
d2820002
04180904
c202011b
bf8c007f
d2820003
04260404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..11]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MUL TEMP[2].xyz, IN[3].xyzz, CONST[9].wwww
 13: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 14: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 15: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 16: MAD TEMP[3].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 17: MOV TEMP[3].zw, TEMP[1].yyxy
 18: MOV TEMP[4].x, TEMP[1].zzzz
 19: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 20: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 21: MOV TEMP[4].yzw, TEMP[1].yxyz
 22: MOV TEMP[1].xyz, TEMP[2].xyzx
 23: MOV OUT[1], TEMP[3]
 24: MOV OUT[3], TEMP[1]
 25: MOV OUT[2], TEMP[4]
 26: MOV OUT[0], TEMP[0]
 27: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = fmul float %50, %44
  %71 = fadd float %70, %41
  %72 = fmul float %51, %45
  %73 = fadd float %72, %42
  %74 = fmul float %52, %46
  %75 = fadd float %74, %43
  %76 = fmul float %20, %71
  %77 = fmul float %21, %71
  %78 = fmul float %22, %71
  %79 = fmul float %23, %71
  %80 = fmul float %24, %73
  %81 = fadd float %80, %76
  %82 = fmul float %25, %73
  %83 = fadd float %82, %77
  %84 = fmul float %26, %73
  %85 = fadd float %84, %78
  %86 = fmul float %27, %73
  %87 = fadd float %86, %79
  %88 = fmul float %28, %75
  %89 = fadd float %88, %81
  %90 = fmul float %29, %75
  %91 = fadd float %90, %83
  %92 = fmul float %30, %75
  %93 = fadd float %92, %85
  %94 = fmul float %31, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %32
  %97 = fadd float %91, %33
  %98 = fadd float %93, %34
  %99 = fadd float %95, %35
  %100 = fmul float %56, %40
  %101 = fmul float %57, %40
  %102 = fmul float %58, %40
  %103 = fmul float %11, %100
  %104 = fmul float %12, %100
  %105 = fmul float %13, %100
  %106 = fmul float %14, %101
  %107 = fadd float %106, %103
  %108 = fmul float %15, %101
  %109 = fadd float %108, %104
  %110 = fmul float %16, %101
  %111 = fadd float %110, %105
  %112 = fmul float %17, %102
  %113 = fadd float %112, %107
  %114 = fmul float %18, %102
  %115 = fadd float %114, %109
  %116 = fmul float %19, %102
  %117 = fadd float %116, %111
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %67, %40
  %128 = fmul float %68, %40
  %129 = fmul float %69, %40
  %130 = fmul float %11, %127
  %131 = fmul float %12, %127
  %132 = fmul float %13, %127
  %133 = fmul float %14, %128
  %134 = fadd float %133, %130
  %135 = fmul float %15, %128
  %136 = fadd float %135, %131
  %137 = fmul float %16, %128
  %138 = fadd float %137, %132
  %139 = fmul float %17, %129
  %140 = fadd float %139, %134
  %141 = fmul float %18, %129
  %142 = fadd float %141, %136
  %143 = fmul float %19, %129
  %144 = fadd float %143, %138
  %145 = fmul float %62, %36
  %146 = fadd float %145, %38
  %147 = fmul float %63, %37
  %148 = fadd float %147, %39
  %149 = fmul float %144, %125
  %150 = fmul float %140, %126
  %151 = fmul float %142, %124
  %152 = fsub float -0.000000e+00, %149
  %153 = fmul float %142, %126
  %154 = fadd float %153, %152
  %155 = fsub float -0.000000e+00, %150
  %156 = fmul float %144, %124
  %157 = fadd float %156, %155
  %158 = fsub float -0.000000e+00, %151
  %159 = fmul float %140, %125
  %160 = fadd float %159, %158
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %146, float %148, float %124, float %125)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %126, float %154, float %157, float %160)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %140, float %142, float %144, float %87)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2040105
bf8c007f
d2820007
041e0208
10060a04
c2048109
bf8c007f
d2820004
041e0609
c2050100
bf8c007f
100a040a
c2058104
bf8c007f
d2820005
0416020b
c2068108
bf8c007f
d2820006
0416060d
100a0d06
d2820005
04160904
c2060102
bf8c007f
1004040c
c2070106
bf8c007f
d2820001
040a020e
c207810a
bf8c007f
d2820003
0406060f
d2820001
04160703
7e0a5b01
10020b04
10040b06
c0880708
bf8c007f
e00c2000
80040600
c2080121
c2088123
bf8c0070
7e080211
d2820004
04102107
c2080120
c2088122
bf8c007f
7e140211
d2820006
04282106
f800020f
01020406
c088070c
bf8c000f
e00c2000
80040900
bf8c0770
100e1404
10101204
10081005
d2820004
04120e08
10121604
d2820004
04121209
10140504
100c100a
d2820006
041a0e0b
d2820006
041a120d
10160306
0814150b
100a0b03
10160b06
1006100c
d2820003
040e0e0e
d2820003
040e120f
10040503
08041702
10020303
100e0b04
08020307
f800021f
0a020105
c0820700
bf8c000f
e00c2000
80010700
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000908
c202012c
c2028128
bf8c007f
7e020205
d2820001
04040907
c2020113
bf8c007f
10040204
c2020117
bf8c007f
d2820005
040a0004
f800022f
05030406
c202012e
c202812a
bf8c000f
7e040205
d2820002
04080909
c202011b
bf8c007f
d2820003
04160404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080204
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0204
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820000
04060004
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..3], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
  9: RSQ TEMP[3].x, TEMP[3].xxxx
 10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 13: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 14: F2I TEMP[1].x, -TEMP[1]
 15: UIF TEMP[1].xxxx :1
 16:   MOV TEMP[1].x, IMM[0].yyyy
 17: ELSE :1
 18:   MOV TEMP[1].x, IMM[0].wwww
 19: ENDIF
 20: MOV TEMP[2].z, TEMP[1].xxxx
 21: MUL TEMP[1].x, CONST[2].xxxx, IMM[1].xxxx
 22: MOV TEMP[3].xy, IN[1].xyyy
 23: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
 24: MAD TEMP[3].x, TEMP[3].wwww, CONST[1].yyyy, CONST[1].zzzz
 25: SLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
 26: F2I TEMP[3].x, -TEMP[3]
 27: UIF TEMP[3].xxxx :1
 28:   KILL
 29: ENDIF
 30: MOV TEMP[1].x, TEMP[1].xxxx
 31: MOV TEMP[1].yzw, TEMP[2].yxyz
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 24)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %26 = load <32 x i8> addrspace(2)* %25, !tbaa !0
  %27 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = fcmp ugt float %16, 0.000000e+00
  %30 = select i1 %29, float 1.000000e+00, float 0.000000e+00
  %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.AMDIL.clamp.(float %30, float 0.000000e+00, float 1.000000e+00)
  %37 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %38 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %39 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %40 = bitcast float %36 to i32
  %41 = icmp ne i32 %40, 0
  %. = select i1 %41, float -1.000000e+00, float 1.000000e+00
  %42 = fmul float %33, %33
  %43 = fmul float %34, %34
  %44 = fadd float %43, %42
  %45 = fmul float %35, %35
  %46 = fadd float %44, %45
  %47 = call float @llvm.AMDGPU.rsq(float %46)
  %48 = fmul float %33, %47
  %49 = fmul float %34, %47
  %50 = fmul float %35, %47
  %51 = fmul float %48, %.
  %52 = fmul float %49, %.
  %53 = fmul float %50, %.
  %54 = fmul float %51, 5.000000e-01
  %55 = fadd float %54, 5.000000e-01
  %56 = fmul float %52, 5.000000e-01
  %57 = fadd float %56, 5.000000e-01
  %58 = fcmp uge float %53, 0.000000e+00
  %59 = select i1 %58, float 1.000000e+00, float 0.000000e+00
  %60 = fsub float -0.000000e+00, %59
  %61 = fptosi float %60 to i32
  %62 = bitcast i32 %61 to float
  %63 = bitcast float %62 to i32
  %64 = icmp ne i32 %63, 0
  %temp4.0 = select i1 %64, float 1.000000e+00, float 0.000000e+00
  %65 = fmul float %24, 9.765625e-04
  %66 = bitcast float %31 to i32
  %67 = bitcast float %32 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %26, <16 x i8> %28, i32 2)
  %71 = extractelement <4 x float> %70, i32 3
  %72 = fmul float %71, %22
  %73 = fadd float %72, %23
  %74 = fcmp ult float %73, 0.000000e+00
  %75 = select i1 %74, float 1.000000e+00, float 0.000000e+00
  %76 = fsub float -0.000000e+00, %75
  %77 = fptosi float %76 to i32
  %78 = bitcast i32 %77 to float
  %79 = bitcast float %78 to i32
  %80 = icmp ne i32 %79, 0
  br i1 %80, label %IF20, label %ENDIF19

IF20:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF19

ENDIF19:                                          ; preds = %main_body, %IF20
  %81 = call i32 @llvm.SI.packf16(float %65, float %55)
  %82 = bitcast i32 %81 to float
  %83 = call i32 @llvm.SI.packf16(float %57, float %temp4.0)
  %84 = bitcast i32 %83 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %82, float %84, float %82, float %84)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100300
c8110301
c80c0200
c80d0201
100a0703
d2820006
04160904
c8140400
c8150401
d2820006
041a0b05
7e0c5b06
100e0d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000004
0021e6f2
10040907
d2820002
03c1e102
10060d03
10060903
d2820003
03c1e103
100a0d05
10080905
d00c0008
02010104
d2000004
0021e480
d2060004
22010104
7e081104
d10a0008
02010104
d2000004
0021e480
c8180100
c8190101
c8140000
c8150001
c0840300
c0c60500
bf8c007f
f0800800
00430005
c0800100
bf8c0070
c2020105
c2028106
bf8c007f
7e020205
d2820000
04040900
d0020004
02010100
d2000000
0011e480
d2060000
22010100
7e001100
d10a0004
02010100
c2000108
7e0002ff
3a800000
bf8c007f
10000000
be802404
8980007e
7e0202f3
7c260280
88fe007e
5e020902
5e000700
f8001c0f
01000100
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..11]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[0], TEMP[0], CONST[7]
  5: MUL TEMP[1].xyz, IN[1].xyzz, CONST[9].wwww
  6: MUL TEMP[2], CONST[0], TEMP[1].xxxx
  7: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
  8: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
  9: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 10: RSQ TEMP[2].x, TEMP[2].xxxx
 11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
 13: MOV TEMP[2].zw, TEMP[1].yyxy
 14: MOV TEMP[1].x, TEMP[1].zzzz
 15: MOV OUT[1], TEMP[2]
 16: MOV OUT[2], TEMP[1]
 17: MOV OUT[0], TEMP[0]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = fmul float %50, %44
  %65 = fadd float %64, %41
  %66 = fmul float %51, %45
  %67 = fadd float %66, %42
  %68 = fmul float %52, %46
  %69 = fadd float %68, %43
  %70 = fmul float %20, %65
  %71 = fmul float %21, %65
  %72 = fmul float %22, %65
  %73 = fmul float %23, %65
  %74 = fmul float %24, %67
  %75 = fadd float %74, %70
  %76 = fmul float %25, %67
  %77 = fadd float %76, %71
  %78 = fmul float %26, %67
  %79 = fadd float %78, %72
  %80 = fmul float %27, %67
  %81 = fadd float %80, %73
  %82 = fmul float %28, %69
  %83 = fadd float %82, %75
  %84 = fmul float %29, %69
  %85 = fadd float %84, %77
  %86 = fmul float %30, %69
  %87 = fadd float %86, %79
  %88 = fmul float %31, %69
  %89 = fadd float %88, %81
  %90 = fadd float %83, %32
  %91 = fadd float %85, %33
  %92 = fadd float %87, %34
  %93 = fadd float %89, %35
  %94 = fmul float %56, %40
  %95 = fmul float %57, %40
  %96 = fmul float %58, %40
  %97 = fmul float %11, %94
  %98 = fmul float %12, %94
  %99 = fmul float %13, %94
  %100 = fmul float %14, %95
  %101 = fadd float %100, %97
  %102 = fmul float %15, %95
  %103 = fadd float %102, %98
  %104 = fmul float %16, %95
  %105 = fadd float %104, %99
  %106 = fmul float %17, %96
  %107 = fadd float %106, %101
  %108 = fmul float %18, %96
  %109 = fadd float %108, %103
  %110 = fmul float %19, %96
  %111 = fadd float %110, %105
  %112 = fmul float %107, %107
  %113 = fmul float %109, %109
  %114 = fadd float %113, %112
  %115 = fmul float %111, %111
  %116 = fadd float %114, %115
  %117 = call float @llvm.AMDGPU.rsq(float %116)
  %118 = fmul float %107, %117
  %119 = fmul float %109, %117
  %120 = fmul float %111, %117
  %121 = fmul float %62, %36
  %122 = fadd float %121, %38
  %123 = fmul float %63, %37
  %124 = fadd float %123, %39
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %122, float %124, float %118, float %119)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %119, float %120, float %81)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %91, float %92, float %93)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840704
bf8c007f
e00c2000
80020300
c0800100
bf8c0070
c2020127
bf8c007f
10020804
10040604
c2028101
bf8c007f
100e0405
c2028105
bf8c007f
d2820007
041e0205
10080a04
c2020109
bf8c007f
d2820003
041e0804
c2020100
bf8c007f
100a0404
c2020104
bf8c007f
d2820005
04160204
c2020108
bf8c007f
d2820005
04160804
100c0b05
d2820006
041a0703
c2020102
bf8c007f
10040404
c2020106
bf8c007f
d2820001
040a0204
c202010a
bf8c007f
d2820002
04060804
d2820001
041a0502
7e085b01
10020903
10060905
c0840708
bf8c007f
e00c2000
80020500
c2020121
c2028123
bf8c0070
7e120205
d2820009
04240906
c2020120
c2028122
bf8c007f
7e140205
d2820005
04280905
f800020f
01030905
100e0902
c0820700
bf8c000f
e00c2000
80010300
c202012d
c2028129
bf8c0070
7e000205
d2820000
04000904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020113
bf8c007f
10100404
c2020117
bf8c007f
d2820008
04220004
f800021f
08070107
c202012e
c202812a
bf8c000f
7e020205
d2820001
04040905
c202011b
bf8c007f
d2820003
04220204
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080404
c2020116
bf8c007f
d2820004
04120004
c202011a
bf8c007f
d2820004
04120204
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160004
c2020119
bf8c007f
d2820005
04160204
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820000
040a0004
c2020118
bf8c007f
d2820000
04020204
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[13..14]
DCL CONST[3..12]
DCL TEMP[0]
DCL TEMP[1..8], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.2126,     0.7152}
IMM[2] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[7], CONST[8]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[1], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[3].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[3].xxxx
 14: MOV TEMP[3].z, IMM[0].zzzz
 15: MOV TEMP[3].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[9].zzzz, CONST[9].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xxxx
 21: ADD TEMP[3].xyz, CONST[12].xyzz, -TEMP[1].xyzz
 22: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
 23: RSQ TEMP[5].x, TEMP[5].xxxx
 24: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 25: DP3_SAT TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz
 26: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[7].xxxx
 29: ADD TEMP[1].xyz, TEMP[5].xyzz, -TEMP[1].xyzz
 30: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz
 31: MAD_SAT TEMP[3].x, CONST[13].xxxx, TEMP[3].xxxx, CONST[13].yyyy
 32: MUL TEMP[7], CONST[3], TEMP[5].xxxx
 33: MAD TEMP[7], CONST[4], TEMP[5].yyyy, TEMP[7]
 34: MAD TEMP[5].xyz, CONST[5], TEMP[5].zzzz, TEMP[7]
 35: MOV TEMP[5].xyz, -TEMP[5].xyzz
 36: TEX TEMP[5].xyz, TEMP[5], SAMP[2], CUBE
 37: MUL TEMP[7].xyz, CONST[10].xyzz, TEMP[3].xxxx
 38: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[6].xxxx
 39: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xyzz
 40: MUL TEMP[3].x, CONST[11].wwww, TEMP[3].xxxx
 41: DP3 TEMP[8].x, TEMP[1].xyzz, TEMP[1].xyzz
 42: RSQ TEMP[8].x, TEMP[8].xxxx
 43: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[8].xxxx
 44: DP3_SAT TEMP[1].x, TEMP[1].xyzz, TEMP[4].xyzz
 45: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 46: POW TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 47: SGE TEMP[2].x, TEMP[6].xxxx, IMM[1].yyyy
 48: F2I TEMP[2].x, -TEMP[2]
 49: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
 50: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 51: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx
 52: DP3 TEMP[2].x, TEMP[5].xyzz, IMM[2].xyzz
 53: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
 54: MOV TEMP[7].w, TEMP[1].xxxx
 55: MOV OUT[0], TEMP[7]
 56: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = fmul float %13, %50
  %65 = fadd float %64, %51
  %66 = fmul float %12, %31
  %67 = fadd float %66, %35
  %68 = fmul float %65, %32
  %69 = fadd float %68, %36
  %70 = fmul float %12, %33
  %71 = fadd float %70, %37
  %72 = fmul float %65, %34
  %73 = fadd float %72, %38
  %74 = bitcast float %67 to i32
  %75 = bitcast float %69 to i32
  %76 = insertelement <2 x i32> undef, i32 %74, i32 0
  %77 = insertelement <2 x i32> %76, i32 %75, i32 1
  %78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %57, <16 x i8> %59, i32 2)
  %79 = extractelement <4 x float> %78, i32 0
  %80 = extractelement <4 x float> %78, i32 1
  %81 = extractelement <4 x float> %78, i32 2
  %82 = extractelement <4 x float> %78, i32 3
  %83 = fmul float %80, 2.000000e+00
  %84 = fadd float %83, -1.000000e+00
  %85 = fmul float %81, 2.000000e+00
  %86 = fadd float %85, -1.000000e+00
  %87 = fmul float %82, 2.000000e+00
  %88 = fadd float %87, -1.000000e+00
  %89 = fmul float %84, %84
  %90 = fmul float %86, %86
  %91 = fadd float %89, %90
  %92 = fsub float -0.000000e+00, %91
  %93 = fadd float 1.000000e+00, %92
  %94 = call float @llvm.AMDIL.clamp.(float %93, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDGPU.rsq(float %94)
  %96 = fmul float %95, %94
  %97 = fsub float -0.000000e+00, %94
  %98 = call float @llvm.AMDGPU.cndlt(float %97, float %96, float 0.000000e+00)
  %99 = fmul float %98, %88
  %100 = bitcast float %67 to i32
  %101 = bitcast float %69 to i32
  %102 = insertelement <2 x i32> undef, i32 %100, i32 0
  %103 = insertelement <2 x i32> %102, i32 %101, i32 1
  %104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %53, <16 x i8> %55, i32 2)
  %105 = extractelement <4 x float> %104, i32 0
  %106 = fmul float %105, %39
  %107 = fadd float %106, %40
  %108 = fdiv float 1.000000e+00, %107
  %109 = fmul float %71, %108
  %110 = fmul float %73, %108
  %111 = fmul float 1.000000e+00, %108
  %112 = fsub float -0.000000e+00, %109
  %113 = fadd float %45, %112
  %114 = fsub float -0.000000e+00, %110
  %115 = fadd float %46, %114
  %116 = fsub float -0.000000e+00, %111
  %117 = fadd float %47, %116
  %118 = fmul float %113, %113
  %119 = fmul float %115, %115
  %120 = fadd float %119, %118
  %121 = fmul float %117, %117
  %122 = fadd float %120, %121
  %123 = call float @llvm.AMDGPU.rsq(float %122)
  %124 = fmul float %113, %123
  %125 = fmul float %115, %123
  %126 = fmul float %117, %123
  %127 = fmul float %84, %124
  %128 = fmul float %86, %125
  %129 = fadd float %128, %127
  %130 = fmul float %99, %126
  %131 = fadd float %129, %130
  %132 = call float @llvm.AMDIL.clamp.(float %131, float 0.000000e+00, float 1.000000e+00)
  %133 = fmul float %109, %109
  %134 = fmul float %110, %110
  %135 = fadd float %134, %133
  %136 = fmul float %111, %111
  %137 = fadd float %135, %136
  %138 = call float @llvm.AMDGPU.rsq(float %137)
  %139 = fmul float %109, %138
  %140 = fmul float %110, %138
  %141 = fmul float %111, %138
  %142 = fsub float -0.000000e+00, %139
  %143 = fadd float %124, %142
  %144 = fsub float -0.000000e+00, %140
  %145 = fadd float %125, %144
  %146 = fsub float -0.000000e+00, %141
  %147 = fadd float %126, %146
  %148 = fmul float %113, %113
  %149 = fmul float %115, %115
  %150 = fadd float %149, %148
  %151 = fmul float %117, %117
  %152 = fadd float %150, %151
  %153 = fmul float %48, %152
  %154 = fadd float %153, %49
  %155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00)
  %156 = fmul float %22, %124
  %157 = fmul float %23, %124
  %158 = fmul float %24, %124
  %159 = fmul float %25, %125
  %160 = fadd float %159, %156
  %161 = fmul float %26, %125
  %162 = fadd float %161, %157
  %163 = fmul float %27, %125
  %164 = fadd float %163, %158
  %165 = fmul float %28, %126
  %166 = fadd float %165, %160
  %167 = fmul float %29, %126
  %168 = fadd float %167, %162
  %169 = fmul float %30, %126
  %170 = fadd float %169, %164
  %171 = fsub float -0.000000e+00, %166
  %172 = fsub float -0.000000e+00, %168
  %173 = fsub float -0.000000e+00, %170
  %174 = insertelement <4 x float> undef, float %171, i32 0
  %175 = insertelement <4 x float> %174, float %172, i32 1
  %176 = insertelement <4 x float> %175, float %173, i32 2
  %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3
  %178 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %177)
  %179 = extractelement <4 x float> %178, i32 0
  %180 = extractelement <4 x float> %178, i32 1
  %181 = extractelement <4 x float> %178, i32 2
  %182 = extractelement <4 x float> %178, i32 3
  %183 = call float @fabs(float %181)
  %184 = fdiv float 1.000000e+00, %183
  %185 = fmul float %179, %184
  %186 = fadd float %185, 1.500000e+00
  %187 = fmul float %180, %184
  %188 = fadd float %187, 1.500000e+00
  %189 = bitcast float %188 to i32
  %190 = bitcast float %186 to i32
  %191 = bitcast float %182 to i32
  %192 = insertelement <4 x i32> undef, i32 %189, i32 0
  %193 = insertelement <4 x i32> %192, i32 %190, i32 1
  %194 = insertelement <4 x i32> %193, i32 %191, i32 2
  %195 = insertelement <4 x i32> %194, i32 undef, i32 3
  %196 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %195, <32 x i8> %61, <16 x i8> %63, i32 4)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = fmul float %41, %155
  %201 = fmul float %42, %155
  %202 = fmul float %43, %155
  %203 = fmul float %200, %132
  %204 = fmul float %201, %132
  %205 = fmul float %202, %132
  %206 = fmul float %203, %197
  %207 = fmul float %204, %198
  %208 = fmul float %205, %199
  %209 = fmul float %44, %155
  %210 = fmul float %143, %143
  %211 = fmul float %145, %145
  %212 = fadd float %211, %210
  %213 = fmul float %147, %147
  %214 = fadd float %212, %213
  %215 = call float @llvm.AMDGPU.rsq(float %214)
  %216 = fmul float %143, %215
  %217 = fmul float %145, %215
  %218 = fmul float %147, %215
  %219 = fmul float %216, %84
  %220 = fmul float %217, %86
  %221 = fadd float %220, %219
  %222 = fmul float %218, %99
  %223 = fadd float %221, %222
  %224 = call float @llvm.AMDIL.clamp.(float %223, float 0.000000e+00, float 1.000000e+00)
  %225 = fmul float %79, 1.024000e+03
  %226 = call float @llvm.pow.f32(float %224, float %225)
  %227 = fcmp uge float %132, 0x3F847AE140000000
  %228 = select i1 %227, float 1.000000e+00, float 0.000000e+00
  %229 = fsub float -0.000000e+00, %228
  %230 = fptosi float %229 to i32
  %231 = bitcast i32 %230 to float
  %232 = bitcast float %231 to i32
  %233 = and i32 %232, 1065353216
  %234 = bitcast i32 %233 to float
  %235 = fmul float %226, %234
  %236 = fmul float %209, %235
  %237 = fmul float %197, 0x3FCB367A00000000
  %238 = fmul float %198, 0x3FE6E2EB20000000
  %239 = fadd float %238, %237
  %240 = fmul float %199, 0x3FB27BB300000000
  %241 = fadd float %239, %240
  %242 = fmul float %236, %241
  %243 = call i32 @llvm.SI.packf16(float %206, float %207)
  %244 = bitcast i32 %243 to float
  %245 = call i32 @llvm.SI.packf16(float %208, float %242)
  %246 = bitcast i32 %245 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %244, float %246, float %244, float %246)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c2000938
c2008939
bf8c007f
7e000201
d2820001
04000103
c200091d
c2008921
bf8c007f
7e000201
d2820007
04000101
c200091c
c2008920
bf8c007f
7e000201
d2820006
04000102
c0860300
c0c80500
bf8c007f
f0800100
00640006
c2000926
c2008927
bf8c0070
7e060201
d2820000
040c0100
7e005500
c200091f
c2008923
bf8c007f
7e060201
d2820001
040c0101
10020101
c2000931
bf8c007f
08060200
c200091e
c2008922
bf8c007f
7e080201
d2820002
04100102
10040102
c2000930
bf8c007f
08120400
10081309
d2820004
04120703
c2000932
bf8c007f
080a0000
d2820008
04120b05
7e145b08
10081503
10061509
c200090c
bf8c007f
10120600
c2000910
bf8c007f
d2820009
04260800
100a1505
c2000914
bf8c007f
d2820009
04260a00
d2060009
22010109
c200090d
bf8c007f
101a0600
c2000911
bf8c007f
d282000d
04360800
c2000915
bf8c007f
d282000d
04360a00
d206000a
2201010d
c200090e
bf8c007f
101a0600
c2000912
bf8c007f
d282000d
04360800
c2000916
bf8c007f
d282000d
04360a00
d206000b
2201010d
7e180280
d28a000e
042e1509
d28c000d
042e1509
d28e000f
042e1509
d2880010
042e1509
d2060109
0201010f
7e125509
7e1402ff
3fc00000
d282000f
042a130d
d282000e
042a130e
c0860308
c0c80510
bf8c007f
f0800700
0064090e
c0800304
c0c60508
bf8c0070
f0800f00
00030c06
bf8c0770
060c1b0d
060e0cf3
100c0707
06201d0e
062020f3
d2820006
041a0910
10222110
d2820011
04460f07
082222f2
d2060811
02010111
7e245b11
10242312
d2060011
22010111
d0080000
02022280
d2000011
00022480
06241f0f
062424f3
10242511
d2820006
041a0b12
d2060806
02010106
c2000934
c2008935
bf8c007f
7e220201
d2820008
04461000
d2060808
02010108
c2000929
bf8c007f
10221000
10220d11
10221511
c2000928
bf8c007f
10261000
10260d13
10261313
5e222313
102612ff
3e59b3d0
7e2802ff
3f371759
d2820013
044e290a
7e2802ff
3d93dd98
d2820013
044e290b
10280502
d2820014
04520301
d2820014
04520100
7e285b14
10022901
08020304
10042902
08040503
10060502
d2820003
040e0301
10002900
08000105
d2820003
040e0100
7e065b03
10020701
10040702
10040f02
d2820001
040a2101
10000700
d2820000
04062500
d2060800
02010100
7e004f00
100218ff
44800000
0e000101
7e004b00
7e0202ff
3c23d70a
d00c0000
02020306
d2000001
0001e480
d2060001
22010101
7e021101
360202f2
10000300
c200092f
bf8c007f
10021000
10000101
10002700
c200092a
bf8c007f
10021000
10020d01
10021701
5e000101
f8001c0f
00110011
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = fmul float %11, %30
  %34 = fmul float %12, %30
  %35 = fmul float %13, %30
  %36 = fmul float %14, %30
  %37 = fmul float %15, %31
  %38 = fadd float %37, %33
  %39 = fmul float %16, %31
  %40 = fadd float %39, %34
  %41 = fmul float %17, %31
  %42 = fadd float %41, %35
  %43 = fmul float %18, %31
  %44 = fadd float %43, %36
  %45 = fmul float %19, %32
  %46 = fadd float %45, %38
  %47 = fmul float %20, %32
  %48 = fadd float %47, %40
  %49 = fmul float %21, %32
  %50 = fadd float %49, %42
  %51 = fmul float %22, %32
  %52 = fadd float %51, %44
  %53 = fadd float %46, %23
  %54 = fadd float %48, %24
  %55 = fadd float %50, %25
  %56 = fadd float %52, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %53, float %54, float %55, float %56)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0820700
bf8c007f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[14..15]
DCL CONST[3..13]
DCL TEMP[0]
DCL TEMP[1..8], LOCAL
IMM[0] FLT32 {    2.0000,    -1.0000,     1.0000,     0.0000}
IMM[1] FLT32 { 1024.0000,     0.0100,     0.2126,     0.7152}
IMM[2] FLT32 {    0.2126,     0.7152,     0.0722,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[15].xxxx, CONST[15].yyyy
  2: MAD TEMP[1], TEMP[0].xyxy, CONST[7], CONST[8]
  3: MOV TEMP[2].xy, TEMP[1].xyyy
  4: TEX TEMP[2], TEMP[2], SAMP[1], 2D
  5: MAD TEMP[3].xyz, TEMP[2].yzww, IMM[0].xxxx, IMM[0].yyyy
  6: MOV TEMP[4].xy, TEMP[3].xyxx
  7: DP2 TEMP[5].x, TEMP[3].xyyy, TEMP[3].xyyy
  8: ADD_SAT TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
  9: RSQ TEMP[6].x, TEMP[5].xxxx
 10: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
 11: CMP TEMP[6].x, -TEMP[5].xxxx, TEMP[6].xxxx, IMM[0].wwww
 12: MUL TEMP[3].x, TEMP[6].xxxx, TEMP[3].zzzz
 13: MOV TEMP[4].z, TEMP[3].xxxx
 14: MOV TEMP[3].z, IMM[0].zzzz
 15: MOV TEMP[3].xy, TEMP[1].zwzz
 16: MOV TEMP[1].xy, TEMP[1].xyyy
 17: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D
 18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[9].zzzz, CONST[9].wwww
 19: RCP TEMP[1].x, TEMP[1].xxxx
 20: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xxxx
 21: ADD TEMP[3].xyz, CONST[12].xyzz, -TEMP[1].xyzz
 22: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
 23: RSQ TEMP[5].x, TEMP[5].xxxx
 24: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 25: DP3_SAT TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz
 26: DP3 TEMP[7].x, TEMP[1].xyzz, TEMP[1].xyzz
 27: RSQ TEMP[7].x, TEMP[7].xxxx
 28: MUL TEMP[7].xyz, TEMP[1].xyzz, TEMP[7].xxxx
 29: ADD TEMP[7].xyz, TEMP[5].xyzz, -TEMP[7].xyzz
 30: DP3 TEMP[3].x, TEMP[3].xyzz, CONST[14].xyzz
 31: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx
 32: MAD_SAT TEMP[3].x, CONST[13].xxxx, TEMP[3].xxxx, CONST[13].yyyy
 33: DP3 TEMP[5].x, TEMP[5].xyzz, CONST[14].xyzz
 34: MAD_SAT TEMP[5].x, CONST[13].zzzz, -TEMP[5].xxxx, CONST[13].wwww
 35: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx
 36: MUL TEMP[5], CONST[3], TEMP[1].xxxx
 37: MAD TEMP[5], CONST[4], TEMP[1].yyyy, TEMP[5]
 38: MAD TEMP[1], CONST[5], TEMP[1].zzzz, TEMP[5]
 39: ADD TEMP[1].xyw, TEMP[1], CONST[6]
 40: MOV TEMP[5].xy, TEMP[1].xyyy
 41: MOV TEMP[5].w, TEMP[1].wwww
 42: TXP TEMP[1].xyz, TEMP[5], SAMP[2], 2D
 43: MUL TEMP[5].xyz, CONST[10].xyzz, TEMP[3].xxxx
 44: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
 45: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[1].xyzz
 46: MUL TEMP[3].x, CONST[11].wwww, TEMP[3].xxxx
 47: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz
 48: RSQ TEMP[8].x, TEMP[8].xxxx
 49: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx
 50: DP3_SAT TEMP[4].x, TEMP[7].xyzz, TEMP[4].xyzz
 51: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
 52: POW TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx
 53: SGE TEMP[4].x, TEMP[6].xxxx, IMM[1].yyyy
 54: F2I TEMP[4].x, -TEMP[4]
 55: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
 56: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
 57: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx
 58: DP3 TEMP[1].x, TEMP[1].xyzz, IMM[2].xyzz
 59: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx
 60: MOV TEMP[5].w, TEMP[1].xxxx
 61: MOV OUT[0], TEMP[5]
 62: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 216)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 220)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %69 = load <32 x i8> addrspace(2)* %68, !tbaa !0
  %70 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = fmul float %13, %58
  %73 = fadd float %72, %59
  %74 = fmul float %12, %34
  %75 = fadd float %74, %38
  %76 = fmul float %73, %35
  %77 = fadd float %76, %39
  %78 = fmul float %12, %36
  %79 = fadd float %78, %40
  %80 = fmul float %73, %37
  %81 = fadd float %80, %41
  %82 = bitcast float %75 to i32
  %83 = bitcast float %77 to i32
  %84 = insertelement <2 x i32> undef, i32 %82, i32 0
  %85 = insertelement <2 x i32> %84, i32 %83, i32 1
  %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %65, <16 x i8> %67, i32 2)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = extractelement <4 x float> %86, i32 2
  %90 = extractelement <4 x float> %86, i32 3
  %91 = fmul float %88, 2.000000e+00
  %92 = fadd float %91, -1.000000e+00
  %93 = fmul float %89, 2.000000e+00
  %94 = fadd float %93, -1.000000e+00
  %95 = fmul float %90, 2.000000e+00
  %96 = fadd float %95, -1.000000e+00
  %97 = fmul float %92, %92
  %98 = fmul float %94, %94
  %99 = fadd float %97, %98
  %100 = fsub float -0.000000e+00, %99
  %101 = fadd float 1.000000e+00, %100
  %102 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00)
  %103 = call float @llvm.AMDGPU.rsq(float %102)
  %104 = fmul float %103, %102
  %105 = fsub float -0.000000e+00, %102
  %106 = call float @llvm.AMDGPU.cndlt(float %105, float %104, float 0.000000e+00)
  %107 = fmul float %106, %96
  %108 = bitcast float %75 to i32
  %109 = bitcast float %77 to i32
  %110 = insertelement <2 x i32> undef, i32 %108, i32 0
  %111 = insertelement <2 x i32> %110, i32 %109, i32 1
  %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %61, <16 x i8> %63, i32 2)
  %113 = extractelement <4 x float> %112, i32 0
  %114 = fmul float %113, %42
  %115 = fadd float %114, %43
  %116 = fdiv float 1.000000e+00, %115
  %117 = fmul float %79, %116
  %118 = fmul float %81, %116
  %119 = fmul float 1.000000e+00, %116
  %120 = fsub float -0.000000e+00, %117
  %121 = fadd float %48, %120
  %122 = fsub float -0.000000e+00, %118
  %123 = fadd float %49, %122
  %124 = fsub float -0.000000e+00, %119
  %125 = fadd float %50, %124
  %126 = fmul float %121, %121
  %127 = fmul float %123, %123
  %128 = fadd float %127, %126
  %129 = fmul float %125, %125
  %130 = fadd float %128, %129
  %131 = call float @llvm.AMDGPU.rsq(float %130)
  %132 = fmul float %121, %131
  %133 = fmul float %123, %131
  %134 = fmul float %125, %131
  %135 = fmul float %92, %132
  %136 = fmul float %94, %133
  %137 = fadd float %136, %135
  %138 = fmul float %107, %134
  %139 = fadd float %137, %138
  %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
  %141 = fmul float %117, %117
  %142 = fmul float %118, %118
  %143 = fadd float %142, %141
  %144 = fmul float %119, %119
  %145 = fadd float %143, %144
  %146 = call float @llvm.AMDGPU.rsq(float %145)
  %147 = fmul float %117, %146
  %148 = fmul float %118, %146
  %149 = fmul float %119, %146
  %150 = fsub float -0.000000e+00, %147
  %151 = fadd float %132, %150
  %152 = fsub float -0.000000e+00, %148
  %153 = fadd float %133, %152
  %154 = fsub float -0.000000e+00, %149
  %155 = fadd float %134, %154
  %156 = fmul float %121, %55
  %157 = fmul float %123, %56
  %158 = fadd float %157, %156
  %159 = fmul float %125, %57
  %160 = fadd float %158, %159
  %161 = fmul float %160, %160
  %162 = fmul float %51, %161
  %163 = fadd float %162, %52
  %164 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00)
  %165 = fmul float %132, %55
  %166 = fmul float %133, %56
  %167 = fadd float %166, %165
  %168 = fmul float %134, %57
  %169 = fadd float %167, %168
  %170 = fsub float -0.000000e+00, %169
  %171 = fmul float %53, %170
  %172 = fadd float %171, %54
  %173 = call float @llvm.AMDIL.clamp.(float %172, float 0.000000e+00, float 1.000000e+00)
  %174 = fmul float %164, %173
  %175 = fmul float %22, %117
  %176 = fmul float %23, %117
  %177 = fmul float %24, %117
  %178 = fmul float %25, %118
  %179 = fadd float %178, %175
  %180 = fmul float %26, %118
  %181 = fadd float %180, %176
  %182 = fmul float %27, %118
  %183 = fadd float %182, %177
  %184 = fmul float %28, %119
  %185 = fadd float %184, %179
  %186 = fmul float %29, %119
  %187 = fadd float %186, %181
  %188 = fmul float %30, %119
  %189 = fadd float %188, %183
  %190 = fadd float %185, %31
  %191 = fadd float %187, %32
  %192 = fadd float %189, %33
  %193 = fdiv float %190, %192
  %194 = fdiv float %191, %192
  %195 = bitcast float %193 to i32
  %196 = bitcast float %194 to i32
  %197 = insertelement <2 x i32> undef, i32 %195, i32 0
  %198 = insertelement <2 x i32> %197, i32 %196, i32 1
  %199 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %198, <32 x i8> %69, <16 x i8> %71, i32 2)
  %200 = extractelement <4 x float> %199, i32 0
  %201 = extractelement <4 x float> %199, i32 1
  %202 = extractelement <4 x float> %199, i32 2
  %203 = fmul float %44, %174
  %204 = fmul float %45, %174
  %205 = fmul float %46, %174
  %206 = fmul float %203, %140
  %207 = fmul float %204, %140
  %208 = fmul float %205, %140
  %209 = fmul float %206, %200
  %210 = fmul float %207, %201
  %211 = fmul float %208, %202
  %212 = fmul float %47, %174
  %213 = fmul float %151, %151
  %214 = fmul float %153, %153
  %215 = fadd float %214, %213
  %216 = fmul float %155, %155
  %217 = fadd float %215, %216
  %218 = call float @llvm.AMDGPU.rsq(float %217)
  %219 = fmul float %151, %218
  %220 = fmul float %153, %218
  %221 = fmul float %155, %218
  %222 = fmul float %219, %92
  %223 = fmul float %220, %94
  %224 = fadd float %223, %222
  %225 = fmul float %221, %107
  %226 = fadd float %224, %225
  %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00)
  %228 = fmul float %87, 1.024000e+03
  %229 = call float @llvm.pow.f32(float %227, float %228)
  %230 = fcmp uge float %140, 0x3F847AE140000000
  %231 = select i1 %230, float 1.000000e+00, float 0.000000e+00
  %232 = fsub float -0.000000e+00, %231
  %233 = fptosi float %232 to i32
  %234 = bitcast i32 %233 to float
  %235 = bitcast float %234 to i32
  %236 = and i32 %235, 1065353216
  %237 = bitcast i32 %236 to float
  %238 = fmul float %229, %237
  %239 = fmul float %212, %238
  %240 = fmul float %200, 0x3FCB367A00000000
  %241 = fmul float %201, 0x3FE6E2EB20000000
  %242 = fadd float %241, %240
  %243 = fmul float %202, 0x3FB27BB300000000
  %244 = fadd float %242, %243
  %245 = fmul float %239, %244
  %246 = call i32 @llvm.SI.packf16(float %209, float %210)
  %247 = bitcast i32 %246 to float
  %248 = call i32 @llvm.SI.packf16(float %211, float %245)
  %249 = bitcast i32 %248 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %247, float %249, float %247, float %249)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840100
bf8c007f
c200093c
c200893d
bf8c007f
7e000201
d2820001
04000103
c200091d
c2008921
bf8c007f
7e000201
d2820004
04000101
c200091c
c2008920
bf8c007f
7e000201
d2820003
04000102
c0860300
c0c80500
bf8c007f
f0800100
00640003
c2000926
c2008927
bf8c0070
7e0a0201
d2820000
04140100
7e005500
c200091f
c2008923
bf8c007f
7e0a0201
d2820001
04140101
10020101
c2000931
bf8c007f
081a0200
c200091e
c2008922
bf8c007f
7e0a0201
d2820002
04140102
100c0102
c2000930
bf8c007f
081e0c00
10041f0f
d2820002
040a1b0d
c2000932
bf8c007f
08200000
d2820002
040a2110
7e0e5b02
10160f0d
10120f0f
c0860304
c0c80508
bf8c007f
f0800f00
00640203
bf8c0770
06100703
061010f3
10181308
06140904
061414f3
d2820011
0432170a
1018150a
d282000c
04321108
081818f2
d206080c
0201010c
7e1c5b0c
101c190e
d206000c
2201010c
d0080000
02021880
d200000c
00021c80
061c0b05
061c1cf3
10181d0c
101c0f10
d2820007
04461d0c
d2060807
02010107
c2000938
bf8c007f
10221200
c2008939
bf8c007f
d2820011
0444030b
c203093a
bf8c007f
d2820011
04440d0e
c2038936
bf8c007f
10222207
c2038937
bf8c007f
08222207
d2060811
02010111
101e1e00
d282000d
043c030d
d282000d
04340d10
101a1b0d
c2000934
c2008935
bf8c007f
7e1e0201
d282000d
043e1a00
d206080d
0201010d
101a230d
c2000929
bf8c007f
101e1a00
10240f0f
c200090d
bf8c007f
101e0c00
c2000911
bf8c007f
d282000f
043e0200
c2000915
bf8c007f
d282000f
043e0000
c2000919
bf8c007f
061e1e00
c200090f
bf8c007f
10200c00
c2000913
bf8c007f
d2820010
04420200
c2000917
bf8c007f
d2820010
04420000
c200091b
bf8c007f
06202000
7e225510
1020230f
c200090c
bf8c007f
10260c00
c2000910
bf8c007f
d2820013
044e0200
c2000914
bf8c007f
d2820013
044e0000
c2000918
bf8c007f
06262600
101e2313
c0800308
c0c60510
bf8c007f
f0800700
00030f0f
bf8c0770
10242112
c2000928
bf8c007f
10261a00
10260f13
10261f13
5e242513
10261eff
3e59b3d0
7e2802ff
3f371759
d2820013
044e2910
7e2802ff
3d93dd98
d2820013
044e2911
10280d06
d2820014
04520301
d2820014
04520100
7e285b14
10022901
0802030b
100c2906
080c0d09
10120d06
d2820009
04260301
10002900
0800010e
d2820009
04260100
7e125b09
10021301
100c1306
100c1106
d2820001
041a1501
10001300
d2820000
04061900
d2060800
02010100
7e004f00
100204ff
44800000
0e000101
7e004b00
7e0202ff
3c23d70a
d00c0000
02020307
d2000001
0001e480
d2060001
22010101
7e021101
360202f2
10000300
c200092f
bf8c007f
10021a00
10000101
10002700
c200092a
bf8c007f
10021a00
10020f01
10022301
5e000101
f8001c0f
00120012
bf810000
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: ADD TEMP[0], TEMP[0], CONST[3]
  4: MOV OUT[0], TEMP[0]
  5: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 44)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 60)
  %27 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %28 = load <16 x i8> addrspace(2)* %27, !tbaa !0
  %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %28, i32 0, i32 %5)
  %30 = extractelement <4 x float> %29, i32 0
  %31 = extractelement <4 x float> %29, i32 1
  %32 = extractelement <4 x float> %29, i32 2
  %33 = fmul float %11, %30
  %34 = fmul float %12, %30
  %35 = fmul float %13, %30
  %36 = fmul float %14, %30
  %37 = fmul float %15, %31
  %38 = fadd float %37, %33
  %39 = fmul float %16, %31
  %40 = fadd float %39, %34
  %41 = fmul float %17, %31
  %42 = fadd float %41, %35
  %43 = fmul float %18, %31
  %44 = fadd float %43, %36
  %45 = fmul float %19, %32
  %46 = fadd float %45, %38
  %47 = fmul float %20, %32
  %48 = fadd float %47, %40
  %49 = fmul float %21, %32
  %50 = fadd float %49, %42
  %51 = fmul float %22, %32
  %52 = fadd float %51, %44
  %53 = fadd float %46, %23
  %54 = fadd float %48, %24
  %55 = fadd float %50, %25
  %56 = fadd float %52, %26
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %53, float %54, float %55, float %56)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0820700
bf8c007f
e00c2000
80010000
c0800100
bf8c0070
c2020103
bf8c007f
10080004
c2020107
bf8c007f
d2820004
04120204
c202010b
bf8c007f
d2820004
04120404
c202010f
bf8c007f
06080804
c2020102
bf8c007f
100a0004
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820005
04160404
c202010e
bf8c007f
060a0a04
c2020101
bf8c007f
100c0004
c2020105
bf8c007f
d2820006
041a0204
c2020109
bf8c007f
d2820006
041a0404
c202010d
bf8c007f
060c0c04
c2020100
bf8c007f
100e0004
c2020104
bf8c007f
d2820007
041e0204
c2020108
bf8c007f
d2820000
041e0404
c200010c
bf8c007f
06000000
f80008cf
04050600
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[8..9]
DCL CONST[1..7]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {    1.0000,     0.2126,     0.7152,     0.0722}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy
  2: DP4 TEMP[1].x, IMM[0].xxxx, CONST[6]
  3: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[4].yyyy
  4: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].xxxx
  5: MOV TEMP[2].w, TEMP[1].wwww
  6: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[1].xyyy
  7: MOV TEMP[3].xy, TEMP[3].xyyy
  8: TEX TEMP[3], TEMP[3], SAMP[0], 2D
  9: DP4 TEMP[4].x, IMM[0].xxxx, CONST[5]
 10: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx
 11: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 12: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[0].yzww
 13: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx
 14: RCP TEMP[5].x, TEMP[5].xxxx
 15: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 17: MAD TEMP[1].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[1].xyzz
 18: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
 19: MAX TEMP[1].x, IN[2].wwww, CONST[2].wwww
 20: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 21: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[2].xyzz, CONST[2].xyzz
 22: MOV OUT[0], TEMP[2]
 23: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 20)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %41 = load <32 x i8> addrspace(2)* %40, !tbaa !0
  %42 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0
  %44 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %46 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %48 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %49 = fmul float %13, %38
  %50 = fadd float %49, %39
  %51 = fmul float 1.000000e+00, %34
  %52 = fmul float 1.000000e+00, %35
  %53 = fadd float %51, %52
  %54 = fmul float 1.000000e+00, %36
  %55 = fadd float %53, %54
  %56 = fmul float 1.000000e+00, %37
  %57 = fadd float %55, %56
  %58 = fadd float %57, %29
  %59 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
  %60 = call float @llvm.AMDGPU.lrp(float %59, float %44, float 1.000000e+00)
  %61 = call float @llvm.AMDGPU.lrp(float %59, float %45, float 1.000000e+00)
  %62 = call float @llvm.AMDGPU.lrp(float %59, float %46, float 1.000000e+00)
  %63 = call float @llvm.AMDGPU.lrp(float %59, float %47, float 1.000000e+00)
  %64 = fmul float %12, %22
  %65 = fmul float %50, %23
  %66 = bitcast float %64 to i32
  %67 = bitcast float %65 to i32
  %68 = insertelement <2 x i32> undef, i32 %66, i32 0
  %69 = insertelement <2 x i32> %68, i32 %67, i32 1
  %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %41, <16 x i8> %43, i32 2)
  %71 = extractelement <4 x float> %70, i32 0
  %72 = extractelement <4 x float> %70, i32 1
  %73 = extractelement <4 x float> %70, i32 2
  %74 = extractelement <4 x float> %70, i32 3
  %75 = fmul float 1.000000e+00, %30
  %76 = fmul float 1.000000e+00, %31
  %77 = fadd float %75, %76
  %78 = fmul float 1.000000e+00, %32
  %79 = fadd float %77, %78
  %80 = fmul float 1.000000e+00, %33
  %81 = fadd float %79, %80
  %82 = fadd float %81, %28
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = fmul float %83, %74
  %85 = fmul float %71, 0x3FCB367A00000000
  %86 = fmul float %72, 0x3FE6E2EB20000000
  %87 = fadd float %86, %85
  %88 = fmul float %73, 0x3FB27BB300000000
  %89 = fadd float %87, %88
  %90 = fcmp uge float %89, 0x3F50624DE0000000
  %91 = select i1 %90, float %89, float 0x3F50624DE0000000
  %92 = fdiv float 1.000000e+00, %91
  %93 = fmul float %71, %92
  %94 = fmul float %72, %92
  %95 = fmul float %73, %92
  %96 = fmul float %60, %71
  %97 = fmul float %61, %72
  %98 = fmul float %62, %73
  %99 = fmul float %84, %93
  %100 = fadd float %99, %96
  %101 = fmul float %84, %94
  %102 = fadd float %101, %97
  %103 = fmul float %84, %95
  %104 = fadd float %103, %98
  %105 = fmul float %100, 4.000000e+00
  %106 = fmul float %102, 4.000000e+00
  %107 = fmul float %104, 4.000000e+00
  %108 = fcmp uge float %48, %27
  %109 = select i1 %108, float %48, float %27
  %110 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %110, float %105, float %24)
  %112 = call float @llvm.AMDGPU.lrp(float %110, float %106, float %25)
  %113 = call float @llvm.AMDGPU.lrp(float %110, float %107, float %26)
  %114 = call i32 @llvm.SI.packf16(float %111, float %112)
  %115 = bitcast i32 %114 to float
  %116 = call i32 @llvm.SI.packf16(float %113, float %63)
  %117 = bitcast i32 %116 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %115, float %117, float %115, float %117)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
c0840100
bf8c007f
c2000924
c2008925
bf8c007f
7e080201
d2820003
04100103
c2000905
bf8c007f
10080600
c2000904
bf8c007f
10060400
c0800300
c0c60500
bf8c007f
f0800f00
00030203
bf8c0770
100c04ff
3e59b3d0
7e0e02ff
3f371759
d2820006
041a0f03
7e0e02ff
3d93dd98
d2820006
041a0f04
7e0e02ff
3a83126f
d00c0000
02020f06
d2000006
00020d07
7e0c5506
10140d03
befc0306
c8240100
c8250101
c2000918
c2008919
bf8c007f
7e0e0201
d2060007
02020e00
c200091a
bf8c007f
060e0e00
c200091b
bf8c007f
060e0e00
c2000911
bf8c007f
060e0e00
d2060807
02010107
08100ef2
d2820009
04221307
10160709
c2000914
c2008915
bf8c007f
7e120201
d2060009
02021200
c2000916
bf8c007f
06121200
c2000917
bf8c007f
06121200
c2000910
bf8c007f
06121200
d2060809
02010109
10120b09
d282000a
042e1509
101814f6
c8280700
c8290701
c200090b
bf8c007f
d00c0002
0200010a
7e160200
d200000a
000a150b
d206080a
0201010a
081614f2
c2000909
bf8c007f
101a1600
d282000c
0436190a
101a0d02
c8380000
c8390001
d282000e
04221d07
101c050e
d282000d
043a1b09
101a1af6
c2000908
bf8c007f
101c1600
d282000d
043a1b0a
5e18190d
100c0d04
c8340200
c8350201
d282000d
04221b07
1004090d
d2820002
040a0d09
100404f6
c200090a
bf8c007f
10061600
d2820002
040e050a
c80c0300
c80d0301
d2820000
04220707
5e000102
f8001c0f
000c000c
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL CONST[0..204]
DCL TEMP[0..6], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  3: UMUL TEMP[2].x, IMM[1].xxxx, TEMP[0].wwww
  4: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].zzzz
  5: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].yyyy
  6: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].xxxx
  7: UARL ADDR[0].x, TEMP[5].xxxx
  8: MUL TEMP[5], CONST[ADDR[0].x+13], IN[1].xxxx
  9: UARL ADDR[0].x, TEMP[4].xxxx
 10: MAD TEMP[4], CONST[ADDR[0].x+13], IN[1].yyyy, TEMP[5]
 11: UARL ADDR[0].x, TEMP[3].xxxx
 12: MAD TEMP[3], CONST[ADDR[0].x+13], IN[1].zzzz, TEMP[4]
 13: UARL ADDR[0].x, TEMP[2].xxxx
 14: MAD TEMP[2], CONST[ADDR[0].x+13], IN[1].wwww, TEMP[3]
 15: DP4 TEMP[2].x, TEMP[1], TEMP[2]
 16: UMAD TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 17: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 18: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 19: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 20: UARL ADDR[0].x, TEMP[6].xxxx
 21: MUL TEMP[6], CONST[ADDR[0].x+13], IN[1].xxxx
 22: UARL ADDR[0].x, TEMP[5].xxxx
 23: MAD TEMP[5], CONST[ADDR[0].x+13], IN[1].yyyy, TEMP[6]
 24: UARL ADDR[0].x, TEMP[4].xxxx
 25: MAD TEMP[4], CONST[ADDR[0].x+13], IN[1].zzzz, TEMP[5]
 26: UARL ADDR[0].x, TEMP[3].xxxx
 27: MAD TEMP[3], CONST[ADDR[0].x+13], IN[1].wwww, TEMP[4]
 28: DP4 TEMP[3].x, TEMP[1], TEMP[3]
 29: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 30: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 31: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 32: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 33: UARL ADDR[0].x, TEMP[0].xxxx
 34: MUL TEMP[0], CONST[ADDR[0].x+13], IN[1].xxxx
 35: UARL ADDR[0].x, TEMP[6].xxxx
 36: MAD TEMP[0], CONST[ADDR[0].x+13], IN[1].yyyy, TEMP[0]
 37: UARL ADDR[0].x, TEMP[5].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+13], IN[1].zzzz, TEMP[0]
 39: UARL ADDR[0].x, TEMP[4].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+13], IN[1].wwww, TEMP[0]
 41: DP4 TEMP[0].x, TEMP[1], TEMP[0]
 42: MUL TEMP[1], CONST[4], TEMP[2].xxxx
 43: MAD TEMP[1], CONST[5], TEMP[3].xxxx, TEMP[1]
 44: MAD TEMP[1], CONST[6], TEMP[0].xxxx, TEMP[1]
 45: ADD TEMP[1], TEMP[1], CONST[7]
 46: MOV TEMP[4].w, IMM[0].xxxx
 47: MOV TEMP[4].xyz, CONST[8].xyzx
 48: MUL TEMP[2], CONST[0], TEMP[2].xxxx
 49: MAD TEMP[2], CONST[1], TEMP[3].xxxx, TEMP[2]
 50: MAD TEMP[0], CONST[2], TEMP[0].xxxx, TEMP[2]
 51: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 52: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 53: MAD TEMP[2].x, TEMP[1].zzzz, CONST[11].xxxx, CONST[11].yyyy
 54: MOV TEMP[0].w, TEMP[2].xxxx
 55: MOV OUT[1], TEMP[4]
 56: MOV OUT[2], TEMP[0]
 57: MOV OUT[0], TEMP[1]
 58: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = extractelement <4 x float> %61, i32 3
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = extractelement <4 x float> %68, i32 2
  %72 = extractelement <4 x float> %68, i32 3
  %73 = fptosi float %69 to i32
  %74 = fptosi float %70 to i32
  %75 = fptosi float %71 to i32
  %76 = fptosi float %72 to i32
  %77 = bitcast i32 %73 to float
  %78 = bitcast i32 %74 to float
  %79 = bitcast i32 %75 to float
  %80 = bitcast i32 %76 to float
  %81 = fmul float %56, %45
  %82 = fadd float %81, %42
  %83 = fmul float %57, %46
  %84 = fadd float %83, %43
  %85 = fmul float %58, %47
  %86 = fadd float %85, %44
  %87 = bitcast float %80 to i32
  %88 = mul i32 3, %87
  %89 = bitcast i32 %88 to float
  %90 = bitcast float %79 to i32
  %91 = mul i32 3, %90
  %92 = bitcast i32 %91 to float
  %93 = bitcast float %78 to i32
  %94 = mul i32 3, %93
  %95 = bitcast i32 %94 to float
  %96 = bitcast float %77 to i32
  %97 = mul i32 3, %96
  %98 = bitcast i32 %97 to float
  %99 = bitcast float %98 to i32
  %100 = shl i32 %99, 4
  %101 = add i32 %100, 208
  %102 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %101)
  %103 = fmul float %102, %62
  %104 = shl i32 %99, 4
  %105 = add i32 %104, 212
  %106 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %105)
  %107 = fmul float %106, %62
  %108 = shl i32 %99, 4
  %109 = add i32 %108, 216
  %110 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %109)
  %111 = fmul float %110, %62
  %112 = shl i32 %99, 4
  %113 = add i32 %112, 220
  %114 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %113)
  %115 = fmul float %114, %62
  %116 = bitcast float %95 to i32
  %117 = shl i32 %116, 4
  %118 = add i32 %117, 208
  %119 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %118)
  %120 = fmul float %119, %63
  %121 = fadd float %120, %103
  %122 = shl i32 %116, 4
  %123 = add i32 %122, 212
  %124 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %123)
  %125 = fmul float %124, %63
  %126 = fadd float %125, %107
  %127 = shl i32 %116, 4
  %128 = add i32 %127, 216
  %129 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %128)
  %130 = fmul float %129, %63
  %131 = fadd float %130, %111
  %132 = shl i32 %116, 4
  %133 = add i32 %132, 220
  %134 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %133)
  %135 = fmul float %134, %63
  %136 = fadd float %135, %115
  %137 = bitcast float %92 to i32
  %138 = shl i32 %137, 4
  %139 = add i32 %138, 208
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %64
  %142 = fadd float %141, %121
  %143 = shl i32 %137, 4
  %144 = add i32 %143, 212
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %64
  %147 = fadd float %146, %126
  %148 = shl i32 %137, 4
  %149 = add i32 %148, 216
  %150 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %149)
  %151 = fmul float %150, %64
  %152 = fadd float %151, %131
  %153 = shl i32 %137, 4
  %154 = add i32 %153, 220
  %155 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %154)
  %156 = fmul float %155, %64
  %157 = fadd float %156, %136
  %158 = bitcast float %89 to i32
  %159 = shl i32 %158, 4
  %160 = add i32 %159, 208
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %65
  %163 = fadd float %162, %142
  %164 = shl i32 %158, 4
  %165 = add i32 %164, 212
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %65
  %168 = fadd float %167, %147
  %169 = shl i32 %158, 4
  %170 = add i32 %169, 216
  %171 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %170)
  %172 = fmul float %171, %65
  %173 = fadd float %172, %152
  %174 = shl i32 %158, 4
  %175 = add i32 %174, 220
  %176 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %175)
  %177 = fmul float %176, %65
  %178 = fadd float %177, %157
  %179 = fmul float %82, %163
  %180 = fmul float %84, %168
  %181 = fadd float %179, %180
  %182 = fmul float %86, %173
  %183 = fadd float %181, %182
  %184 = fmul float 1.000000e+00, %178
  %185 = fadd float %183, %184
  %186 = bitcast float %80 to i32
  %187 = mul i32 3, %186
  %188 = add i32 %187, 1
  %189 = bitcast i32 %188 to float
  %190 = bitcast float %79 to i32
  %191 = mul i32 3, %190
  %192 = add i32 %191, 1
  %193 = bitcast i32 %192 to float
  %194 = bitcast float %78 to i32
  %195 = mul i32 3, %194
  %196 = add i32 %195, 1
  %197 = bitcast i32 %196 to float
  %198 = bitcast float %77 to i32
  %199 = mul i32 3, %198
  %200 = add i32 %199, 1
  %201 = bitcast i32 %200 to float
  %202 = bitcast float %201 to i32
  %203 = shl i32 %202, 4
  %204 = add i32 %203, 208
  %205 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %204)
  %206 = fmul float %205, %62
  %207 = shl i32 %202, 4
  %208 = add i32 %207, 212
  %209 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %208)
  %210 = fmul float %209, %62
  %211 = shl i32 %202, 4
  %212 = add i32 %211, 216
  %213 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %212)
  %214 = fmul float %213, %62
  %215 = shl i32 %202, 4
  %216 = add i32 %215, 220
  %217 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %216)
  %218 = fmul float %217, %62
  %219 = bitcast float %197 to i32
  %220 = shl i32 %219, 4
  %221 = add i32 %220, 208
  %222 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %221)
  %223 = fmul float %222, %63
  %224 = fadd float %223, %206
  %225 = shl i32 %219, 4
  %226 = add i32 %225, 212
  %227 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %226)
  %228 = fmul float %227, %63
  %229 = fadd float %228, %210
  %230 = shl i32 %219, 4
  %231 = add i32 %230, 216
  %232 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %231)
  %233 = fmul float %232, %63
  %234 = fadd float %233, %214
  %235 = shl i32 %219, 4
  %236 = add i32 %235, 220
  %237 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %236)
  %238 = fmul float %237, %63
  %239 = fadd float %238, %218
  %240 = bitcast float %193 to i32
  %241 = shl i32 %240, 4
  %242 = add i32 %241, 208
  %243 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %242)
  %244 = fmul float %243, %64
  %245 = fadd float %244, %224
  %246 = shl i32 %240, 4
  %247 = add i32 %246, 212
  %248 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %247)
  %249 = fmul float %248, %64
  %250 = fadd float %249, %229
  %251 = shl i32 %240, 4
  %252 = add i32 %251, 216
  %253 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %252)
  %254 = fmul float %253, %64
  %255 = fadd float %254, %234
  %256 = shl i32 %240, 4
  %257 = add i32 %256, 220
  %258 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %257)
  %259 = fmul float %258, %64
  %260 = fadd float %259, %239
  %261 = bitcast float %189 to i32
  %262 = shl i32 %261, 4
  %263 = add i32 %262, 208
  %264 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %263)
  %265 = fmul float %264, %65
  %266 = fadd float %265, %245
  %267 = shl i32 %261, 4
  %268 = add i32 %267, 212
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = fmul float %269, %65
  %271 = fadd float %270, %250
  %272 = shl i32 %261, 4
  %273 = add i32 %272, 216
  %274 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %273)
  %275 = fmul float %274, %65
  %276 = fadd float %275, %255
  %277 = shl i32 %261, 4
  %278 = add i32 %277, 220
  %279 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %278)
  %280 = fmul float %279, %65
  %281 = fadd float %280, %260
  %282 = fmul float %82, %266
  %283 = fmul float %84, %271
  %284 = fadd float %282, %283
  %285 = fmul float %86, %276
  %286 = fadd float %284, %285
  %287 = fmul float 1.000000e+00, %281
  %288 = fadd float %286, %287
  %289 = bitcast float %80 to i32
  %290 = mul i32 3, %289
  %291 = add i32 %290, 2
  %292 = bitcast i32 %291 to float
  %293 = bitcast float %79 to i32
  %294 = mul i32 3, %293
  %295 = add i32 %294, 2
  %296 = bitcast i32 %295 to float
  %297 = bitcast float %78 to i32
  %298 = mul i32 3, %297
  %299 = add i32 %298, 2
  %300 = bitcast i32 %299 to float
  %301 = bitcast float %77 to i32
  %302 = mul i32 3, %301
  %303 = add i32 %302, 2
  %304 = bitcast i32 %303 to float
  %305 = bitcast float %304 to i32
  %306 = shl i32 %305, 4
  %307 = add i32 %306, 208
  %308 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %307)
  %309 = fmul float %308, %62
  %310 = shl i32 %305, 4
  %311 = add i32 %310, 212
  %312 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %311)
  %313 = fmul float %312, %62
  %314 = shl i32 %305, 4
  %315 = add i32 %314, 216
  %316 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %315)
  %317 = fmul float %316, %62
  %318 = shl i32 %305, 4
  %319 = add i32 %318, 220
  %320 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %319)
  %321 = fmul float %320, %62
  %322 = bitcast float %300 to i32
  %323 = shl i32 %322, 4
  %324 = add i32 %323, 208
  %325 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %324)
  %326 = fmul float %325, %63
  %327 = fadd float %326, %309
  %328 = shl i32 %322, 4
  %329 = add i32 %328, 212
  %330 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %329)
  %331 = fmul float %330, %63
  %332 = fadd float %331, %313
  %333 = shl i32 %322, 4
  %334 = add i32 %333, 216
  %335 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %334)
  %336 = fmul float %335, %63
  %337 = fadd float %336, %317
  %338 = shl i32 %322, 4
  %339 = add i32 %338, 220
  %340 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %339)
  %341 = fmul float %340, %63
  %342 = fadd float %341, %321
  %343 = bitcast float %296 to i32
  %344 = shl i32 %343, 4
  %345 = add i32 %344, 208
  %346 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %345)
  %347 = fmul float %346, %64
  %348 = fadd float %347, %327
  %349 = shl i32 %343, 4
  %350 = add i32 %349, 212
  %351 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %350)
  %352 = fmul float %351, %64
  %353 = fadd float %352, %332
  %354 = shl i32 %343, 4
  %355 = add i32 %354, 216
  %356 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %355)
  %357 = fmul float %356, %64
  %358 = fadd float %357, %337
  %359 = shl i32 %343, 4
  %360 = add i32 %359, 220
  %361 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %360)
  %362 = fmul float %361, %64
  %363 = fadd float %362, %342
  %364 = bitcast float %292 to i32
  %365 = shl i32 %364, 4
  %366 = add i32 %365, 208
  %367 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %366)
  %368 = fmul float %367, %65
  %369 = fadd float %368, %348
  %370 = shl i32 %364, 4
  %371 = add i32 %370, 212
  %372 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %371)
  %373 = fmul float %372, %65
  %374 = fadd float %373, %353
  %375 = shl i32 %364, 4
  %376 = add i32 %375, 216
  %377 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %376)
  %378 = fmul float %377, %65
  %379 = fadd float %378, %358
  %380 = shl i32 %364, 4
  %381 = add i32 %380, 220
  %382 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %381)
  %383 = fmul float %382, %65
  %384 = fadd float %383, %363
  %385 = fmul float %82, %369
  %386 = fmul float %84, %374
  %387 = fadd float %385, %386
  %388 = fmul float %86, %379
  %389 = fadd float %387, %388
  %390 = fmul float 1.000000e+00, %384
  %391 = fadd float %389, %390
  %392 = fmul float %23, %185
  %393 = fmul float %24, %185
  %394 = fmul float %25, %185
  %395 = fmul float %26, %185
  %396 = fmul float %27, %288
  %397 = fadd float %396, %392
  %398 = fmul float %28, %288
  %399 = fadd float %398, %393
  %400 = fmul float %29, %288
  %401 = fadd float %400, %394
  %402 = fmul float %30, %288
  %403 = fadd float %402, %395
  %404 = fmul float %31, %391
  %405 = fadd float %404, %397
  %406 = fmul float %32, %391
  %407 = fadd float %406, %399
  %408 = fmul float %33, %391
  %409 = fadd float %408, %401
  %410 = fmul float %34, %391
  %411 = fadd float %410, %403
  %412 = fadd float %405, %35
  %413 = fadd float %407, %36
  %414 = fadd float %409, %37
  %415 = fadd float %411, %38
  %416 = fmul float %11, %185
  %417 = fmul float %12, %185
  %418 = fmul float %13, %185
  %419 = fmul float %14, %288
  %420 = fadd float %419, %416
  %421 = fmul float %15, %288
  %422 = fadd float %421, %417
  %423 = fmul float %16, %288
  %424 = fadd float %423, %418
  %425 = fmul float %17, %391
  %426 = fadd float %425, %420
  %427 = fmul float %18, %391
  %428 = fadd float %427, %422
  %429 = fmul float %19, %391
  %430 = fadd float %429, %424
  %431 = fadd float %426, %20
  %432 = fadd float %428, %21
  %433 = fadd float %430, %22
  %434 = fsub float -0.000000e+00, %50
  %435 = fadd float %431, %434
  %436 = fsub float -0.000000e+00, %51
  %437 = fadd float %432, %436
  %438 = fsub float -0.000000e+00, %52
  %439 = fadd float %433, %438
  %440 = fmul float %414, %48
  %441 = fadd float %440, %49
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %435, float %437, float %439, float %441)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %412, float %413, float %414, float %415)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020800
bf8c0770
7e021108
d2d60005
02010701
34180a84
4a0218ff
000000d0
e0301000
80000601
c0840704
bf8c0070
e00c2000
80020100
bf8c0770
100e0306
7e0c1109
d2d60006
02010706
341a0c84
4a1c1aff
000000d0
e0301000
80000e0e
bf8c0770
d282000f
041e050e
7e0e110a
d2d60007
02010707
341c0e84
4a201cff
000000d0
e0301000
80001010
bf8c0770
d2820010
043e0710
7e10110b
d2d60008
02010708
341e1084
4a121eff
000000d0
e0301000
80000909
bf8c0770
d282000b
04420909
4a1218ff
000000d4
e0301000
80000909
bf8c0770
10120309
4a141aff
000000d4
e0301000
80000a0a
bf8c0770
d2820009
0426050a
4a141cff
000000d4
e0301000
80000a0a
bf8c0770
d2820009
0426070a
4a141eff
000000d4
e0301000
80000a0a
bf8c0770
d282000a
0426090a
c0820700
bf8c007f
e00c2000
80011000
c2020129
c2028125
bf8c0070
7e000205
d2820009
04000911
10001509
c2020128
c2028124
bf8c007f
7e140205
d282000a
04280910
d2820000
0402170a
4a1618ff
000000d8
e0301000
80000b0b
bf8c0770
1016030b
4a281aff
000000d8
e0301000
80001414
bf8c0770
d282000b
042e0514
4a281cff
000000d8
e0301000
80001414
bf8c0770
d282000b
042e0714
4a281eff
000000d8
e0301000
80001414
bf8c0770
d2820014
042e0914
c202012a
c2028126
bf8c007f
7e160205
d282000b
042c0912
d2820000
0402290b
4a1818ff
000000dc
e0301000
80000c0c
bf8c0770
1018030c
4a1a1aff
000000dc
e0301000
80000d0d
bf8c0770
d282000c
0432050d
4a1a1cff
000000dc
e0301000
80000d0d
bf8c0770
d282000c
0432070d
4a1a1eff
000000dc
e0301000
80000d0d
bf8c0770
d282000c
0432090d
06001900
c2020112
bf8c007f
101a0004
4a180a81
34181884
4a1c18ff
000000d0
e0301000
80000e0e
bf8c0770
101e030e
4a1c0c81
341c1c84
4a201cff
000000d0
e0301000
80001010
bf8c0770
d2820010
043e0510
4a1e0e81
341e1e84
4a221eff
000000d0
e0301000
80001111
bf8c0770
d2820011
04420711
4a201081
34202084
4a2420ff
000000d0
e0301000
80001212
bf8c0770
d2820011
04460912
4a2418ff
000000d4
e0301000
80001212
bf8c0770
10240312
4a261cff
000000d4
e0301000
80001313
bf8c0770
d2820012
044a0513
4a261eff
000000d4
e0301000
80001313
bf8c0770
d2820012
044a0713
4a2620ff
000000d4
e0301000
80001313
bf8c0770
d2820012
044a0913
10242509
d2820011
044a230a
4a2418ff
000000d8
e0301000
80001212
bf8c0770
10240312
4a261cff
000000d8
e0301000
80001313
bf8c0770
d2820012
044a0513
4a261eff
000000d8
e0301000
80001313
bf8c0770
d2820012
044a0713
4a2620ff
000000d8
e0301000
80001313
bf8c0770
d2820012
044a0913
d2820011
0446250b
4a1818ff
000000dc
e0301000
80000c0c
bf8c0770
1018030c
4a1c1cff
000000dc
e0301000
80000e0e
bf8c0770
d282000c
0432050e
4a1c1eff
000000dc
e0301000
80000e0e
bf8c0770
d282000c
0432070e
4a1c20ff
000000dc
e0301000
80000e0e
bf8c0770
d282000c
0432090e
06181911
c2020116
bf8c007f
d282000d
04361804
4a0a0a82
340a0a84
4a1c0aff
000000d0
e0301000
80000e0e
bf8c0770
101c030e
4a0c0c82
340c0c84
4a1e0cff
000000d0
e0301000
80000f0f
bf8c0770
d282000e
043a050f
4a0e0e82
340e0e84
4a1e0eff
000000d0
e0301000
80000f0f
bf8c0770
d282000e
043a070f
4a101082
34101084
4a1e10ff
000000d0
e0301000
80000f0f
bf8c0770
d282000e
043a090f
4a1e0aff
000000d4
e0301000
80000f0f
bf8c0770
101e030f
4a200cff
000000d4
e0301000
80001010
bf8c0770
d282000f
043e0510
4a200eff
000000d4
e0301000
80001010
bf8c0770
d282000f
043e0710
4a2010ff
000000d4
e0301000
80001010
bf8c0770
d282000f
043e0910
10121f09
d2820009
04261d0a
4a140aff
000000d8
e0301000
80000a0a
bf8c0770
1014030a
4a1c0cff
000000d8
e0301000
80000e0e
bf8c0770
d282000a
042a050e
4a1c0eff
000000d8
e0301000
80000e0e
bf8c0770
d282000a
042a070e
4a1c10ff
000000d8
e0301000
80000e0e
bf8c0770
d282000a
042a090e
d2820009
0426150b
4a0a0aff
000000dc
e0301000
80000505
bf8c0770
100a0305
4a0c0cff
000000dc
e0301000
80000606
bf8c0770
d2820005
04160506
4a0c0eff
000000dc
e0301000
80000606
bf8c0770
d2820005
04160706
4a0c10ff
000000dc
e0301000
80000606
bf8c0770
d2820001
04160906
06020309
c202011a
bf8c007f
d2820002
04360204
c202011e
bf8c007f
06040404
c202012c
c202812d
bf8c007f
7e060205
d2820003
040c0902
c2020102
bf8c007f
10080004
c2020106
bf8c007f
d2820004
04121804
c202010a
bf8c007f
d2820004
04120204
c202010e
bf8c007f
06080804
c2020132
bf8c007f
0a080804
c2020101
bf8c007f
100a0004
c2020105
bf8c007f
d2820005
04161804
c2020109
bf8c007f
d2820005
04160204
c202010d
bf8c007f
060a0a04
c2020131
bf8c007f
0a0a0a04
c2020100
bf8c007f
100c0004
c2020104
bf8c007f
d2820006
041a1804
c2020108
bf8c007f
d2820006
041a0204
c202010c
bf8c007f
060c0c04
c2020130
bf8c007f
0a0c0c04
f800021f
03040506
c2020113
bf8c000f
10060004
c2020117
bf8c007f
d2820003
040e1804
c202011b
bf8c007f
d2820003
040e0204
c202011f
bf8c007f
06060604
c2020111
bf8c007f
10080004
c2020115
bf8c007f
d2820004
04121804
c2020119
bf8c007f
d2820004
04120204
c202011d
bf8c007f
06080804
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04021804
c2020118
bf8c007f
d2820000
04020204
c200011c
bf8c007f
06000000
f80008cf
03020400
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL CONST[11..12]
DCL CONST[4..10]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[5].xxxx
  4: MOV TEMP[2].xy, IN[4].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[4].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].xy, IN[4].xyyy
 13: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 14: MAD TEMP[5].yw, IMM[0].zzzz, TEMP[5], IMM[0].xxxx
 15: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 16: RSQ TEMP[6].x, TEMP[6].xxxx
 17: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 18: DP2 TEMP[6].x, TEMP[5].ywww, TEMP[5].ywww
 19: ADD TEMP[6].x, IMM[0].yyyy, -TEMP[6].xxxx
 20: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
 21: RSQ TEMP[7].x, TEMP[6].xxxx
 22: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx
 23: CMP TEMP[7].x, -TEMP[6].xxxx, TEMP[7].xxxx, IMM[0].wwww
 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 25: DP3 TEMP[6].x, IN[5].yzww, IN[5].yzww
 26: RSQ TEMP[6].x, TEMP[6].xxxx
 27: MUL TEMP[6].xyz, IN[5].yzww, TEMP[6].xxxx
 28: DP3 TEMP[7].x, IN[6].xyzz, IN[6].xyzz
 29: RSQ TEMP[7].x, TEMP[7].xxxx
 30: MUL TEMP[7].xyz, IN[6].xyzz, TEMP[7].xxxx
 31: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].wwww
 32: MAD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].yyyy, TEMP[7].xyzz
 33: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
 34: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 35: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 36: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 37: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 38: MOV TEMP[2].xyz, TEMP[2].xyzz
 39: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 40: DP4 TEMP[3].x, TEMP[4], CONST[9]
 41: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[7].yyyy
 42: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 43: MUL TEMP[3], TEMP[4], TEMP[3]
 44: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[4].xyyy
 45: MOV TEMP[5].xy, TEMP[5].xyyy
 46: TEX TEMP[5], TEMP[5], SAMP[3], 2D
 47: DP4 TEMP[6].x, TEMP[4], CONST[8]
 48: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[7].xxxx
 49: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 50: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 51: MAX TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
 52: RCP TEMP[7].x, TEMP[7].xxxx
 53: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 54: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 55: MAD TEMP[5].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[5].xyzz
 56: MUL TEMP[3].xyz, TEMP[5].xyzz, IMM[2].xxxx
 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 58: DP4 TEMP[4].x, TEMP[4], CONST[10]
 59: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[7].zzzz
 60: MAD TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[3].xyzz
 61: MAX TEMP[2].x, IN[3].wwww, CONST[5].wwww
 62: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 63: LRP TEMP[3].xyz, TEMP[2].xxxx, TEMP[3].xyzz, CONST[5].xyzz
 64: MOV OUT[0], TEMP[3]
 65: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = fcmp ugt float %16, 0.000000e+00
  %62 = select i1 %61, float 1.000000e+00, float 0.000000e+00
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %65 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %66 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %82 = fmul float %13, %43
  %83 = fadd float %82, %44
  %84 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %88 = bitcast float %84 to i32
  %89 = icmp ne i32 %88, 0
  %. = select i1 %89, float -1.000000e+00, float 1.000000e+00
  %90 = bitcast float %71 to i32
  %91 = bitcast float %72 to i32
  %92 = insertelement <2 x i32> undef, i32 %90, i32 0
  %93 = insertelement <2 x i32> %92, i32 %91, i32 1
  %94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %46, <16 x i8> %48, i32 2)
  %95 = extractelement <4 x float> %94, i32 0
  %96 = extractelement <4 x float> %94, i32 1
  %97 = extractelement <4 x float> %94, i32 2
  %98 = extractelement <4 x float> %94, i32 3
  %99 = bitcast float %71 to i32
  %100 = bitcast float %72 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %50, <16 x i8> %52, i32 2)
  %104 = extractelement <4 x float> %103, i32 1
  %105 = extractelement <4 x float> %103, i32 3
  %106 = fmul float 2.000000e+00, %104
  %107 = fadd float %106, -1.000000e+00
  %108 = fmul float 2.000000e+00, %105
  %109 = fadd float %108, -1.000000e+00
  %110 = fmul float %73, %73
  %111 = fmul float %74, %74
  %112 = fadd float %111, %110
  %113 = fmul float %75, %75
  %114 = fadd float %112, %113
  %115 = call float @llvm.AMDGPU.rsq(float %114)
  %116 = fmul float %73, %115
  %117 = fmul float %74, %115
  %118 = fmul float %75, %115
  %119 = fmul float %107, %107
  %120 = fmul float %109, %109
  %121 = fadd float %119, %120
  %122 = fsub float -0.000000e+00, %121
  %123 = fadd float 1.000000e+00, %122
  %124 = fcmp uge float 0.000000e+00, %123
  %125 = select i1 %124, float 0.000000e+00, float %123
  %126 = call float @llvm.AMDGPU.rsq(float %125)
  %127 = fmul float %126, %125
  %128 = fsub float -0.000000e+00, %125
  %129 = call float @llvm.AMDGPU.cndlt(float %128, float %127, float 0.000000e+00)
  %130 = fmul float %116, %129
  %131 = fmul float %117, %129
  %132 = fmul float %118, %129
  %133 = fmul float %76, %76
  %134 = fmul float %77, %77
  %135 = fadd float %134, %133
  %136 = fmul float %78, %78
  %137 = fadd float %135, %136
  %138 = call float @llvm.AMDGPU.rsq(float %137)
  %139 = fmul float %76, %138
  %140 = fmul float %77, %138
  %141 = fmul float %78, %138
  %142 = fmul float %79, %79
  %143 = fmul float %80, %80
  %144 = fadd float %143, %142
  %145 = fmul float %81, %81
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %79, %147
  %149 = fmul float %80, %147
  %150 = fmul float %81, %147
  %151 = fmul float %148, %109
  %152 = fmul float %149, %109
  %153 = fmul float %150, %109
  %154 = fmul float %139, %107
  %155 = fadd float %154, %151
  %156 = fmul float %140, %107
  %157 = fadd float %156, %152
  %158 = fmul float %141, %107
  %159 = fadd float %158, %153
  %160 = fmul float %130, %.
  %161 = fadd float %160, %155
  %162 = fmul float %131, %.
  %163 = fadd float %162, %157
  %164 = fmul float %132, %.
  %165 = fadd float %164, %159
  %166 = fmul float %161, %67
  %167 = fmul float %163, %68
  %168 = fadd float %167, %166
  %169 = fmul float %165, %69
  %170 = fadd float %168, %169
  %171 = fmul float %170, %161
  %172 = fmul float %170, %163
  %173 = fmul float %170, %165
  %174 = fmul float 2.000000e+00, %171
  %175 = fmul float 2.000000e+00, %172
  %176 = fmul float 2.000000e+00, %173
  %177 = fsub float -0.000000e+00, %174
  %178 = fadd float %67, %177
  %179 = fsub float -0.000000e+00, %175
  %180 = fadd float %68, %179
  %181 = fsub float -0.000000e+00, %176
  %182 = fadd float %69, %181
  %183 = insertelement <4 x float> undef, float %178, i32 0
  %184 = insertelement <4 x float> %183, float %180, i32 1
  %185 = insertelement <4 x float> %184, float %182, i32 2
  %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3
  %187 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %186)
  %188 = extractelement <4 x float> %187, i32 0
  %189 = extractelement <4 x float> %187, i32 1
  %190 = extractelement <4 x float> %187, i32 2
  %191 = extractelement <4 x float> %187, i32 3
  %192 = call float @fabs(float %190)
  %193 = fdiv float 1.000000e+00, %192
  %194 = fmul float %188, %193
  %195 = fadd float %194, 1.500000e+00
  %196 = fmul float %189, %193
  %197 = fadd float %196, 1.500000e+00
  %198 = bitcast float %197 to i32
  %199 = bitcast float %195 to i32
  %200 = bitcast float %191 to i32
  %201 = insertelement <4 x i32> undef, i32 %198, i32 0
  %202 = insertelement <4 x i32> %201, i32 %199, i32 1
  %203 = insertelement <4 x i32> %202, i32 %200, i32 2
  %204 = insertelement <4 x i32> %203, i32 undef, i32 3
  %205 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %204, <32 x i8> %54, <16 x i8> %56, i32 4)
  %206 = extractelement <4 x float> %205, i32 0
  %207 = extractelement <4 x float> %205, i32 1
  %208 = extractelement <4 x float> %205, i32 2
  %209 = extractelement <4 x float> %205, i32 3
  %210 = fmul float %95, %35
  %211 = fmul float %96, %36
  %212 = fadd float %210, %211
  %213 = fmul float %97, %37
  %214 = fadd float %212, %213
  %215 = fmul float %98, %38
  %216 = fadd float %214, %215
  %217 = fadd float %216, %29
  %218 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00)
  %219 = call float @llvm.AMDGPU.lrp(float %218, float %63, float 1.000000e+00)
  %220 = call float @llvm.AMDGPU.lrp(float %218, float %64, float 1.000000e+00)
  %221 = call float @llvm.AMDGPU.lrp(float %218, float %65, float 1.000000e+00)
  %222 = call float @llvm.AMDGPU.lrp(float %218, float %66, float 1.000000e+00)
  %223 = fmul float %95, %219
  %224 = fmul float %96, %220
  %225 = fmul float %97, %221
  %226 = fmul float %98, %222
  %227 = fmul float %12, %22
  %228 = fmul float %83, %23
  %229 = bitcast float %227 to i32
  %230 = bitcast float %228 to i32
  %231 = insertelement <2 x i32> undef, i32 %229, i32 0
  %232 = insertelement <2 x i32> %231, i32 %230, i32 1
  %233 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %232, <32 x i8> %58, <16 x i8> %60, i32 2)
  %234 = extractelement <4 x float> %233, i32 0
  %235 = extractelement <4 x float> %233, i32 1
  %236 = extractelement <4 x float> %233, i32 2
  %237 = extractelement <4 x float> %233, i32 3
  %238 = fmul float %95, %31
  %239 = fmul float %96, %32
  %240 = fadd float %238, %239
  %241 = fmul float %97, %33
  %242 = fadd float %240, %241
  %243 = fmul float %98, %34
  %244 = fadd float %242, %243
  %245 = fadd float %244, %28
  %246 = call float @llvm.AMDIL.clamp.(float %245, float 0.000000e+00, float 1.000000e+00)
  %247 = fmul float %246, %237
  %248 = fmul float %234, 0x3FCB367A00000000
  %249 = fmul float %235, 0x3FE6E2EB20000000
  %250 = fadd float %249, %248
  %251 = fmul float %236, 0x3FB27BB300000000
  %252 = fadd float %250, %251
  %253 = fcmp uge float %252, 0x3F50624DE0000000
  %254 = select i1 %253, float %252, float 0x3F50624DE0000000
  %255 = fdiv float 1.000000e+00, %254
  %256 = fmul float %234, %255
  %257 = fmul float %235, %255
  %258 = fmul float %236, %255
  %259 = fmul float %223, %234
  %260 = fmul float %224, %235
  %261 = fmul float %225, %236
  %262 = fmul float %247, %256
  %263 = fadd float %262, %259
  %264 = fmul float %247, %257
  %265 = fadd float %264, %260
  %266 = fmul float %247, %258
  %267 = fadd float %266, %261
  %268 = fmul float %263, 4.000000e+00
  %269 = fmul float %265, 4.000000e+00
  %270 = fmul float %267, 4.000000e+00
  %271 = fmul float %206, %209
  %272 = fmul float %207, %209
  %273 = fmul float %208, %209
  %274 = fmul float %95, %39
  %275 = fmul float %96, %40
  %276 = fadd float %274, %275
  %277 = fmul float %97, %41
  %278 = fadd float %276, %277
  %279 = fmul float %98, %42
  %280 = fadd float %278, %279
  %281 = fadd float %280, %30
  %282 = call float @llvm.AMDIL.clamp.(float %281, float 0.000000e+00, float 1.000000e+00)
  %283 = fmul float %271, %282
  %284 = fadd float %283, %268
  %285 = fmul float %272, %282
  %286 = fadd float %285, %269
  %287 = fmul float %273, %282
  %288 = fadd float %287, %270
  %289 = fcmp uge float %70, %27
  %290 = select i1 %289, float %70, float %27
  %291 = call float @llvm.AMDIL.clamp.(float %290, float 0.000000e+00, float 1.000000e+00)
  %292 = call float @llvm.AMDGPU.lrp(float %291, float %284, float %24)
  %293 = call float @llvm.AMDGPU.lrp(float %291, float %286, float %25)
  %294 = call float @llvm.AMDGPU.lrp(float %291, float %288, float %26)
  %295 = call i32 @llvm.SI.packf16(float %292, float %293)
  %296 = bitcast i32 %295 to float
  %297 = call i32 @llvm.SI.packf16(float %294, float %226)
  %298 = bitcast i32 %297 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %296, float %298, float %296, float %298)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340b00
c8350b01
c8300a00
c8310a01
1014190c
d282000b
042a1b0d
c8280c00
c8290c01
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441100
c8451101
c8401000
c8411001
101a2110
d282000e
04362311
c8341200
c8351201
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8540e00
c8550e01
c8500d00
c8510d01
10222914
d2820012
04462b15
c8440f00
c8450f01
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0400
c83d0401
10281f0c
c8400500
c8410501
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200600
c8210601
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430808
bf8c0770
10241709
c0840300
c0c60500
bf8c007f
f0800f00
00430405
c0840100
bf8c0070
c2000925
bf8c007f
10180a00
c2000924
bf8c007f
d282000c
04300104
c2000926
bf8c007f
d282000c
04300106
c2000927
bf8c007f
d282000c
04300107
c200091d
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8380100
c8390101
d282000e
04361d0c
10261d05
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c2000911
bf8c007f
101e0600
c2000910
bf8c007f
101c0400
c080030c
c0c60518
bf8c007f
f0800f00
00030e0e
bf8c0770
10261f13
10041cff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070f
7e0602ff
3d93dd98
d2820002
040a0710
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
1028050f
c2000921
bf8c007f
10060a00
c2000920
bf8c007f
d2820003
040c0104
c2000922
bf8c007f
d2820003
040c0106
c2000923
bf8c007f
d2820003
040c0107
c200091c
bf8c007f
06060600
d2060803
02010103
10062303
d2820013
044e2903
102826f6
c2000929
bf8c007f
10260a00
c2000928
bf8c007f
d2820013
044c0104
c200092a
bf8c007f
d2820013
044c0106
c200092b
bf8c007f
d2820013
044c0107
c200091e
bf8c007f
06262600
d2060813
02010113
d2820015
04522712
c8480700
c8490701
c2000917
bf8c007f
d00c0002
02000112
7e280200
d2000012
000a2514
d2060812
02010112
082824f2
c2000915
bf8c007f
102c2800
d2820015
045a2b12
102c1708
c85c0000
c85d0001
d2820017
04362f0c
102e2f04
102e1d17
1030050e
d2820017
045e3103
102e2ef6
d2820016
045e2716
c2000914
bf8c007f
102e2800
d2820016
045e2d12
5e2a2b16
1010170a
c8240200
c8250201
d2820009
0436130c
10121306
10122109
10040510
d2820002
04260503
100404f6
d2820002
040a2708
c2000916
bf8c007f
10062800
d2820002
040e0512
c80c0300
c80d0301
d2820000
0436070c
10000107
5e000102
f8001c0f
00150015
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..206]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: MOV TEMP[3].w, IMM[0].xxxx
  6: MOV TEMP[3].xyz, IN[5].xyzx
  7: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww
  8: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz
  9: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy
 10: UMUL TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx
 11: UARL ADDR[0].x, TEMP[7].xxxx
 12: MUL TEMP[7], CONST[ADDR[0].x+15], IN[1].xxxx
 13: UARL ADDR[0].x, TEMP[6].xxxx
 14: MAD TEMP[6], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[7]
 15: UARL ADDR[0].x, TEMP[5].xxxx
 16: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[6]
 17: UARL ADDR[0].x, TEMP[4].xxxx
 18: UARL ADDR[0].x, TEMP[4].xxxx
 19: MAD TEMP[4], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[5]
 20: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 21: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 22: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 23: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 24: UARL ADDR[0].x, TEMP[8].xxxx
 25: MUL TEMP[8], CONST[ADDR[0].x+15], IN[1].xxxx
 26: UARL ADDR[0].x, TEMP[7].xxxx
 27: MAD TEMP[7], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[8]
 28: UARL ADDR[0].x, TEMP[6].xxxx
 29: MAD TEMP[6], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[7]
 30: UARL ADDR[0].x, TEMP[5].xxxx
 31: UARL ADDR[0].x, TEMP[5].xxxx
 32: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[6]
 33: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 34: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 35: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 36: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 37: UARL ADDR[0].x, TEMP[0].xxxx
 38: MUL TEMP[0], CONST[ADDR[0].x+15], IN[1].xxxx
 39: UARL ADDR[0].x, TEMP[8].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[0]
 41: UARL ADDR[0].x, TEMP[7].xxxx
 42: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[0]
 43: UARL ADDR[0].x, TEMP[6].xxxx
 44: UARL ADDR[0].x, TEMP[6].xxxx
 45: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[0]
 46: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 47: DP4 TEMP[7].x, TEMP[1], TEMP[5]
 48: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 49: DP4 TEMP[8].x, TEMP[2], TEMP[4]
 50: DP4 TEMP[9].x, TEMP[2], TEMP[5]
 51: MOV TEMP[8].y, TEMP[9].xxxx
 52: DP4 TEMP[2].x, TEMP[2], TEMP[0]
 53: MOV TEMP[8].z, TEMP[2].xxxx
 54: DP4 TEMP[2].x, TEMP[3], TEMP[4]
 55: DP4 TEMP[4].x, TEMP[3], TEMP[5]
 56: MOV TEMP[2].y, TEMP[4].xxxx
 57: DP4 TEMP[0].x, TEMP[3], TEMP[0]
 58: MOV TEMP[2].z, TEMP[0].xxxx
 59: MUL TEMP[0], CONST[4], TEMP[6].xxxx
 60: MAD TEMP[0], CONST[5], TEMP[7].xxxx, TEMP[0]
 61: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 62: ADD TEMP[0], TEMP[0], CONST[7]
 63: MOV TEMP[3].w, IMM[0].xxxx
 64: MOV TEMP[3].xyz, CONST[8].xyzx
 65: MUL TEMP[4].xyz, TEMP[8].xyzz, CONST[10].wwww
 66: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 67: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 68: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 69: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
 70: RSQ TEMP[5].x, TEMP[5].xxxx
 71: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
 72: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[10].wwww
 73: MUL TEMP[5], CONST[0], TEMP[2].xxxx
 74: MAD TEMP[5], CONST[1], TEMP[2].yyyy, TEMP[5]
 75: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[5]
 76: MUL TEMP[5], CONST[0], TEMP[6].xxxx
 77: MAD TEMP[5], CONST[1], TEMP[7].xxxx, TEMP[5]
 78: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[5]
 79: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 80: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[14].xyzz
 81: MAD TEMP[5].x, TEMP[0].zzzz, CONST[13].xxxx, CONST[13].yyyy
 82: MOV TEMP[1].w, TEMP[5].xxxx
 83: MAD TEMP[5].xy, IN[4].xyyy, CONST[9].xyyy, CONST[9].zwww
 84: MOV TEMP[5].zw, TEMP[4].yyxy
 85: MOV TEMP[6].x, TEMP[4].zzzz
 86: MUL TEMP[7].xyz, TEMP[2].zxyy, TEMP[4].yzxx
 87: MAD TEMP[4].xyz, TEMP[2].yzxx, TEMP[4].zxyy, -TEMP[7].xyzz
 88: MOV TEMP[6].yzw, TEMP[4].yxyz
 89: MOV TEMP[2].xyz, TEMP[2].xyzx
 90: MOV OUT[1], TEMP[3]
 91: MOV OUT[3], TEMP[5]
 92: MOV OUT[5], TEMP[2]
 93: MOV OUT[4], TEMP[6]
 94: MOV OUT[2], TEMP[1]
 95: MOV OUT[0], TEMP[0]
 96: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = extractelement <4 x float> %66, i32 3
  %71 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %5)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = extractelement <4 x float> %73, i32 3
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0
  %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %5)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %90 = load <16 x i8> addrspace(2)* %89, !tbaa !0
  %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %5)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = extractelement <4 x float> %91, i32 1
  %94 = extractelement <4 x float> %91, i32 2
  %95 = fptosi float %74 to i32
  %96 = fptosi float %75 to i32
  %97 = fptosi float %76 to i32
  %98 = fptosi float %77 to i32
  %99 = bitcast i32 %95 to float
  %100 = bitcast i32 %96 to float
  %101 = bitcast i32 %97 to float
  %102 = bitcast i32 %98 to float
  %103 = fmul float %61, %50
  %104 = fadd float %103, %47
  %105 = fmul float %62, %51
  %106 = fadd float %105, %48
  %107 = fmul float %63, %52
  %108 = fadd float %107, %49
  %109 = bitcast float %102 to i32
  %110 = mul i32 3, %109
  %111 = bitcast i32 %110 to float
  %112 = bitcast float %101 to i32
  %113 = mul i32 3, %112
  %114 = bitcast i32 %113 to float
  %115 = bitcast float %100 to i32
  %116 = mul i32 3, %115
  %117 = bitcast i32 %116 to float
  %118 = bitcast float %99 to i32
  %119 = mul i32 3, %118
  %120 = bitcast i32 %119 to float
  %121 = bitcast float %120 to i32
  %122 = shl i32 %121, 4
  %123 = add i32 %122, 240
  %124 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %123)
  %125 = fmul float %124, %67
  %126 = shl i32 %121, 4
  %127 = add i32 %126, 244
  %128 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %127)
  %129 = fmul float %128, %67
  %130 = shl i32 %121, 4
  %131 = add i32 %130, 248
  %132 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %131)
  %133 = fmul float %132, %67
  %134 = shl i32 %121, 4
  %135 = add i32 %134, 252
  %136 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %135)
  %137 = fmul float %136, %67
  %138 = bitcast float %117 to i32
  %139 = shl i32 %138, 4
  %140 = add i32 %139, 240
  %141 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %140)
  %142 = fmul float %141, %68
  %143 = fadd float %142, %125
  %144 = shl i32 %138, 4
  %145 = add i32 %144, 244
  %146 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %145)
  %147 = fmul float %146, %68
  %148 = fadd float %147, %129
  %149 = shl i32 %138, 4
  %150 = add i32 %149, 248
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %68
  %153 = fadd float %152, %133
  %154 = shl i32 %138, 4
  %155 = add i32 %154, 252
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %68
  %158 = fadd float %157, %137
  %159 = bitcast float %114 to i32
  %160 = shl i32 %159, 4
  %161 = add i32 %160, 240
  %162 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %161)
  %163 = fmul float %162, %69
  %164 = fadd float %163, %143
  %165 = shl i32 %159, 4
  %166 = add i32 %165, 244
  %167 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %166)
  %168 = fmul float %167, %69
  %169 = fadd float %168, %148
  %170 = shl i32 %159, 4
  %171 = add i32 %170, 248
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %69
  %174 = fadd float %173, %153
  %175 = shl i32 %159, 4
  %176 = add i32 %175, 252
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %69
  %179 = fadd float %178, %158
  %180 = bitcast float %111 to i32
  %181 = shl i32 %180, 4
  %182 = add i32 %181, 240
  %183 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %182)
  %184 = fmul float %183, %70
  %185 = fadd float %184, %164
  %186 = shl i32 %180, 4
  %187 = add i32 %186, 244
  %188 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %187)
  %189 = fmul float %188, %70
  %190 = fadd float %189, %169
  %191 = shl i32 %180, 4
  %192 = add i32 %191, 248
  %193 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %192)
  %194 = fmul float %193, %70
  %195 = fadd float %194, %174
  %196 = shl i32 %180, 4
  %197 = add i32 %196, 252
  %198 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %197)
  %199 = fmul float %198, %70
  %200 = fadd float %199, %179
  %201 = bitcast float %102 to i32
  %202 = mul i32 3, %201
  %203 = add i32 %202, 1
  %204 = bitcast i32 %203 to float
  %205 = bitcast float %101 to i32
  %206 = mul i32 3, %205
  %207 = add i32 %206, 1
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %100 to i32
  %210 = mul i32 3, %209
  %211 = add i32 %210, 1
  %212 = bitcast i32 %211 to float
  %213 = bitcast float %99 to i32
  %214 = mul i32 3, %213
  %215 = add i32 %214, 1
  %216 = bitcast i32 %215 to float
  %217 = bitcast float %216 to i32
  %218 = shl i32 %217, 4
  %219 = add i32 %218, 240
  %220 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %219)
  %221 = fmul float %220, %67
  %222 = shl i32 %217, 4
  %223 = add i32 %222, 244
  %224 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %223)
  %225 = fmul float %224, %67
  %226 = shl i32 %217, 4
  %227 = add i32 %226, 248
  %228 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %227)
  %229 = fmul float %228, %67
  %230 = shl i32 %217, 4
  %231 = add i32 %230, 252
  %232 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %231)
  %233 = fmul float %232, %67
  %234 = bitcast float %212 to i32
  %235 = shl i32 %234, 4
  %236 = add i32 %235, 240
  %237 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %236)
  %238 = fmul float %237, %68
  %239 = fadd float %238, %221
  %240 = shl i32 %234, 4
  %241 = add i32 %240, 244
  %242 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %241)
  %243 = fmul float %242, %68
  %244 = fadd float %243, %225
  %245 = shl i32 %234, 4
  %246 = add i32 %245, 248
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %68
  %249 = fadd float %248, %229
  %250 = shl i32 %234, 4
  %251 = add i32 %250, 252
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %68
  %254 = fadd float %253, %233
  %255 = bitcast float %208 to i32
  %256 = shl i32 %255, 4
  %257 = add i32 %256, 240
  %258 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %257)
  %259 = fmul float %258, %69
  %260 = fadd float %259, %239
  %261 = shl i32 %255, 4
  %262 = add i32 %261, 244
  %263 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %262)
  %264 = fmul float %263, %69
  %265 = fadd float %264, %244
  %266 = shl i32 %255, 4
  %267 = add i32 %266, 248
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %69
  %270 = fadd float %269, %249
  %271 = shl i32 %255, 4
  %272 = add i32 %271, 252
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %69
  %275 = fadd float %274, %254
  %276 = bitcast float %204 to i32
  %277 = shl i32 %276, 4
  %278 = add i32 %277, 240
  %279 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %278)
  %280 = fmul float %279, %70
  %281 = fadd float %280, %260
  %282 = shl i32 %276, 4
  %283 = add i32 %282, 244
  %284 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %283)
  %285 = fmul float %284, %70
  %286 = fadd float %285, %265
  %287 = shl i32 %276, 4
  %288 = add i32 %287, 248
  %289 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %288)
  %290 = fmul float %289, %70
  %291 = fadd float %290, %270
  %292 = shl i32 %276, 4
  %293 = add i32 %292, 252
  %294 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %293)
  %295 = fmul float %294, %70
  %296 = fadd float %295, %275
  %297 = bitcast float %102 to i32
  %298 = mul i32 3, %297
  %299 = add i32 %298, 2
  %300 = bitcast i32 %299 to float
  %301 = bitcast float %101 to i32
  %302 = mul i32 3, %301
  %303 = add i32 %302, 2
  %304 = bitcast i32 %303 to float
  %305 = bitcast float %100 to i32
  %306 = mul i32 3, %305
  %307 = add i32 %306, 2
  %308 = bitcast i32 %307 to float
  %309 = bitcast float %99 to i32
  %310 = mul i32 3, %309
  %311 = add i32 %310, 2
  %312 = bitcast i32 %311 to float
  %313 = bitcast float %312 to i32
  %314 = shl i32 %313, 4
  %315 = add i32 %314, 240
  %316 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %315)
  %317 = fmul float %316, %67
  %318 = shl i32 %313, 4
  %319 = add i32 %318, 244
  %320 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %319)
  %321 = fmul float %320, %67
  %322 = shl i32 %313, 4
  %323 = add i32 %322, 248
  %324 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %323)
  %325 = fmul float %324, %67
  %326 = shl i32 %313, 4
  %327 = add i32 %326, 252
  %328 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %327)
  %329 = fmul float %328, %67
  %330 = bitcast float %308 to i32
  %331 = shl i32 %330, 4
  %332 = add i32 %331, 240
  %333 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %332)
  %334 = fmul float %333, %68
  %335 = fadd float %334, %317
  %336 = shl i32 %330, 4
  %337 = add i32 %336, 244
  %338 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %337)
  %339 = fmul float %338, %68
  %340 = fadd float %339, %321
  %341 = shl i32 %330, 4
  %342 = add i32 %341, 248
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %68
  %345 = fadd float %344, %325
  %346 = shl i32 %330, 4
  %347 = add i32 %346, 252
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %68
  %350 = fadd float %349, %329
  %351 = bitcast float %304 to i32
  %352 = shl i32 %351, 4
  %353 = add i32 %352, 240
  %354 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %353)
  %355 = fmul float %354, %69
  %356 = fadd float %355, %335
  %357 = shl i32 %351, 4
  %358 = add i32 %357, 244
  %359 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %358)
  %360 = fmul float %359, %69
  %361 = fadd float %360, %340
  %362 = shl i32 %351, 4
  %363 = add i32 %362, 248
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %69
  %366 = fadd float %365, %345
  %367 = shl i32 %351, 4
  %368 = add i32 %367, 252
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %69
  %371 = fadd float %370, %350
  %372 = bitcast float %300 to i32
  %373 = shl i32 %372, 4
  %374 = add i32 %373, 240
  %375 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %374)
  %376 = fmul float %375, %70
  %377 = fadd float %376, %356
  %378 = shl i32 %372, 4
  %379 = add i32 %378, 244
  %380 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %379)
  %381 = fmul float %380, %70
  %382 = fadd float %381, %361
  %383 = shl i32 %372, 4
  %384 = add i32 %383, 248
  %385 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %384)
  %386 = fmul float %385, %70
  %387 = fadd float %386, %366
  %388 = shl i32 %372, 4
  %389 = add i32 %388, 252
  %390 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %389)
  %391 = fmul float %390, %70
  %392 = fadd float %391, %371
  %393 = fmul float %104, %185
  %394 = fmul float %106, %190
  %395 = fadd float %393, %394
  %396 = fmul float %108, %195
  %397 = fadd float %395, %396
  %398 = fmul float 1.000000e+00, %200
  %399 = fadd float %397, %398
  %400 = fmul float %104, %281
  %401 = fmul float %106, %286
  %402 = fadd float %400, %401
  %403 = fmul float %108, %291
  %404 = fadd float %402, %403
  %405 = fmul float 1.000000e+00, %296
  %406 = fadd float %404, %405
  %407 = fmul float %104, %377
  %408 = fmul float %106, %382
  %409 = fadd float %407, %408
  %410 = fmul float %108, %387
  %411 = fadd float %409, %410
  %412 = fmul float 1.000000e+00, %392
  %413 = fadd float %411, %412
  %414 = fmul float %81, %185
  %415 = fmul float %82, %190
  %416 = fadd float %414, %415
  %417 = fmul float %83, %195
  %418 = fadd float %416, %417
  %419 = fmul float 1.000000e+00, %200
  %420 = fadd float %418, %419
  %421 = fmul float %81, %281
  %422 = fmul float %82, %286
  %423 = fadd float %421, %422
  %424 = fmul float %83, %291
  %425 = fadd float %423, %424
  %426 = fmul float 1.000000e+00, %296
  %427 = fadd float %425, %426
  %428 = fmul float %81, %377
  %429 = fmul float %82, %382
  %430 = fadd float %428, %429
  %431 = fmul float %83, %387
  %432 = fadd float %430, %431
  %433 = fmul float 1.000000e+00, %392
  %434 = fadd float %432, %433
  %435 = fmul float %92, %185
  %436 = fmul float %93, %190
  %437 = fadd float %435, %436
  %438 = fmul float %94, %195
  %439 = fadd float %437, %438
  %440 = fmul float 1.000000e+00, %200
  %441 = fadd float %439, %440
  %442 = fmul float %92, %281
  %443 = fmul float %93, %286
  %444 = fadd float %442, %443
  %445 = fmul float %94, %291
  %446 = fadd float %444, %445
  %447 = fmul float 1.000000e+00, %296
  %448 = fadd float %446, %447
  %449 = fmul float %92, %377
  %450 = fmul float %93, %382
  %451 = fadd float %449, %450
  %452 = fmul float %94, %387
  %453 = fadd float %451, %452
  %454 = fmul float 1.000000e+00, %392
  %455 = fadd float %453, %454
  %456 = fmul float %23, %399
  %457 = fmul float %24, %399
  %458 = fmul float %25, %399
  %459 = fmul float %26, %399
  %460 = fmul float %27, %406
  %461 = fadd float %460, %456
  %462 = fmul float %28, %406
  %463 = fadd float %462, %457
  %464 = fmul float %29, %406
  %465 = fadd float %464, %458
  %466 = fmul float %30, %406
  %467 = fadd float %466, %459
  %468 = fmul float %31, %413
  %469 = fadd float %468, %461
  %470 = fmul float %32, %413
  %471 = fadd float %470, %463
  %472 = fmul float %33, %413
  %473 = fadd float %472, %465
  %474 = fmul float %34, %413
  %475 = fadd float %474, %467
  %476 = fadd float %469, %35
  %477 = fadd float %471, %36
  %478 = fadd float %473, %37
  %479 = fadd float %475, %38
  %480 = fmul float %420, %46
  %481 = fmul float %427, %46
  %482 = fmul float %434, %46
  %483 = fmul float %11, %480
  %484 = fmul float %12, %480
  %485 = fmul float %13, %480
  %486 = fmul float %14, %481
  %487 = fadd float %486, %483
  %488 = fmul float %15, %481
  %489 = fadd float %488, %484
  %490 = fmul float %16, %481
  %491 = fadd float %490, %485
  %492 = fmul float %17, %482
  %493 = fadd float %492, %487
  %494 = fmul float %18, %482
  %495 = fadd float %494, %489
  %496 = fmul float %19, %482
  %497 = fadd float %496, %491
  %498 = fmul float %493, %493
  %499 = fmul float %495, %495
  %500 = fadd float %499, %498
  %501 = fmul float %497, %497
  %502 = fadd float %500, %501
  %503 = call float @llvm.AMDGPU.rsq(float %502)
  %504 = fmul float %493, %503
  %505 = fmul float %495, %503
  %506 = fmul float %497, %503
  %507 = fmul float %441, %46
  %508 = fmul float %448, %46
  %509 = fmul float %455, %46
  %510 = fmul float %11, %507
  %511 = fmul float %12, %507
  %512 = fmul float %13, %507
  %513 = fmul float %14, %508
  %514 = fadd float %513, %510
  %515 = fmul float %15, %508
  %516 = fadd float %515, %511
  %517 = fmul float %16, %508
  %518 = fadd float %517, %512
  %519 = fmul float %17, %509
  %520 = fadd float %519, %514
  %521 = fmul float %18, %509
  %522 = fadd float %521, %516
  %523 = fmul float %19, %509
  %524 = fadd float %523, %518
  %525 = fmul float %11, %399
  %526 = fmul float %12, %399
  %527 = fmul float %13, %399
  %528 = fmul float %14, %406
  %529 = fadd float %528, %525
  %530 = fmul float %15, %406
  %531 = fadd float %530, %526
  %532 = fmul float %16, %406
  %533 = fadd float %532, %527
  %534 = fmul float %17, %413
  %535 = fadd float %534, %529
  %536 = fmul float %18, %413
  %537 = fadd float %536, %531
  %538 = fmul float %19, %413
  %539 = fadd float %538, %533
  %540 = fadd float %535, %20
  %541 = fadd float %537, %21
  %542 = fadd float %539, %22
  %543 = fsub float -0.000000e+00, %55
  %544 = fadd float %540, %543
  %545 = fsub float -0.000000e+00, %56
  %546 = fadd float %541, %545
  %547 = fsub float -0.000000e+00, %57
  %548 = fadd float %542, %547
  %549 = fmul float %478, %53
  %550 = fadd float %549, %54
  %551 = fmul float %87, %42
  %552 = fadd float %551, %44
  %553 = fmul float %88, %43
  %554 = fadd float %553, %45
  %555 = fmul float %524, %505
  %556 = fmul float %520, %506
  %557 = fmul float %522, %504
  %558 = fsub float -0.000000e+00, %555
  %559 = fmul float %522, %506
  %560 = fadd float %559, %558
  %561 = fsub float -0.000000e+00, %556
  %562 = fmul float %524, %504
  %563 = fadd float %562, %561
  %564 = fsub float -0.000000e+00, %557
  %565 = fmul float %520, %505
  %566 = fadd float %565, %564
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %544, float %546, float %548, float %550)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %552, float %554, float %504, float %505)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %506, float %560, float %563, float %566)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %520, float %522, float %524, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %476, float %477, float %478, float %479)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020c00
bf8c0770
7e04110c
d2d60010
02010702
34062084
4a0406ff
000000f0
e0301000
80000202
c0840704
bf8c0070
e00c2000
80020800
bf8c0770
10041102
7e08110d
d2d60011
02010704
340c2284
4a080cff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1304
7e08110e
d2d60012
02010704
340e2484
4a080eff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1504
7e08110f
d2d60013
02010704
34182684
4a0818ff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1704
4a0806ff
000000f4
e0301000
80000404
bf8c0770
10081104
4a0a0cff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121305
4a0a0eff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121505
4a0a18ff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121705
c0840700
bf8c007f
e00c2000
80021600
c2020131
c202812d
bf8c0070
7e0a0205
d2820014
04140917
100a0914
c2020130
c202812c
bf8c007f
7e1a0205
d2820015
04340916
d282000d
04160515
4a0a06ff
000000f8
e0301000
80000505
bf8c0770
100a1105
4a1c0cff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416130e
4a1c0eff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416150e
4a1c18ff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416170e
c2020132
c202812e
bf8c007f
7e1c0205
d2820016
04380918
d282000d
04360b16
4a0606ff
000000fc
e0301000
80000303
bf8c0770
10061103
4a0c0cff
000000fc
e0301000
80000606
bf8c0770
d2820003
040e1306
4a0c0eff
000000fc
e0301000
80000606
bf8c0770
d2820003
040e1506
4a0c18ff
000000fc
e0301000
80000606
bf8c0770
d2820006
040e1706
06060d0d
c2020112
bf8c007f
102e0604
4a0e2081
340e0e84
4a180eff
000000f0
e0301000
80000c0c
bf8c0770
1018110c
4a1a2281
341e1a84
4a1a1eff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432130d
4a1a2481
34301a84
4a1a30ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432150d
4a1a2681
34321a84
4a1a32ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432170d
4a1a0eff
000000f4
e0301000
80000d0d
bf8c0770
101a110d
4a1c1eff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436130e
4a1c30ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436150e
4a1c32ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436170e
101c1b14
d282001a
043a1915
4a1c0eff
000000f8
e0301000
80000e0e
bf8c0770
101c110e
4a361eff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a131b
4a3630ff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a151b
4a3632ff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a171b
d282001a
046a1d16
4a0e0eff
000000fc
e0301000
80000707
bf8c0770
100e1107
4a1e1eff
000000fc
e0301000
80000f0f
bf8c0770
d2820007
041e130f
4a1e30ff
000000fc
e0301000
80000f0f
bf8c0770
d2820007
041e150f
4a1e32ff
000000fc
e0301000
80000f0f
bf8c0770
d282000f
041e170f
060e1f1a
c2020116
bf8c007f
d2820017
045e0e04
4a202082
34302084
4a2030ff
000000f0
e0301000
80001010
bf8c0770
10201110
4a222282
34322284
4a2232ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421311
4a222482
34342284
4a2234ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421511
4a222682
34262284
4a2226ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421711
4a2230ff
000000f4
e0301000
80001111
bf8c0770
10221111
4a2432ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461312
4a2434ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461512
4a2426ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461712
10242314
d2820014
044a2115
4a2430ff
000000f8
e0301000
80001212
bf8c0770
10241112
4a2a32ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1315
4a2a34ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1515
4a2a26ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1715
d2820014
04522516
4a2a30ff
000000fc
e0301000
80001515
bf8c0770
102a1115
4a2c32ff
000000fc
e0301000
80001616
bf8c0770
d2820015
04561316
4a2c34ff
000000fc
e0301000
80001616
bf8c0770
d2820015
04561516
4a2626ff
000000fc
e0301000
80001313
bf8c0770
d282000a
04561713
06101514
c202011a
bf8c007f
d2820009
045e1004
c202011e
bf8c007f
06121204
c2020134
c2028135
bf8c007f
7e160205
d282000b
042c0909
c2020102
bf8c007f
10260604
c2028106
bf8c007f
d2820013
044e0e05
c204010a
bf8c007f
d2820013
044e1008
c204810e
bf8c007f
06262609
c204813a
bf8c007f
0a262609
c2070101
bf8c007f
1028060e
c2050105
bf8c007f
d2820014
04520e0a
c2048109
bf8c007f
d2820014
04521009
c205810d
bf8c007f
0628280b
c2058139
bf8c007f
0a28280b
c2058100
bf8c007f
102a060b
c2060104
bf8c007f
d2820015
04560e0c
c2068108
bf8c007f
d2820015
0456100d
c207810c
bf8c007f
062a2a0f
c2078138
bf8c007f
0a2a2a0f
f800021f
0b131415
c088070c
bf8c000f
e00c2000
80041300
bf8c0770
10160914
d282000b
042e0513
d282000b
042e0b15
06160d0b
c207812b
bf8c007f
1016160f
1030160e
102e1b14
d2820017
045e1913
d2820017
045e1d15
062e1f17
102e2e0f
d2820018
04622e0a
10322314
d2820019
04662113
d2820013
04662515
06261513
1028260f
d2820013
04622809
102a160b
d2820015
04562e0c
d2820016
0456280d
102a2d16
d2820015
04562713
10161604
d282000b
042e2e05
d2820014
042e2808
d282000b
04562914
7e2a5b0b
10162b13
10262b16
c0880710
bf8c007f
e00c2000
80041600
c2080125
c2088127
bf8c0070
7e340211
d282001a
04682117
c2080124
c2088126
bf8c007f
7e360211
d2820016
046c2116
f800022f
0b131a16
c0880714
bf8c000f
e00c2000
80041600
bf8c0770
10000917
d2820000
04020516
d2820000
04020b18
06000d00
1000000f
1004000e
10081b17
d2820004
04121916
d2820004
04121d18
06081f04
1008080f
d2820002
040a080a
100a2317
d2820005
04162116
d2820005
04162518
060a1505
100c0a0f
d2820002
040a0c09
10142702
100a000b
d2820005
0416080c
d2820005
04160c0d
10181705
0814150c
10182b14
101a1905
10000004
d2820000
04020805
d2820000
04020c08
10082700
08081b04
100c1700
10161902
080c0d0b
f800023f
0a04060c
f800024f
01000205
c2020113
bf8c000f
10000604
c2020117
bf8c007f
d2820000
04020e04
c202011b
bf8c007f
d2820000
04021004
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10020604
c2020115
bf8c007f
d2820001
04060e04
c2020119
bf8c007f
d2820001
04061004
c202011d
bf8c007f
06020204
c2020110
bf8c007f
10040604
c2020114
bf8c007f
d2820002
040a0e04
c2020118
bf8c007f
d2820002
040a1004
c200011c
bf8c007f
06040400
f80008cf
00090102
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[11..12]
DCL CONST[3..10]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV TEMP[1].xy, IN[4].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[1].xyz, CONST[5].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: MOV TEMP[3].xy, IN[2].xyyy
  8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D
  9: LRP TEMP[3].xyz, CONST[6].wwww, TEMP[3].xyzz, IMM[0].wwww
 10: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz
 11: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 12: DP4 TEMP[1].x, TEMP[2], CONST[9]
 13: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[7].yyyy
 14: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 15: MUL TEMP[1], TEMP[2], TEMP[1]
 16: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[3].xyyy
 17: MOV TEMP[3].xy, TEMP[3].xyyy
 18: TEX TEMP[3], TEMP[3], SAMP[2], 2D
 19: DP4 TEMP[2].x, TEMP[2], CONST[8]
 20: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx
 21: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 22: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 23: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 24: RCP TEMP[4].x, TEMP[4].xxxx
 25: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 26: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 27: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 28: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 29: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 30: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 31: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[4].xyzz
 32: MAD TEMP[2].x, TEMP[1].wwww, CONST[5].yyyy, CONST[5].zzzz
 33: SLT TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz
 34: F2I TEMP[2].x, -TEMP[2]
 35: UIF TEMP[2].xxxx :2
 36:   KILL
 37: ENDIF
 38: MOV OUT[0], TEMP[1]
 39: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %44 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %45 = load <32 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %63 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %64 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %65 = fmul float %13, %42
  %66 = fadd float %65, %43
  %67 = bitcast float %63 to i32
  %68 = bitcast float %64 to i32
  %69 = insertelement <2 x i32> undef, i32 %67, i32 0
  %70 = insertelement <2 x i32> %69, i32 %68, i32 1
  %71 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %70, <32 x i8> %45, <16 x i8> %47, i32 2)
  %72 = extractelement <4 x float> %71, i32 0
  %73 = extractelement <4 x float> %71, i32 1
  %74 = extractelement <4 x float> %71, i32 2
  %75 = extractelement <4 x float> %71, i32 3
  %76 = fmul float %72, 0x3FCB367A00000000
  %77 = fmul float %73, 0x3FE6E2EB20000000
  %78 = fadd float %77, %76
  %79 = fmul float %74, 0x3FB27BB300000000
  %80 = fadd float %78, %79
  %81 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %72)
  %82 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %73)
  %83 = call float @llvm.AMDGPU.lrp(float %28, float %80, float %74)
  %84 = bitcast float %60 to i32
  %85 = bitcast float %61 to i32
  %86 = insertelement <2 x i32> undef, i32 %84, i32 0
  %87 = insertelement <2 x i32> %86, i32 %85, i32 1
  %88 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %49, <16 x i8> %51, i32 2)
  %89 = extractelement <4 x float> %88, i32 0
  %90 = extractelement <4 x float> %88, i32 1
  %91 = extractelement <4 x float> %88, i32 2
  %92 = call float @llvm.AMDGPU.lrp(float %31, float %89, float 1.000000e+00)
  %93 = call float @llvm.AMDGPU.lrp(float %31, float %90, float 1.000000e+00)
  %94 = call float @llvm.AMDGPU.lrp(float %31, float %91, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %92, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float %93, float 0.000000e+00, float 1.000000e+00)
  %97 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00)
  %98 = fmul float %81, %95
  %99 = fmul float %82, %96
  %100 = fmul float %83, %97
  %101 = fmul float %98, %38
  %102 = fmul float %99, %39
  %103 = fadd float %101, %102
  %104 = fmul float %100, %40
  %105 = fadd float %103, %104
  %106 = fmul float %75, %41
  %107 = fadd float %105, %106
  %108 = fadd float %107, %33
  %109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00)
  %110 = call float @llvm.AMDGPU.lrp(float %109, float %56, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %109, float %57, float 1.000000e+00)
  %112 = call float @llvm.AMDGPU.lrp(float %109, float %58, float 1.000000e+00)
  %113 = call float @llvm.AMDGPU.lrp(float %109, float %59, float 1.000000e+00)
  %114 = fmul float %98, %110
  %115 = fmul float %99, %111
  %116 = fmul float %100, %112
  %117 = fmul float %75, %113
  %118 = fmul float %12, %22
  %119 = fmul float %66, %23
  %120 = bitcast float %118 to i32
  %121 = bitcast float %119 to i32
  %122 = insertelement <2 x i32> undef, i32 %120, i32 0
  %123 = insertelement <2 x i32> %122, i32 %121, i32 1
  %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %123, <32 x i8> %53, <16 x i8> %55, i32 2)
  %125 = extractelement <4 x float> %124, i32 0
  %126 = extractelement <4 x float> %124, i32 1
  %127 = extractelement <4 x float> %124, i32 2
  %128 = extractelement <4 x float> %124, i32 3
  %129 = fmul float %98, %34
  %130 = fmul float %99, %35
  %131 = fadd float %129, %130
  %132 = fmul float %100, %36
  %133 = fadd float %131, %132
  %134 = fmul float %75, %37
  %135 = fadd float %133, %134
  %136 = fadd float %135, %32
  %137 = call float @llvm.AMDIL.clamp.(float %136, float 0.000000e+00, float 1.000000e+00)
  %138 = fmul float %137, %128
  %139 = fmul float %125, 0x3FCB367A00000000
  %140 = fmul float %126, 0x3FE6E2EB20000000
  %141 = fadd float %140, %139
  %142 = fmul float %127, 0x3FB27BB300000000
  %143 = fadd float %141, %142
  %144 = fcmp uge float %143, 0x3F50624DE0000000
  %145 = select i1 %144, float %143, float 0x3F50624DE0000000
  %146 = fdiv float 1.000000e+00, %145
  %147 = fmul float %125, %146
  %148 = fmul float %126, %146
  %149 = fmul float %127, %146
  %150 = fmul float %114, %125
  %151 = fmul float %115, %126
  %152 = fmul float %116, %127
  %153 = fmul float %138, %147
  %154 = fadd float %153, %150
  %155 = fmul float %138, %148
  %156 = fadd float %155, %151
  %157 = fmul float %138, %149
  %158 = fadd float %157, %152
  %159 = fmul float %154, 4.000000e+00
  %160 = fmul float %156, 4.000000e+00
  %161 = fmul float %158, 4.000000e+00
  %162 = fcmp uge float %62, %27
  %163 = select i1 %162, float %62, float %27
  %164 = call float @llvm.AMDIL.clamp.(float %163, float 0.000000e+00, float 1.000000e+00)
  %165 = call float @llvm.AMDGPU.lrp(float %164, float %159, float %24)
  %166 = call float @llvm.AMDGPU.lrp(float %164, float %160, float %25)
  %167 = call float @llvm.AMDGPU.lrp(float %164, float %161, float %26)
  %168 = fmul float %117, %29
  %169 = fadd float %168, %30
  %170 = fcmp ult float %169, 0.000000e+00
  %171 = select i1 %170, float 1.000000e+00, float 0.000000e+00
  %172 = fsub float -0.000000e+00, %171
  %173 = fptosi float %172 to i32
  %174 = bitcast i32 %173 to float
  %175 = bitcast float %174 to i32
  %176 = icmp ne i32 %175, 0
  br i1 %176, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %177 = call i32 @llvm.SI.packf16(float %165, float %166)
  %178 = bitcast i32 %177 to float
  %179 = call i32 @llvm.SI.packf16(float %167, float %117)
  %180 = bitcast i32 %179 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %178, float %180, float %178, float %180)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140d00
c8150d01
c8100c00
c8110c01
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1402ff
3f371759
d2820008
04221505
7e1802ff
3d93dd98
d2820009
04221906
c0840100
bf8c007f
c2000914
bf8c007f
d208000b
020000f2
1010090b
d2820008
04221200
c8380500
c8390501
c8340400
c8350401
c0860304
c0c80508
bf8c007f
f0800700
00640e0d
c200891b
bf8c0070
d2080011
020002f2
d282000d
04461c01
d206080d
0201010d
10101b08
101a0b0b
d282000d
04361200
d2820012
04461e01
d2060812
02010112
101a250d
c2038925
bf8c007f
10241a07
c2038924
bf8c007f
d2820013
04480f08
10160d0b
d2820009
042e1200
d282000b
04462001
d206080b
0201010b
10241709
c2000926
bf8c007f
d2820009
044c0112
c2000927
bf8c007f
d2820009
04240107
c200091d
bf8c007f
06121200
d2060809
02010109
081612f2
c8380200
c8390201
d282000e
042e1d09
10261d12
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c200090d
bf8c007f
101e0600
c200090c
bf8c007f
101c0400
c0800308
c0c60510
bf8c007f
f0800f00
00030e0e
bf8c0770
10042113
c2000921
bf8c007f
10061a00
c2000920
bf8c007f
d2820003
040c0108
c2000922
bf8c007f
d2820003
040c0112
c2000923
bf8c007f
d2820003
040c0107
c200091c
bf8c007f
06060600
d2060803
02010103
10062303
10241cff
3e59b3d0
d282000a
044a150f
d282000a
042a1910
7e1802ff
3a83126f
d00c0000
0202190a
d200000a
0002150c
7e14550a
10181510
d2820002
040a1903
100404f6
c8300b00
c8310b01
c2000913
bf8c007f
d00c0002
0200010c
7e240200
d200000c
000a1912
d2060812
0201010c
082624f2
c2000912
bf8c007f
10182600
d2820002
04320512
c8300100
c8310101
d282000c
042e1909
1018190d
10181f0c
101a150f
d282000c
04321b03
101818f6
c2000911
bf8c007f
101a2600
d282000c
04361912
c8340000
c8350001
d282000d
042e1b09
10101b08
10101d08
1014150e
d2820003
04221503
100606f6
c2000910
bf8c007f
10102600
d2820003
04220712
c8200300
c8210301
d2820000
042e1109
10000107
c2000915
c2008916
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000102
5e021903
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[4], TEMP[3]
 16: MOV OUT[2], IN[2]
 17: MOV OUT[1], TEMP[2]
 18: MOV OUT[3], TEMP[0]
 19: MOV OUT[0], TEMP[1]
 20: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = extractelement <4 x float> %72, i32 2
  %76 = extractelement <4 x float> %72, i32 3
  %77 = fmul float %62, %51
  %78 = fadd float %77, %48
  %79 = fmul float %63, %52
  %80 = fadd float %79, %49
  %81 = fmul float %64, %53
  %82 = fadd float %81, %50
  %83 = fmul float %25, %78
  %84 = fmul float %26, %78
  %85 = fmul float %27, %78
  %86 = fmul float %28, %78
  %87 = fmul float %29, %80
  %88 = fadd float %87, %83
  %89 = fmul float %30, %80
  %90 = fadd float %89, %84
  %91 = fmul float %31, %80
  %92 = fadd float %91, %85
  %93 = fmul float %32, %80
  %94 = fadd float %93, %86
  %95 = fmul float %33, %82
  %96 = fadd float %95, %88
  %97 = fmul float %34, %82
  %98 = fadd float %97, %90
  %99 = fmul float %35, %82
  %100 = fadd float %99, %92
  %101 = fmul float %36, %82
  %102 = fadd float %101, %94
  %103 = fadd float %96, %37
  %104 = fadd float %98, %38
  %105 = fadd float %100, %39
  %106 = fadd float %102, %40
  %107 = fmul float %11, %78
  %108 = fmul float %12, %78
  %109 = fmul float %13, %78
  %110 = fmul float %14, %78
  %111 = fmul float %15, %80
  %112 = fadd float %111, %107
  %113 = fmul float %16, %80
  %114 = fadd float %113, %108
  %115 = fmul float %17, %80
  %116 = fadd float %115, %109
  %117 = fmul float %18, %80
  %118 = fadd float %117, %110
  %119 = fmul float %19, %82
  %120 = fadd float %119, %112
  %121 = fmul float %20, %82
  %122 = fadd float %121, %114
  %123 = fmul float %21, %82
  %124 = fadd float %123, %116
  %125 = fadd float %120, %22
  %126 = fadd float %122, %23
  %127 = fadd float %124, %24
  %128 = fsub float -0.000000e+00, %56
  %129 = fadd float %125, %128
  %130 = fsub float -0.000000e+00, %57
  %131 = fadd float %126, %130
  %132 = fsub float -0.000000e+00, %58
  %133 = fadd float %127, %132
  %134 = fmul float %105, %54
  %135 = fadd float %134, %55
  %136 = fmul float %68, %44
  %137 = fadd float %136, %46
  %138 = fmul float %69, %45
  %139 = fadd float %138, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %75, float %76)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %131, float %133, float %135)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %137, float %139, float %116, float %118)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %104, float %105, float %106)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800022f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800023f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MOV OUT[0], TEMP[1]
 28: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %42 = load <32 x i8> addrspace(2)* %41, !tbaa !0
  %43 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %46 = load <32 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %56 = fmul float %13, %39
  %57 = fadd float %56, %40
  %58 = bitcast float %54 to i32
  %59 = bitcast float %55 to i32
  %60 = insertelement <2 x i32> undef, i32 %58, i32 0
  %61 = insertelement <2 x i32> %60, i32 %59, i32 1
  %62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %42, <16 x i8> %44, i32 2)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = fmul float %63, 0x3FCB367A00000000
  %68 = fmul float %64, 0x3FE6E2EB20000000
  %69 = fadd float %68, %67
  %70 = fmul float %65, 0x3FB27BB300000000
  %71 = fadd float %69, %70
  %72 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %63)
  %73 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %64)
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %71, float %65)
  %75 = fmul float %72, %35
  %76 = fmul float %73, %36
  %77 = fadd float %75, %76
  %78 = fmul float %74, %37
  %79 = fadd float %77, %78
  %80 = fmul float %66, %38
  %81 = fadd float %79, %80
  %82 = fadd float %81, %30
  %83 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDGPU.lrp(float %83, float %49, float 1.000000e+00)
  %85 = call float @llvm.AMDGPU.lrp(float %83, float %50, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %83, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %83, float %52, float 1.000000e+00)
  %88 = fmul float %72, %84
  %89 = fmul float %73, %85
  %90 = fmul float %74, %86
  %91 = fmul float %66, %87
  %92 = fmul float %12, %22
  %93 = fmul float %57, %23
  %94 = bitcast float %92 to i32
  %95 = bitcast float %93 to i32
  %96 = insertelement <2 x i32> undef, i32 %94, i32 0
  %97 = insertelement <2 x i32> %96, i32 %95, i32 1
  %98 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %46, <16 x i8> %48, i32 2)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = extractelement <4 x float> %98, i32 3
  %103 = fmul float %72, %31
  %104 = fmul float %73, %32
  %105 = fadd float %103, %104
  %106 = fmul float %74, %33
  %107 = fadd float %105, %106
  %108 = fmul float %66, %34
  %109 = fadd float %107, %108
  %110 = fadd float %109, %29
  %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00)
  %112 = fmul float %111, %102
  %113 = fmul float %99, 0x3FCB367A00000000
  %114 = fmul float %100, 0x3FE6E2EB20000000
  %115 = fadd float %114, %113
  %116 = fmul float %101, 0x3FB27BB300000000
  %117 = fadd float %115, %116
  %118 = fcmp uge float %117, 0x3F50624DE0000000
  %119 = select i1 %118, float %117, float 0x3F50624DE0000000
  %120 = fdiv float 1.000000e+00, %119
  %121 = fmul float %99, %120
  %122 = fmul float %100, %120
  %123 = fmul float %101, %120
  %124 = fmul float %88, %99
  %125 = fmul float %89, %100
  %126 = fmul float %90, %101
  %127 = fmul float %112, %121
  %128 = fadd float %127, %124
  %129 = fmul float %112, %122
  %130 = fadd float %129, %125
  %131 = fmul float %112, %123
  %132 = fadd float %131, %126
  %133 = fmul float %128, 4.000000e+00
  %134 = fmul float %130, 4.000000e+00
  %135 = fmul float %132, 4.000000e+00
  %136 = fcmp uge float %53, %27
  %137 = select i1 %136, float %53, float %27
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.lrp(float %138, float %133, float %24)
  %140 = call float @llvm.AMDGPU.lrp(float %138, float %134, float %25)
  %141 = call float @llvm.AMDGPU.lrp(float %138, float %135, float %26)
  %142 = call i32 @llvm.SI.packf16(float %139, float %140)
  %143 = bitcast i32 %142 to float
  %144 = call i32 @llvm.SI.packf16(float %141, float %91)
  %145 = bitcast i32 %144 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %143, float %145, float %143, float %145)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430504
bf8c0770
10080aff
3e59b3d0
7e2002ff
3f371759
d2820004
04122106
7e2202ff
3d93dd98
d282000a
04122307
c0840100
bf8c007f
c2000910
bf8c007f
d208000b
020000f2
10080b0b
d2820004
04121400
10120d0b
d2820009
04261400
c2008921
bf8c007f
10181201
c2008920
bf8c007f
d282000c
04300304
10160f0b
d2820012
042e1400
c2000922
bf8c007f
d282000a
04300112
c2000923
bf8c007f
d282000a
04280108
c2000919
bf8c007f
06141400
d206080a
0201010a
081614f2
c8300200
c8310201
d282000c
042e190a
10261912
c200092c
c200892d
bf8c007f
7e180201
d2820003
04300103
c2000909
bf8c007f
101a0600
c2000908
bf8c007f
10180400
c0800304
c0c60508
bf8c007f
f0800f00
00030c0c
bf8c0770
10261d13
c200091d
bf8c007f
10041200
c200091c
bf8c007f
d2820002
04080104
c200091e
bf8c007f
d2820002
04080112
c200091f
bf8c007f
d2820002
04080108
c2000918
bf8c007f
06040400
d2060802
02010102
10041f02
100618ff
3e59b3d0
d2820003
040e210d
d2820003
040e230e
7e2002ff
3a83126f
d00c0000
02022103
d2000003
00020710
7e065503
1020070e
d2820010
044e2102
102420f6
c8400700
c8410701
c200090f
bf8c007f
d00c0002
02000110
7e220200
d2000010
000a2111
d2060810
02010110
082220f2
c200090e
bf8c007f
10262200
d2820012
044e2510
c84c0300
c84d0301
d2820013
042e270a
100a2708
5e0a0b12
c8180100
c8190101
d2820006
042e0d0a
100c0d09
100c1b06
100e070d
d2820006
041a0f02
100c0cf6
c200090d
bf8c007f
100e2200
d2820006
041e0d10
c81c0000
c81d0001
d2820000
042e0f0a
10000104
10001900
1002070c
d2820000
04020302
100000f6
c200090c
bf8c007f
10022200
d2820000
04060110
5e000d00
f8001c0f
05000500
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[11..12]
DCL CONST[3..10]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV TEMP[1].xy, IN[4].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[1].xyz, CONST[5].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: MOV TEMP[3].xy, IN[2].xyyy
  8: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D
  9: LRP TEMP[3].xyz, CONST[6].wwww, TEMP[3].xyzz, IMM[0].wwww
 10: MOV_SAT TEMP[3].xyz, TEMP[3].xyzz
 11: MUL TEMP[2].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 12: DP4 TEMP[1].x, TEMP[2], CONST[9]
 13: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[7].yyyy
 14: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 15: MUL TEMP[1], TEMP[2], TEMP[1]
 16: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[3].xyyy
 17: MOV TEMP[3].xy, TEMP[3].xyyy
 18: TEX TEMP[3], TEMP[3], SAMP[2], 2D
 19: DP4 TEMP[2].x, TEMP[2], CONST[8]
 20: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx
 21: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 22: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 23: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 24: RCP TEMP[4].x, TEMP[4].xxxx
 25: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 26: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 27: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 28: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 29: MAX TEMP[2].x, IN[3].wwww, CONST[4].wwww
 30: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 31: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[4].xyzz
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %42 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %43 = load <32 x i8> addrspace(2)* %42, !tbaa !0
  %44 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0
  %46 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %47 = load <32 x i8> addrspace(2)* %46, !tbaa !0
  %48 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %51 = load <32 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0
  %54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %60 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %61 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %62 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %63 = fmul float %13, %40
  %64 = fadd float %63, %41
  %65 = bitcast float %61 to i32
  %66 = bitcast float %62 to i32
  %67 = insertelement <2 x i32> undef, i32 %65, i32 0
  %68 = insertelement <2 x i32> %67, i32 %66, i32 1
  %69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %43, <16 x i8> %45, i32 2)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = extractelement <4 x float> %69, i32 3
  %74 = fmul float %70, 0x3FCB367A00000000
  %75 = fmul float %71, 0x3FE6E2EB20000000
  %76 = fadd float %75, %74
  %77 = fmul float %72, 0x3FB27BB300000000
  %78 = fadd float %76, %77
  %79 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %70)
  %80 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %71)
  %81 = call float @llvm.AMDGPU.lrp(float %28, float %78, float %72)
  %82 = bitcast float %58 to i32
  %83 = bitcast float %59 to i32
  %84 = insertelement <2 x i32> undef, i32 %82, i32 0
  %85 = insertelement <2 x i32> %84, i32 %83, i32 1
  %86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %47, <16 x i8> %49, i32 2)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = extractelement <4 x float> %86, i32 2
  %90 = call float @llvm.AMDGPU.lrp(float %29, float %87, float 1.000000e+00)
  %91 = call float @llvm.AMDGPU.lrp(float %29, float %88, float 1.000000e+00)
  %92 = call float @llvm.AMDGPU.lrp(float %29, float %89, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float %90, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float %91, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %92, float 0.000000e+00, float 1.000000e+00)
  %96 = fmul float %79, %93
  %97 = fmul float %80, %94
  %98 = fmul float %81, %95
  %99 = fmul float %96, %36
  %100 = fmul float %97, %37
  %101 = fadd float %99, %100
  %102 = fmul float %98, %38
  %103 = fadd float %101, %102
  %104 = fmul float %73, %39
  %105 = fadd float %103, %104
  %106 = fadd float %105, %31
  %107 = call float @llvm.AMDIL.clamp.(float %106, float 0.000000e+00, float 1.000000e+00)
  %108 = call float @llvm.AMDGPU.lrp(float %107, float %54, float 1.000000e+00)
  %109 = call float @llvm.AMDGPU.lrp(float %107, float %55, float 1.000000e+00)
  %110 = call float @llvm.AMDGPU.lrp(float %107, float %56, float 1.000000e+00)
  %111 = call float @llvm.AMDGPU.lrp(float %107, float %57, float 1.000000e+00)
  %112 = fmul float %96, %108
  %113 = fmul float %97, %109
  %114 = fmul float %98, %110
  %115 = fmul float %73, %111
  %116 = fmul float %12, %22
  %117 = fmul float %64, %23
  %118 = bitcast float %116 to i32
  %119 = bitcast float %117 to i32
  %120 = insertelement <2 x i32> undef, i32 %118, i32 0
  %121 = insertelement <2 x i32> %120, i32 %119, i32 1
  %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %51, <16 x i8> %53, i32 2)
  %123 = extractelement <4 x float> %122, i32 0
  %124 = extractelement <4 x float> %122, i32 1
  %125 = extractelement <4 x float> %122, i32 2
  %126 = extractelement <4 x float> %122, i32 3
  %127 = fmul float %96, %32
  %128 = fmul float %97, %33
  %129 = fadd float %127, %128
  %130 = fmul float %98, %34
  %131 = fadd float %129, %130
  %132 = fmul float %73, %35
  %133 = fadd float %131, %132
  %134 = fadd float %133, %30
  %135 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00)
  %136 = fmul float %135, %126
  %137 = fmul float %123, 0x3FCB367A00000000
  %138 = fmul float %124, 0x3FE6E2EB20000000
  %139 = fadd float %138, %137
  %140 = fmul float %125, 0x3FB27BB300000000
  %141 = fadd float %139, %140
  %142 = fcmp uge float %141, 0x3F50624DE0000000
  %143 = select i1 %142, float %141, float 0x3F50624DE0000000
  %144 = fdiv float 1.000000e+00, %143
  %145 = fmul float %123, %144
  %146 = fmul float %124, %144
  %147 = fmul float %125, %144
  %148 = fmul float %112, %123
  %149 = fmul float %113, %124
  %150 = fmul float %114, %125
  %151 = fmul float %136, %145
  %152 = fadd float %151, %148
  %153 = fmul float %136, %146
  %154 = fadd float %153, %149
  %155 = fmul float %136, %147
  %156 = fadd float %155, %150
  %157 = fmul float %152, 4.000000e+00
  %158 = fmul float %154, 4.000000e+00
  %159 = fmul float %156, 4.000000e+00
  %160 = fcmp uge float %60, %27
  %161 = select i1 %160, float %60, float %27
  %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %163 = call float @llvm.AMDGPU.lrp(float %162, float %157, float %24)
  %164 = call float @llvm.AMDGPU.lrp(float %162, float %158, float %25)
  %165 = call float @llvm.AMDGPU.lrp(float %162, float %159, float %26)
  %166 = call i32 @llvm.SI.packf16(float %163, float %164)
  %167 = bitcast i32 %166 to float
  %168 = call i32 @llvm.SI.packf16(float %165, float %115)
  %169 = bitcast i32 %168 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %167, float %169, float %167, float %169)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140d00
c8150d01
c8100c00
c8110c01
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1202ff
3f371759
d2820008
04221305
7e1602ff
3d93dd98
d282000c
04221706
c0840100
bf8c007f
c2000914
bf8c007f
d208000d
020000f2
1010090d
d2820008
04221800
c83c0500
c83d0501
c8380400
c8390401
c0860304
c0c80508
bf8c007f
f0800700
00640e0e
c200891b
bf8c0070
d2080011
020002f2
d282000a
04461c01
d206080a
0201010a
10101508
10140b0d
d282000a
042a1800
d2820012
04461e01
d2060812
02010112
1014250a
c2038925
bf8c007f
10241407
c2038924
bf8c007f
d2820013
04480f08
101a0d0d
d282000c
04361800
d282000d
04462001
d206080d
0201010d
10241b0c
c2000926
bf8c007f
d282000c
044c0112
c2000927
bf8c007f
d282000c
04300107
c200091d
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8380200
c8390201
d282000e
04361d0c
10261d12
c2000930
c2008931
bf8c007f
7e1c0201
d2820003
04380103
c200090d
bf8c007f
101e0600
c200090c
bf8c007f
101c0400
c0800308
c0c60510
bf8c007f
f0800f00
00030e0e
bf8c0770
10262113
c2000921
bf8c007f
10041400
c2000920
bf8c007f
d2820002
04080108
c2000922
bf8c007f
d2820002
04080112
c2000923
bf8c007f
d2820002
04080107
c200091c
bf8c007f
06040400
d2060802
02010102
10042302
10061cff
3e59b3d0
d2820003
040e130f
d2820003
040e1710
7e1202ff
3a83126f
d00c0000
02021303
d2000003
00020709
7e065503
10120710
d2820009
044e1302
102412f6
c8240b00
c8250b01
c2000913
bf8c007f
d00c0002
02000109
7e160200
d2000009
000a130b
d2060809
02010109
081612f2
c2000912
bf8c007f
10261600
d2820012
044e2509
c84c0300
c84d0301
d2820013
0436270c
10082707
5e080912
c8140100
c8150101
d2820005
04360b0c
100a0b0a
100a1f05
100c070f
d2820005
04160d02
100a0af6
c2000911
bf8c007f
100c1600
d2820005
041a0b09
c8180000
c8190001
d2820000
04360d0c
10000108
10001d00
1002070e
d2820000
04020302
100000f6
c2000910
bf8c007f
10021600
d2820000
04060109
5e000b00
f8001c0f
04000400
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[4], TEMP[3]
 16: MOV OUT[2], IN[2]
 17: MOV OUT[1], TEMP[2]
 18: MOV OUT[3], TEMP[0]
 19: MOV OUT[0], TEMP[1]
 20: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0
  %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %5)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = extractelement <4 x float> %72, i32 1
  %75 = extractelement <4 x float> %72, i32 2
  %76 = extractelement <4 x float> %72, i32 3
  %77 = fmul float %62, %51
  %78 = fadd float %77, %48
  %79 = fmul float %63, %52
  %80 = fadd float %79, %49
  %81 = fmul float %64, %53
  %82 = fadd float %81, %50
  %83 = fmul float %25, %78
  %84 = fmul float %26, %78
  %85 = fmul float %27, %78
  %86 = fmul float %28, %78
  %87 = fmul float %29, %80
  %88 = fadd float %87, %83
  %89 = fmul float %30, %80
  %90 = fadd float %89, %84
  %91 = fmul float %31, %80
  %92 = fadd float %91, %85
  %93 = fmul float %32, %80
  %94 = fadd float %93, %86
  %95 = fmul float %33, %82
  %96 = fadd float %95, %88
  %97 = fmul float %34, %82
  %98 = fadd float %97, %90
  %99 = fmul float %35, %82
  %100 = fadd float %99, %92
  %101 = fmul float %36, %82
  %102 = fadd float %101, %94
  %103 = fadd float %96, %37
  %104 = fadd float %98, %38
  %105 = fadd float %100, %39
  %106 = fadd float %102, %40
  %107 = fmul float %11, %78
  %108 = fmul float %12, %78
  %109 = fmul float %13, %78
  %110 = fmul float %14, %78
  %111 = fmul float %15, %80
  %112 = fadd float %111, %107
  %113 = fmul float %16, %80
  %114 = fadd float %113, %108
  %115 = fmul float %17, %80
  %116 = fadd float %115, %109
  %117 = fmul float %18, %80
  %118 = fadd float %117, %110
  %119 = fmul float %19, %82
  %120 = fadd float %119, %112
  %121 = fmul float %20, %82
  %122 = fadd float %121, %114
  %123 = fmul float %21, %82
  %124 = fadd float %123, %116
  %125 = fadd float %120, %22
  %126 = fadd float %122, %23
  %127 = fadd float %124, %24
  %128 = fsub float -0.000000e+00, %56
  %129 = fadd float %125, %128
  %130 = fsub float -0.000000e+00, %57
  %131 = fadd float %126, %130
  %132 = fsub float -0.000000e+00, %58
  %133 = fadd float %127, %132
  %134 = fmul float %105, %54
  %135 = fadd float %134, %55
  %136 = fmul float %68, %44
  %137 = fadd float %136, %46
  %138 = fmul float %69, %45
  %139 = fadd float %138, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %75, float %76)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %129, float %131, float %133, float %135)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %137, float %139, float %116, float %118)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %104, float %105, float %106)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840708
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800022f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800023f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[10..11]
DCL CONST[2..9]
DCL TEMP[0]
DCL TEMP[1..4], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
IMM[1] FLT32 {    0.0010,     4.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[11].xxxx, CONST[11].yyyy
  2: MOV TEMP[1].xy, IN[3].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MOV TEMP[2].w, TEMP[1].wwww
  5: DP3 TEMP[3].x, TEMP[1].xyzz, IMM[0].xyzz
  6: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[3].xxxx, TEMP[1].xyzz
  7: DP4 TEMP[1].x, TEMP[2], CONST[8]
  8: ADD_SAT TEMP[1].x, TEMP[1].xxxx, CONST[6].yyyy
  9: LRP TEMP[1], TEMP[1].xxxx, IN[1], IMM[0].wwww
 10: MUL TEMP[1], TEMP[2], TEMP[1]
 11: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[2].xyyy
 12: MOV TEMP[3].xy, TEMP[3].xyyy
 13: TEX TEMP[3], TEMP[3], SAMP[1], 2D
 14: DP4 TEMP[2].x, TEMP[2], CONST[7]
 15: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[6].xxxx
 16: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww
 17: DP3 TEMP[4].x, TEMP[3].xyzz, IMM[0].xyzz
 18: MAX TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
 19: RCP TEMP[4].x, TEMP[4].xxxx
 20: MUL TEMP[4].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 21: MUL TEMP[3].xyz, TEMP[1].xyzz, TEMP[3].xyzz
 22: MAD TEMP[2].xyz, TEMP[2].xxxx, TEMP[4].xyzz, TEMP[3].xyzz
 23: MUL TEMP[1].xyz, TEMP[2].xyzz, IMM[1].yyyy
 24: MAX TEMP[2].x, IN[2].wwww, CONST[3].wwww
 25: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 26: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[3].xyzz
 27: MAD TEMP[2].x, TEMP[1].wwww, CONST[4].yyyy, CONST[4].zzzz
 28: SLT TEMP[2].x, TEMP[2].xxxx, IMM[1].zzzz
 29: F2I TEMP[2].x, -TEMP[2]
 30: UIF TEMP[2].xxxx :2
 31:   KILL
 32: ENDIF
 33: MOV OUT[0], TEMP[1]
 34: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 52)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 56)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %43 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %44 = load <32 x i8> addrspace(2)* %43, !tbaa !0
  %45 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0
  %47 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %48 = load <32 x i8> addrspace(2)* %47, !tbaa !0
  %49 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %53 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %54 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %55 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %56 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %57 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %58 = fmul float %13, %41
  %59 = fadd float %58, %42
  %60 = bitcast float %56 to i32
  %61 = bitcast float %57 to i32
  %62 = insertelement <2 x i32> undef, i32 %60, i32 0
  %63 = insertelement <2 x i32> %62, i32 %61, i32 1
  %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %44, <16 x i8> %46, i32 2)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = extractelement <4 x float> %64, i32 1
  %67 = extractelement <4 x float> %64, i32 2
  %68 = extractelement <4 x float> %64, i32 3
  %69 = fmul float %65, 0x3FCB367A00000000
  %70 = fmul float %66, 0x3FE6E2EB20000000
  %71 = fadd float %70, %69
  %72 = fmul float %67, 0x3FB27BB300000000
  %73 = fadd float %71, %72
  %74 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %65)
  %75 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %66)
  %76 = call float @llvm.AMDGPU.lrp(float %28, float %73, float %67)
  %77 = fmul float %74, %37
  %78 = fmul float %75, %38
  %79 = fadd float %77, %78
  %80 = fmul float %76, %39
  %81 = fadd float %79, %80
  %82 = fmul float %68, %40
  %83 = fadd float %81, %82
  %84 = fadd float %83, %32
  %85 = call float @llvm.AMDIL.clamp.(float %84, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDGPU.lrp(float %85, float %51, float 1.000000e+00)
  %87 = call float @llvm.AMDGPU.lrp(float %85, float %52, float 1.000000e+00)
  %88 = call float @llvm.AMDGPU.lrp(float %85, float %53, float 1.000000e+00)
  %89 = call float @llvm.AMDGPU.lrp(float %85, float %54, float 1.000000e+00)
  %90 = fmul float %74, %86
  %91 = fmul float %75, %87
  %92 = fmul float %76, %88
  %93 = fmul float %68, %89
  %94 = fmul float %12, %22
  %95 = fmul float %59, %23
  %96 = bitcast float %94 to i32
  %97 = bitcast float %95 to i32
  %98 = insertelement <2 x i32> undef, i32 %96, i32 0
  %99 = insertelement <2 x i32> %98, i32 %97, i32 1
  %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %48, <16 x i8> %50, i32 2)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = extractelement <4 x float> %100, i32 3
  %105 = fmul float %74, %33
  %106 = fmul float %75, %34
  %107 = fadd float %105, %106
  %108 = fmul float %76, %35
  %109 = fadd float %107, %108
  %110 = fmul float %68, %36
  %111 = fadd float %109, %110
  %112 = fadd float %111, %31
  %113 = call float @llvm.AMDIL.clamp.(float %112, float 0.000000e+00, float 1.000000e+00)
  %114 = fmul float %113, %104
  %115 = fmul float %101, 0x3FCB367A00000000
  %116 = fmul float %102, 0x3FE6E2EB20000000
  %117 = fadd float %116, %115
  %118 = fmul float %103, 0x3FB27BB300000000
  %119 = fadd float %117, %118
  %120 = fcmp uge float %119, 0x3F50624DE0000000
  %121 = select i1 %120, float %119, float 0x3F50624DE0000000
  %122 = fdiv float 1.000000e+00, %121
  %123 = fmul float %101, %122
  %124 = fmul float %102, %122
  %125 = fmul float %103, %122
  %126 = fmul float %90, %101
  %127 = fmul float %91, %102
  %128 = fmul float %92, %103
  %129 = fmul float %114, %123
  %130 = fadd float %129, %126
  %131 = fmul float %114, %124
  %132 = fadd float %131, %127
  %133 = fmul float %114, %125
  %134 = fadd float %133, %128
  %135 = fmul float %130, 4.000000e+00
  %136 = fmul float %132, 4.000000e+00
  %137 = fmul float %134, 4.000000e+00
  %138 = fcmp uge float %55, %27
  %139 = select i1 %138, float %55, float %27
  %140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
  %141 = call float @llvm.AMDGPU.lrp(float %140, float %135, float %24)
  %142 = call float @llvm.AMDGPU.lrp(float %140, float %136, float %25)
  %143 = call float @llvm.AMDGPU.lrp(float %140, float %137, float %26)
  %144 = fmul float %93, %29
  %145 = fadd float %144, %30
  %146 = fcmp ult float %145, 0.000000e+00
  %147 = select i1 %146, float 1.000000e+00, float 0.000000e+00
  %148 = fsub float -0.000000e+00, %147
  %149 = fptosi float %148 to i32
  %150 = bitcast i32 %149 to float
  %151 = bitcast float %150 to i32
  %152 = icmp ne i32 %151, 0
  br i1 %152, label %IF, label %ENDIF

IF:                                               ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF

ENDIF:                                            ; preds = %main_body, %IF
  %153 = call i32 @llvm.SI.packf16(float %141, float %142)
  %154 = bitcast i32 %153 to float
  %155 = call i32 @llvm.SI.packf16(float %143, float %93)
  %156 = bitcast i32 %155 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %154, float %156, float %154, float %156)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140900
c8150901
c8100800
c8110801
c0840300
c0c60500
bf8c007f
f0800f00
00430404
bf8c0770
101008ff
3e59b3d0
7e1802ff
3f371759
d2820008
04221905
7e1a02ff
3d93dd98
d2820009
04221b06
c0840100
bf8c007f
c2000910
bf8c007f
d208000a
020000f2
1010090a
d2820008
04221200
10160b0a
d282000b
042e1200
c2008921
bf8c007f
101c1601
c2008920
bf8c007f
d282000e
04380308
10140d0a
d2820012
042a1200
c2000922
bf8c007f
d2820009
04380112
c2000923
bf8c007f
d2820009
04240107
c2000919
bf8c007f
06121200
d2060809
02010109
081412f2
c8380200
c8390201
d282000e
042a1d09
10261d12
c200092c
c200892d
bf8c007f
7e1c0201
d2820003
04380103
c2000909
bf8c007f
101e0600
c2000908
bf8c007f
101c0400
c0800304
c0c60508
bf8c007f
f0800f00
00030e0e
bf8c0770
10042113
c200091d
bf8c007f
10061600
c200091c
bf8c007f
d2820003
040c0108
c200091e
bf8c007f
d2820003
040c0112
c200091f
bf8c007f
d2820003
040c0107
c2000918
bf8c007f
06060600
d2060803
02010103
10062303
10241cff
3e59b3d0
d282000c
044a190f
d282000c
04321b10
7e1a02ff
3a83126f
d00c0000
02021b0c
d200000c
0002190d
7e18550c
101a1910
d2820002
040a1b03
100404f6
c8340700
c8350701
c200090f
bf8c007f
d00c0002
0200010d
7e240200
d200000d
000a1b12
d206080d
0201010d
08241af2
c200090e
bf8c007f
10262400
d2820002
044e050d
c84c0100
c84d0101
d2820013
042a2709
1016270b
10161f0b
1026190f
d282000b
042e2703
101616f6
c200090d
bf8c007f
10262400
d282000b
044e170d
c84c0000
c84d0001
d2820013
042a2709
10102708
10101d08
1018190e
d2820003
04221903
100606f6
c200090c
bf8c007f
10102400
d2820003
0422070d
c8200300
c8210301
d2820000
042a1109
10000107
c2000911
c2008912
bf8c007f
7e020201
d2820001
04040100
d0020000
02010101
d2000001
0001e480
d2060001
22010101
7e021101
d10a0000
02010101
be802400
8980007e
7e0202f3
7c260280
88fe007e
5e000102
5e021703
f8001c0f
00010001
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MOV TEMP[2].w, IMM[0].xxxx
  6: MOV TEMP[2].xyz, CONST[8].xyzx
  7: MUL TEMP[3], CONST[0], TEMP[0].xxxx
  8: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
  9: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 10: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 11: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[13].xyzz
 12: MAD TEMP[3].x, TEMP[1].zzzz, CONST[12].xxxx, CONST[12].yyyy
 13: MOV TEMP[0].w, TEMP[3].xxxx
 14: MAD TEMP[3].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 15: MOV OUT[3], TEMP[3]
 16: MOV OUT[1], TEMP[2]
 17: MOV OUT[2], TEMP[0]
 18: MOV OUT[0], TEMP[1]
 19: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %59 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %5)
  %62 = extractelement <4 x float> %61, i32 0
  %63 = extractelement <4 x float> %61, i32 1
  %64 = extractelement <4 x float> %61, i32 2
  %65 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0
  %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %5)
  %68 = extractelement <4 x float> %67, i32 0
  %69 = extractelement <4 x float> %67, i32 1
  %70 = fmul float %62, %51
  %71 = fadd float %70, %48
  %72 = fmul float %63, %52
  %73 = fadd float %72, %49
  %74 = fmul float %64, %53
  %75 = fadd float %74, %50
  %76 = fmul float %25, %71
  %77 = fmul float %26, %71
  %78 = fmul float %27, %71
  %79 = fmul float %28, %71
  %80 = fmul float %29, %73
  %81 = fadd float %80, %76
  %82 = fmul float %30, %73
  %83 = fadd float %82, %77
  %84 = fmul float %31, %73
  %85 = fadd float %84, %78
  %86 = fmul float %32, %73
  %87 = fadd float %86, %79
  %88 = fmul float %33, %75
  %89 = fadd float %88, %81
  %90 = fmul float %34, %75
  %91 = fadd float %90, %83
  %92 = fmul float %35, %75
  %93 = fadd float %92, %85
  %94 = fmul float %36, %75
  %95 = fadd float %94, %87
  %96 = fadd float %89, %37
  %97 = fadd float %91, %38
  %98 = fadd float %93, %39
  %99 = fadd float %95, %40
  %100 = fmul float %11, %71
  %101 = fmul float %12, %71
  %102 = fmul float %13, %71
  %103 = fmul float %14, %71
  %104 = fmul float %15, %73
  %105 = fadd float %104, %100
  %106 = fmul float %16, %73
  %107 = fadd float %106, %101
  %108 = fmul float %17, %73
  %109 = fadd float %108, %102
  %110 = fmul float %18, %73
  %111 = fadd float %110, %103
  %112 = fmul float %19, %75
  %113 = fadd float %112, %105
  %114 = fmul float %20, %75
  %115 = fadd float %114, %107
  %116 = fmul float %21, %75
  %117 = fadd float %116, %109
  %118 = fadd float %113, %22
  %119 = fadd float %115, %23
  %120 = fadd float %117, %24
  %121 = fsub float -0.000000e+00, %56
  %122 = fadd float %118, %121
  %123 = fsub float -0.000000e+00, %57
  %124 = fadd float %119, %123
  %125 = fsub float -0.000000e+00, %58
  %126 = fadd float %120, %125
  %127 = fmul float %98, %54
  %128 = fadd float %127, %55
  %129 = fmul float %68, %44
  %130 = fadd float %129, %46
  %131 = fmul float %69, %45
  %132 = fadd float %131, %47
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %130, float %132, float %109, float %111)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %96, float %97, float %98, float %99)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020130
c2028131
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020136
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020135
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020134
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL IN[4], GENERIC[22], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL CONST[3..16]
DCL TEMP[0]
DCL TEMP[1..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.0000,     0.0100}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: MOV TEMP[2].xy, IN[3].xyyy
  7: TEX TEMP[2], TEMP[2], SAMP[0], 2D
  8: MOV TEMP[3].w, TEMP[2].wwww
  9: DP3 TEMP[4].x, TEMP[2].xyzz, IMM[1].xyzz
 10: LRP TEMP[3].xyz, CONST[3].xxxx, TEMP[4].xxxx, TEMP[2].xyzz
 11: DP4 TEMP[2].x, TEMP[3], CONST[14]
 12: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[13].xxxx
 13: DP3 TEMP[4].x, IN[4].xyzz, IN[4].xyzz
 14: RSQ TEMP[4].x, TEMP[4].xxxx
 15: MUL TEMP[4].xyz, IN[4].xyzz, TEMP[4].xxxx
 16: MUL TEMP[1].xyz, TEMP[4].xyzz, TEMP[1].xxxx
 17: DP4 TEMP[4].x, TEMP[3], CONST[15]
 18: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[13].yyyy
 19: LRP TEMP[4], TEMP[4].xxxx, IN[1], IMM[0].yyyy
 20: MUL TEMP[3], TEMP[3], TEMP[4]
 21: DP3 TEMP[4].x, TEMP[1].xyzz, CONST[5].xyzz
 22: ADD TEMP[4].x, TEMP[4].xxxx, CONST[9].wwww
 23: MOV_SAT TEMP[5].x, TEMP[4].xxxx
 24: LRP TEMP[5].xyz, TEMP[5].xxxx, CONST[7].xyzz, CONST[8].xyzz
 25: MOV_SAT TEMP[6].x, -TEMP[4].xxxx
 26: LRP TEMP[6].xyz, TEMP[6].xxxx, CONST[9].xyzz, CONST[8].xyzz
 27: SLT TEMP[7].x, TEMP[4].xxxx, IMM[0].zzzz
 28: F2I TEMP[7].x, -TEMP[7]
 29: UIF TEMP[7].xxxx :1
 30:   MOV TEMP[6].xyz, TEMP[6].xyzx
 31: ELSE :1
 32:   MOV TEMP[6].xyz, TEMP[5].xyzx
 33: ENDIF
 34: DP3 TEMP[5].x, IN[2].xyzz, IN[2].xyzz
 35: RSQ TEMP[5].x, TEMP[5].xxxx
 36: MUL TEMP[5].xyz, IN[2].xyzz, TEMP[5].xxxx
 37: ADD TEMP[5].xyz, CONST[6].xyzz, -TEMP[5].xyzz
 38: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz
 39: RSQ TEMP[7].x, TEMP[7].xxxx
 40: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 41: DP3_SAT TEMP[5].x, TEMP[5].xyzz, TEMP[1].xyzz
 42: POW TEMP[5].x, TEMP[5].xxxx, CONST[12].xxxx
 43: SGE TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww
 44: F2I TEMP[4].x, -TEMP[4]
 45: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy
 46: MUL TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx
 47: MUL TEMP[4].xyz, CONST[10].xyzz, TEMP[4].xxxx
 48: MOV TEMP[1].xyz, TEMP[1].xyzz
 49: TEX TEMP[1].xyz, TEMP[1], SAMP[2], CUBE
 50: MAD TEMP[1].xyz, TEMP[1].xyzz, IMM[1].wwww, TEMP[6].xyzz
 51: MOV TEMP[5].xy, IN[3].zwww
 52: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 53: MUL TEMP[5], TEMP[5], CONST[11]
 54: LRP TEMP[1].xyz, TEMP[5].wwww, TEMP[5].xyzz, TEMP[1].xyzz
 55: LRP TEMP[4].xyz, TEMP[5].wwww, IMM[0].zzzz, TEMP[4].xyzz
 56: MUL TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xyzz
 57: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[2].xxxx, TEMP[1].xyzz
 58: MAX TEMP[1].x, IN[2].wwww, CONST[4].wwww
 59: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 60: LRP TEMP[3].xyz, TEMP[1].xxxx, TEMP[3].xyzz, CONST[4].xyzz
 61: MOV OUT[0], TEMP[3]
 62: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %49 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %50 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %52 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %53 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %54 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %55 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %56 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %57 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %58 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %59 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %60 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  %61 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
  %71 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = fcmp ugt float %16, 0.000000e+00
  %74 = select i1 %73, float 1.000000e+00, float 0.000000e+00
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.AMDIL.clamp.(float %74, float 0.000000e+00, float 1.000000e+00)
  %91 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %92 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %94 = bitcast float %90 to i32
  %95 = icmp ne i32 %94, 0
  %. = select i1 %95, float -1.000000e+00, float 1.000000e+00
  %96 = bitcast float %83 to i32
  %97 = bitcast float %84 to i32
  %98 = insertelement <2 x i32> undef, i32 %96, i32 0
  %99 = insertelement <2 x i32> %98, i32 %97, i32 1
  %100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %99, <32 x i8> %62, <16 x i8> %64, i32 2)
  %101 = extractelement <4 x float> %100, i32 0
  %102 = extractelement <4 x float> %100, i32 1
  %103 = extractelement <4 x float> %100, i32 2
  %104 = extractelement <4 x float> %100, i32 3
  %105 = fmul float %101, 0x3FCB367A00000000
  %106 = fmul float %102, 0x3FE6E2EB20000000
  %107 = fadd float %106, %105
  %108 = fmul float %103, 0x3FB27BB300000000
  %109 = fadd float %107, %108
  %110 = call float @llvm.AMDGPU.lrp(float %22, float %109, float %101)
  %111 = call float @llvm.AMDGPU.lrp(float %22, float %109, float %102)
  %112 = call float @llvm.AMDGPU.lrp(float %22, float %109, float %103)
  %113 = fmul float %110, %53
  %114 = fmul float %111, %54
  %115 = fadd float %113, %114
  %116 = fmul float %112, %55
  %117 = fadd float %115, %116
  %118 = fmul float %104, %56
  %119 = fadd float %117, %118
  %120 = fadd float %119, %51
  %121 = call float @llvm.AMDIL.clamp.(float %120, float 0.000000e+00, float 1.000000e+00)
  %122 = fmul float %87, %87
  %123 = fmul float %88, %88
  %124 = fadd float %123, %122
  %125 = fmul float %89, %89
  %126 = fadd float %124, %125
  %127 = call float @llvm.AMDGPU.rsq(float %126)
  %128 = fmul float %87, %127
  %129 = fmul float %88, %127
  %130 = fmul float %89, %127
  %131 = fmul float %128, %.
  %132 = fmul float %129, %.
  %133 = fmul float %130, %.
  %134 = fmul float %110, %57
  %135 = fmul float %111, %58
  %136 = fadd float %134, %135
  %137 = fmul float %112, %59
  %138 = fadd float %136, %137
  %139 = fmul float %104, %60
  %140 = fadd float %138, %139
  %141 = fadd float %140, %52
  %142 = call float @llvm.AMDIL.clamp.(float %141, float 0.000000e+00, float 1.000000e+00)
  %143 = call float @llvm.AMDGPU.lrp(float %142, float %75, float 1.000000e+00)
  %144 = call float @llvm.AMDGPU.lrp(float %142, float %76, float 1.000000e+00)
  %145 = call float @llvm.AMDGPU.lrp(float %142, float %77, float 1.000000e+00)
  %146 = call float @llvm.AMDGPU.lrp(float %142, float %78, float 1.000000e+00)
  %147 = fmul float %110, %143
  %148 = fmul float %111, %144
  %149 = fmul float %112, %145
  %150 = fmul float %104, %146
  %151 = fmul float %131, %27
  %152 = fmul float %132, %28
  %153 = fadd float %152, %151
  %154 = fmul float %133, %29
  %155 = fadd float %153, %154
  %156 = fadd float %155, %42
  %157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00)
  %158 = call float @llvm.AMDGPU.lrp(float %157, float %33, float %36)
  %159 = call float @llvm.AMDGPU.lrp(float %157, float %34, float %37)
  %160 = call float @llvm.AMDGPU.lrp(float %157, float %35, float %38)
  %161 = fsub float -0.000000e+00, %156
  %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %163 = call float @llvm.AMDGPU.lrp(float %162, float %39, float %36)
  %164 = call float @llvm.AMDGPU.lrp(float %162, float %40, float %37)
  %165 = call float @llvm.AMDGPU.lrp(float %162, float %41, float %38)
  %166 = fcmp ult float %156, 0.000000e+00
  %167 = select i1 %166, float 1.000000e+00, float 0.000000e+00
  %168 = fsub float -0.000000e+00, %167
  %169 = fptosi float %168 to i32
  %170 = bitcast i32 %169 to float
  %171 = bitcast float %170 to i32
  %172 = icmp ne i32 %171, 0
  %temp24.0 = select i1 %172, float %163, float %158
  %temp25.0 = select i1 %172, float %164, float %159
  %temp26.0 = select i1 %172, float %165, float %160
  %173 = fmul float %79, %79
  %174 = fmul float %80, %80
  %175 = fadd float %174, %173
  %176 = fmul float %81, %81
  %177 = fadd float %175, %176
  %178 = call float @llvm.AMDGPU.rsq(float %177)
  %179 = fmul float %79, %178
  %180 = fmul float %80, %178
  %181 = fmul float %81, %178
  %182 = fsub float -0.000000e+00, %179
  %183 = fadd float %30, %182
  %184 = fsub float -0.000000e+00, %180
  %185 = fadd float %31, %184
  %186 = fsub float -0.000000e+00, %181
  %187 = fadd float %32, %186
  %188 = fmul float %183, %183
  %189 = fmul float %185, %185
  %190 = fadd float %189, %188
  %191 = fmul float %187, %187
  %192 = fadd float %190, %191
  %193 = call float @llvm.AMDGPU.rsq(float %192)
  %194 = fmul float %183, %193
  %195 = fmul float %185, %193
  %196 = fmul float %187, %193
  %197 = fmul float %194, %131
  %198 = fmul float %195, %132
  %199 = fadd float %198, %197
  %200 = fmul float %196, %133
  %201 = fadd float %199, %200
  %202 = call float @llvm.AMDIL.clamp.(float %201, float 0.000000e+00, float 1.000000e+00)
  %203 = call float @llvm.pow.f32(float %202, float %50)
  %204 = fcmp uge float %156, 0x3F847AE140000000
  %205 = select i1 %204, float 1.000000e+00, float 0.000000e+00
  %206 = fsub float -0.000000e+00, %205
  %207 = fptosi float %206 to i32
  %208 = bitcast i32 %207 to float
  %209 = bitcast float %208 to i32
  %210 = and i32 %209, 1065353216
  %211 = bitcast i32 %210 to float
  %212 = fmul float %203, %211
  %213 = fmul float %43, %212
  %214 = fmul float %44, %212
  %215 = fmul float %45, %212
  %216 = insertelement <4 x float> undef, float %131, i32 0
  %217 = insertelement <4 x float> %216, float %132, i32 1
  %218 = insertelement <4 x float> %217, float %133, i32 2
  %219 = insertelement <4 x float> %218, float 0.000000e+00, i32 3
  %220 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %219)
  %221 = extractelement <4 x float> %220, i32 0
  %222 = extractelement <4 x float> %220, i32 1
  %223 = extractelement <4 x float> %220, i32 2
  %224 = extractelement <4 x float> %220, i32 3
  %225 = call float @fabs(float %223)
  %226 = fdiv float 1.000000e+00, %225
  %227 = fmul float %221, %226
  %228 = fadd float %227, 1.500000e+00
  %229 = fmul float %222, %226
  %230 = fadd float %229, 1.500000e+00
  %231 = bitcast float %230 to i32
  %232 = bitcast float %228 to i32
  %233 = bitcast float %224 to i32
  %234 = insertelement <4 x i32> undef, i32 %231, i32 0
  %235 = insertelement <4 x i32> %234, i32 %232, i32 1
  %236 = insertelement <4 x i32> %235, i32 %233, i32 2
  %237 = insertelement <4 x i32> %236, i32 undef, i32 3
  %238 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %237, <32 x i8> %70, <16 x i8> %72, i32 4)
  %239 = extractelement <4 x float> %238, i32 0
  %240 = extractelement <4 x float> %238, i32 1
  %241 = extractelement <4 x float> %238, i32 2
  %242 = fmul float %239, 4.000000e+00
  %243 = fadd float %242, %temp24.0
  %244 = fmul float %240, 4.000000e+00
  %245 = fadd float %244, %temp25.0
  %246 = fmul float %241, 4.000000e+00
  %247 = fadd float %246, %temp26.0
  %248 = bitcast float %85 to i32
  %249 = bitcast float %86 to i32
  %250 = insertelement <2 x i32> undef, i32 %248, i32 0
  %251 = insertelement <2 x i32> %250, i32 %249, i32 1
  %252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %251, <32 x i8> %66, <16 x i8> %68, i32 2)
  %253 = extractelement <4 x float> %252, i32 0
  %254 = extractelement <4 x float> %252, i32 1
  %255 = extractelement <4 x float> %252, i32 2
  %256 = extractelement <4 x float> %252, i32 3
  %257 = fmul float %253, %46
  %258 = fmul float %254, %47
  %259 = fmul float %255, %48
  %260 = fmul float %256, %49
  %261 = call float @llvm.AMDGPU.lrp(float %260, float %257, float %243)
  %262 = call float @llvm.AMDGPU.lrp(float %260, float %258, float %245)
  %263 = call float @llvm.AMDGPU.lrp(float %260, float %259, float %247)
  %264 = call float @llvm.AMDGPU.lrp(float %260, float 0.000000e+00, float %213)
  %265 = call float @llvm.AMDGPU.lrp(float %260, float 0.000000e+00, float %214)
  %266 = call float @llvm.AMDGPU.lrp(float %260, float 0.000000e+00, float %215)
  %267 = fmul float %147, %261
  %268 = fmul float %148, %262
  %269 = fmul float %149, %263
  %270 = fmul float %264, %121
  %271 = fadd float %270, %267
  %272 = fmul float %265, %121
  %273 = fadd float %272, %268
  %274 = fmul float %266, %121
  %275 = fadd float %274, %269
  %276 = fcmp uge float %82, %26
  %277 = select i1 %276, float %82, float %26
  %278 = call float @llvm.AMDIL.clamp.(float %277, float 0.000000e+00, float 1.000000e+00)
  %279 = call float @llvm.AMDGPU.lrp(float %278, float %271, float %23)
  %280 = call float @llvm.AMDGPU.lrp(float %278, float %273, float %24)
  %281 = call float @llvm.AMDGPU.lrp(float %278, float %275, float %25)
  %282 = call i32 @llvm.SI.packf16(float %279, float %280)
  %283 = bitcast i32 %282 to float
  %284 = call i32 @llvm.SI.packf16(float %281, float %150)
  %285 = bitcast i32 %284 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %283, float %285, float %283, float %285)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
attributes #3 = { nounwind readonly }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100d00
c8110d01
c80c0c00
c80d0c01
100a0703
d2820006
04160904
c8140e00
c8150e01
d2820006
041a0b05
7e0c5b06
10080d04
d0080008
02010102
d2000002
0021e480
d2060802
02010102
d10a0008
02010102
d2000002
0021e6f2
10180504
10060d03
10160503
10060d05
101a0503
7e1c0280
d28a0003
0436190b
d28c0002
0436190b
d28e0004
0436190b
d2880005
0436190b
d206010a
02010104
7e14550a
7e1e02ff
3fc00000
d2820004
043e1502
d2820003
043e1503
c0840308
c0c60510
bf8c007f
f0800700
00430203
c0840100
bf8c0070
c2000914
bf8c007f
100a1600
c2000915
bf8c007f
d2820005
0414010c
c2000916
bf8c007f
d2820005
0414010d
c2000927
bf8c007f
06340a00
d2060805
0201011a
080c0af2
c2000921
bf8c007f
100e0c00
c200891d
bf8c007f
d2820009
041c0305
d2060007
2201011a
d2060807
02010107
08100ef2
10141000
c2000925
bf8c007f
d282000a
04280107
d0020000
0201011a
d200000f
0001e480
d206000f
2201010f
7e1e110f
d10a0000
0201010f
d2000009
00021509
d2820013
0425ed03
c8280b00
c8290b01
c8240a00
c8250a01
c0860304
c0c80508
bf8c007f
f0800f00
00640f09
c203892f
bf8c0070
10122407
081412f2
1026270a
c203892d
bf8c007f
10282007
d282001d
044e2909
c8500900
c8510901
c84c0800
c84d0801
c0860300
c0c80500
bf8c007f
f0800f00
00641313
bf8c0770
102e26ff
3e59b3d0
7e3002ff
3f371759
d2820017
045e3114
7e3002ff
3d93dd98
d2820017
045e3115
c201090c
bf8c007f
d2080018
020004f2
10322718
d282001b
04662e02
10322918
d282001c
04662e02
c201893d
bf8c007f
10323803
c201893c
bf8c007f
d282001e
0464071b
10302b18
d2820019
04622e02
c201093e
bf8c007f
d2820017
04780519
c201093f
bf8c007f
d2820017
045c0516
c2010935
bf8c007f
062e2e02
d2060817
02010117
08302ef2
c8780100
c8790101
d282001e
04623d17
103c3d1c
103a3b1e
c8780500
c8790501
c87c0400
c87d0401
10403f1f
d2820021
04823d1e
c8800600
c8810601
d2820021
04864120
7e425b21
103c431e
c2010919
bf8c007f
083c3c02
103e431f
c2010918
bf8c007f
083e3e02
10443f1f
d2820022
048a3d1e
10404320
c201091a
bf8c007f
08404002
d2820021
048a4120
7e425b21
103c431e
103e431f
103e171f
d282001e
047e191e
103e4320
d282000b
047a1b1f
d206080b
0201010b
7e164f0b
c2010930
bf8c007f
0e161602
7e164b0b
7e1802ff
3c23d70a
d00c0002
0202191a
d200000c
0009e480
d206000c
2201010c
7e18110c
361818f2
1016190b
c2010929
bf8c007f
10181602
1018190a
d282000d
04310109
c2010939
bf8c007f
10183802
c2010938
bf8c007f
d282000c
0430051b
c201093a
bf8c007f
d282000c
04300519
c201093b
bf8c007f
d282000c
04300516
c2010934
bf8c007f
06181802
d206080c
0201010c
d282001a
0476190d
c8340700
c8350701
c2010913
bf8c007f
d00c0004
0200050d
7e1c0202
d200000d
00121b0e
d206080d
0201010d
081c1af2
c2010911
bf8c007f
10381c02
d282001a
0472350d
c2010920
bf8c007f
10380c02
c201891c
bf8c007f
d282001c
04700705
103a1002
c2010924
bf8c007f
d282001d
04740507
d200001c
00023b1c
d282001c
0471ed02
1038390a
c201092c
bf8c007f
103a1e02
d282001c
04723b09
c8740000
c8750001
d282001d
04623b17
10363b1b
1036391b
c2010928
bf8c007f
10381602
1038390a
d282001c
04710109
d282001b
046e191c
c2010910
bf8c007f
10381c02
d282001b
0472370d
5e34351b
c2010922
bf8c007f
100c0c02
c201891e
bf8c007f
d2820005
04180705
100c1002
c2010926
bf8c007f
d2820006
04180507
d2000005
00020d05
d2820002
0415ed04
1004050a
c200092e
bf8c007f
10062200
d2820002
040a0709
c80c0200
c80d0201
d2820003
04620717
10060719
10040503
c200092a
bf8c007f
10061600
1006070a
d2820003
040d0109
d2820002
040a1903
c2000912
bf8c007f
10061c00
d2820002
040e050d
c80c0300
c80d0301
d2820000
04620717
10000116
5e000102
f8001c0f
001a001a
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL CONST[0..16]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 {    0.0000,     1.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[14].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: MUL TEMP[3], CONST[0], TEMP[0].xxxx
 10: MAD TEMP[3], CONST[1], TEMP[0].yyyy, TEMP[3]
 11: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[3]
 12: ADD TEMP[0].xyz, TEMP[0], CONST[3]
 13: MOV TEMP[3].w, IMM[0].yyyy
 14: MOV TEMP[3].xyz, TEMP[0].xyzx
 15: MOV TEMP[4].w, IMM[0].yyyy
 16: MOV TEMP[4].xyz, TEMP[0].xyzx
 17: DP4 TEMP[3].x, CONST[15], TEMP[3]
 18: DP4 TEMP[4].x, CONST[16], TEMP[4]
 19: MOV TEMP[3].y, TEMP[4].xxxx
 20: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 21: MAD TEMP[4].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 22: MOV TEMP[0].w, TEMP[4].xxxx
 23: MAD TEMP[4].xy, IN[2].xyyy, CONST[9].xyyy, CONST[9].zwww
 24: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
 27: MOV TEMP[4].zw, TEMP[3].yyxy
 28: MOV OUT[4], TEMP[2]
 29: MOV OUT[3], TEMP[4]
 30: MOV OUT[1], CONST[8]
 31: MOV OUT[2], TEMP[0]
 32: MOV OUT[0], TEMP[1]
 33: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 236)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 252)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %66 = call float @llvm.SI.load.const(<16 x i8> %10, i32 268)
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = extractelement <4 x float> %75, i32 2
  %79 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
  %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %5)
  %82 = extractelement <4 x float> %81, i32 0
  %83 = extractelement <4 x float> %81, i32 1
  %84 = fmul float %70, %50
  %85 = fadd float %84, %47
  %86 = fmul float %71, %51
  %87 = fadd float %86, %48
  %88 = fmul float %72, %52
  %89 = fadd float %88, %49
  %90 = fmul float %23, %85
  %91 = fmul float %24, %85
  %92 = fmul float %25, %85
  %93 = fmul float %26, %85
  %94 = fmul float %27, %87
  %95 = fadd float %94, %90
  %96 = fmul float %28, %87
  %97 = fadd float %96, %91
  %98 = fmul float %29, %87
  %99 = fadd float %98, %92
  %100 = fmul float %30, %87
  %101 = fadd float %100, %93
  %102 = fmul float %31, %89
  %103 = fadd float %102, %95
  %104 = fmul float %32, %89
  %105 = fadd float %104, %97
  %106 = fmul float %33, %89
  %107 = fadd float %106, %99
  %108 = fmul float %34, %89
  %109 = fadd float %108, %101
  %110 = fadd float %103, %35
  %111 = fadd float %105, %36
  %112 = fadd float %107, %37
  %113 = fadd float %109, %38
  %114 = fmul float %76, %58
  %115 = fmul float %77, %58
  %116 = fmul float %78, %58
  %117 = fmul float %11, %114
  %118 = fmul float %12, %114
  %119 = fmul float %13, %114
  %120 = fmul float %14, %115
  %121 = fadd float %120, %117
  %122 = fmul float %15, %115
  %123 = fadd float %122, %118
  %124 = fmul float %16, %115
  %125 = fadd float %124, %119
  %126 = fmul float %17, %116
  %127 = fadd float %126, %121
  %128 = fmul float %18, %116
  %129 = fadd float %128, %123
  %130 = fmul float %19, %116
  %131 = fadd float %130, %125
  %132 = fmul float %11, %85
  %133 = fmul float %12, %85
  %134 = fmul float %13, %85
  %135 = fmul float %14, %87
  %136 = fadd float %135, %132
  %137 = fmul float %15, %87
  %138 = fadd float %137, %133
  %139 = fmul float %16, %87
  %140 = fadd float %139, %134
  %141 = fmul float %17, %89
  %142 = fadd float %141, %136
  %143 = fmul float %18, %89
  %144 = fadd float %143, %138
  %145 = fmul float %19, %89
  %146 = fadd float %145, %140
  %147 = fadd float %142, %20
  %148 = fadd float %144, %21
  %149 = fadd float %146, %22
  %150 = fmul float %59, %147
  %151 = fmul float %60, %148
  %152 = fadd float %150, %151
  %153 = fmul float %61, %149
  %154 = fadd float %152, %153
  %155 = fmul float %62, 1.000000e+00
  %156 = fadd float %154, %155
  %157 = fmul float %63, %147
  %158 = fmul float %64, %148
  %159 = fadd float %157, %158
  %160 = fmul float %65, %149
  %161 = fadd float %159, %160
  %162 = fmul float %66, 1.000000e+00
  %163 = fadd float %161, %162
  %164 = fsub float -0.000000e+00, %53
  %165 = fadd float %147, %164
  %166 = fsub float -0.000000e+00, %54
  %167 = fadd float %148, %166
  %168 = fsub float -0.000000e+00, %55
  %169 = fadd float %149, %168
  %170 = fmul float %112, %56
  %171 = fadd float %170, %57
  %172 = fmul float %82, %43
  %173 = fadd float %172, %45
  %174 = fmul float %83, %44
  %175 = fadd float %174, %46
  %176 = fmul float %127, %127
  %177 = fmul float %129, %129
  %178 = fadd float %177, %176
  %179 = fmul float %131, %131
  %180 = fadd float %178, %179
  %181 = call float @llvm.AMDGPU.rsq(float %180)
  %182 = fmul float %127, %181
  %183 = fmul float %129, %181
  %184 = fmul float %131, %181
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float %42)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %165, float %167, float %169, float %171)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %173, float %175, float %156, float %163)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %182, float %183, float %184, float 0.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %111, float %112, float %113)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2028106
bf8c007f
d2820005
04160205
c204010a
bf8c007f
d2820005
04160608
c204810e
bf8c007f
060a0a09
c2048132
bf8c007f
0a100a09
c2048101
bf8c007f
100e0409
c2058105
bf8c007f
d2820007
041e020b
c2050109
bf8c007f
d2820007
041e060a
c206010d
bf8c007f
060e0e0c
c2060131
bf8c007f
0a140e0c
c2060100
bf8c007f
1012040c
c2068104
bf8c007f
d2820009
0426020d
c2070108
bf8c007f
d2820009
0426060e
c207810c
bf8c007f
0612120f
c2078130
bf8c007f
0a16120f
f800021f
06080a0b
c2078141
bf8c000f
100c0e0f
c2078140
bf8c007f
d2820006
041a120f
c2078142
bf8c007f
d2820006
041a0a0f
c2078143
bf8c007f
060c0c0f
c207813d
bf8c007f
100e0e0f
c207813c
bf8c007f
d2820007
041e120f
c207813e
bf8c007f
d2820005
041e0a0f
c207813f
bf8c007f
060a0a0f
c0880708
bf8c007f
e00c2000
80040700
c2078125
c2080127
bf8c0070
7e160210
d282000b
042c1f08
c2078124
c2080126
bf8c007f
7e180210
d2820007
04301f07
f800022f
06050b07
c0880704
bf8c000f
e00c2000
80040700
c203013b
bf8c0070
10001006
100c0e06
100a0c09
d2820005
0416000b
10101206
d2820005
0416100a
100e0c0c
d2820007
041e000d
d2820007
041e100e
10120f07
d2820009
04260b05
100c0c04
d2820000
041a0005
d2820000
04021008
d2820006
04260100
7e0c5b06
10000d00
100a0d05
100c0d07
7e0e0280
f800023f
07000506
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[19], PERSPECTIVE
DCL IN[1], GENERIC[20], PERSPECTIVE
DCL IN[2], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..7]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 {    0.2126,     0.7152,     0.0722,     1.0000}
  0: MOV TEMP[0].xy, IN[2].xyyy
  1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
  2: MOV TEMP[1].w, TEMP[0].wwww
  3: DP3 TEMP[2].x, TEMP[0].xyzz, IMM[0].xyzz
  4: LRP TEMP[1].xyz, CONST[1].xxxx, TEMP[2].xxxx, TEMP[0].xyzz
  5: DP4 TEMP[0].x, TEMP[1], CONST[6]
  6: ADD_SAT TEMP[0].x, TEMP[0].xxxx, CONST[4].yyyy
  7: LRP TEMP[0], TEMP[0].xxxx, IN[0], IMM[0].wwww
  8: MUL TEMP[0], TEMP[1], TEMP[0]
  9: MAX TEMP[1].x, IN[1].wwww, CONST[2].wwww
 10: MOV_SAT TEMP[1].x, TEMP[1].xxxx
 11: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
 12: MOV OUT[0], TEMP[0]
 13: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 40)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 44)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %32 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %33 = load <32 x i8> addrspace(2)* %32, !tbaa !0
  %34 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %35 = load <16 x i8> addrspace(2)* %34, !tbaa !0
  %36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %43 = bitcast float %41 to i32
  %44 = bitcast float %42 to i32
  %45 = insertelement <2 x i32> undef, i32 %43, i32 0
  %46 = insertelement <2 x i32> %45, i32 %44, i32 1
  %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %33, <16 x i8> %35, i32 2)
  %48 = extractelement <4 x float> %47, i32 0
  %49 = extractelement <4 x float> %47, i32 1
  %50 = extractelement <4 x float> %47, i32 2
  %51 = extractelement <4 x float> %47, i32 3
  %52 = fmul float %48, 0x3FCB367A00000000
  %53 = fmul float %49, 0x3FE6E2EB20000000
  %54 = fadd float %53, %52
  %55 = fmul float %50, 0x3FB27BB300000000
  %56 = fadd float %54, %55
  %57 = call float @llvm.AMDGPU.lrp(float %22, float %56, float %48)
  %58 = call float @llvm.AMDGPU.lrp(float %22, float %56, float %49)
  %59 = call float @llvm.AMDGPU.lrp(float %22, float %56, float %50)
  %60 = fmul float %57, %28
  %61 = fmul float %58, %29
  %62 = fadd float %60, %61
  %63 = fmul float %59, %30
  %64 = fadd float %62, %63
  %65 = fmul float %51, %31
  %66 = fadd float %64, %65
  %67 = fadd float %66, %27
  %68 = call float @llvm.AMDIL.clamp.(float %67, float 0.000000e+00, float 1.000000e+00)
  %69 = call float @llvm.AMDGPU.lrp(float %68, float %36, float 1.000000e+00)
  %70 = call float @llvm.AMDGPU.lrp(float %68, float %37, float 1.000000e+00)
  %71 = call float @llvm.AMDGPU.lrp(float %68, float %38, float 1.000000e+00)
  %72 = call float @llvm.AMDGPU.lrp(float %68, float %39, float 1.000000e+00)
  %73 = fmul float %57, %69
  %74 = fmul float %58, %70
  %75 = fmul float %59, %71
  %76 = fmul float %51, %72
  %77 = fcmp uge float %40, %26
  %78 = select i1 %77, float %40, float %26
  %79 = call float @llvm.AMDIL.clamp.(float %78, float 0.000000e+00, float 1.000000e+00)
  %80 = call float @llvm.AMDGPU.lrp(float %79, float %73, float %23)
  %81 = call float @llvm.AMDGPU.lrp(float %79, float %74, float %24)
  %82 = call float @llvm.AMDGPU.lrp(float %79, float %75, float %25)
  %83 = call i32 @llvm.SI.packf16(float %80, float %81)
  %84 = bitcast i32 %83 to float
  %85 = call i32 @llvm.SI.packf16(float %82, float %76)
  %86 = bitcast i32 %85 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %84, float %86, float %84, float %86)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c80c0900
c80d0901
c8080800
c8090801
c0840300
c0c60500
bf8c007f
f0800f00
00430302
bf8c0770
100406ff
3e59b3d0
7e0e02ff
3f371759
d2820002
040a0f04
7e0e02ff
3d93dd98
d2820008
040a0f05
c0800100
bf8c007f
c2020104
bf8c007f
d2080009
020008f2
10040709
d2820002
040a1004
100e0909
d2820007
041e1004
c2028119
bf8c007f
10140e05
c2028118
bf8c007f
d282000b
04280b02
10120b09
d282000a
04261004
c202011a
bf8c007f
d2820008
042c090a
c202011b
bf8c007f
d2820008
04200906
c2020111
bf8c007f
06101004
d2060808
02010108
081210f2
c82c0200
c82d0201
d282000b
04261708
1018170a
c8280700
c8290701
c202010b
bf8c007f
d00c0008
0200090a
7e160204
d200000a
0022150b
d206080a
0201010a
081614f2
c202010a
bf8c007f
101a1604
d282000c
0436190a
c8340300
c8350301
d282000d
04261b08
10061b06
5e06070c
c8100100
c8110101
d2820004
04260908
10080907
c2020109
bf8c007f
100a1604
d2820004
0416090a
c8140000
c8150001
d2820000
04260b08
10000102
c2000108
bf8c007f
10021600
d2820000
0406010a
5e000900
f8001c0f
03000300
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..13]
DCL TEMP[0..2], LOCAL
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  1: MUL TEMP[1], CONST[4], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[5], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[6], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[7]
  5: MUL TEMP[2], CONST[0], TEMP[0].xxxx
  6: MAD TEMP[2], CONST[1], TEMP[0].yyyy, TEMP[2]
  7: MAD TEMP[0], CONST[2], TEMP[0].zzzz, TEMP[2]
  8: ADD TEMP[0].xyz, TEMP[0], CONST[3]
  9: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[12].xyzz
 10: MAD TEMP[2].x, TEMP[1].zzzz, CONST[13].xxxx, CONST[13].yyyy
 11: MOV TEMP[0].w, TEMP[2].xxxx
 12: MAD TEMP[2].xy, IN[1].xyyy, CONST[9].xyyy, CONST[9].zwww
 13: MOV OUT[3], TEMP[2]
 14: MOV OUT[1], CONST[8]
 15: MOV OUT[2], TEMP[0]
 16: MOV OUT[0], TEMP[1]
 17: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %5)
  %69 = extractelement <4 x float> %68, i32 0
  %70 = extractelement <4 x float> %68, i32 1
  %71 = fmul float %63, %52
  %72 = fadd float %71, %49
  %73 = fmul float %64, %53
  %74 = fadd float %73, %50
  %75 = fmul float %65, %54
  %76 = fadd float %75, %51
  %77 = fmul float %25, %72
  %78 = fmul float %26, %72
  %79 = fmul float %27, %72
  %80 = fmul float %28, %72
  %81 = fmul float %29, %74
  %82 = fadd float %81, %77
  %83 = fmul float %30, %74
  %84 = fadd float %83, %78
  %85 = fmul float %31, %74
  %86 = fadd float %85, %79
  %87 = fmul float %32, %74
  %88 = fadd float %87, %80
  %89 = fmul float %33, %76
  %90 = fadd float %89, %82
  %91 = fmul float %34, %76
  %92 = fadd float %91, %84
  %93 = fmul float %35, %76
  %94 = fadd float %93, %86
  %95 = fmul float %36, %76
  %96 = fadd float %95, %88
  %97 = fadd float %90, %37
  %98 = fadd float %92, %38
  %99 = fadd float %94, %39
  %100 = fadd float %96, %40
  %101 = fmul float %11, %72
  %102 = fmul float %12, %72
  %103 = fmul float %13, %72
  %104 = fmul float %14, %72
  %105 = fmul float %15, %74
  %106 = fadd float %105, %101
  %107 = fmul float %16, %74
  %108 = fadd float %107, %102
  %109 = fmul float %17, %74
  %110 = fadd float %109, %103
  %111 = fmul float %18, %74
  %112 = fadd float %111, %104
  %113 = fmul float %19, %76
  %114 = fadd float %113, %106
  %115 = fmul float %20, %76
  %116 = fadd float %115, %108
  %117 = fmul float %21, %76
  %118 = fadd float %117, %110
  %119 = fadd float %114, %22
  %120 = fadd float %116, %23
  %121 = fadd float %118, %24
  %122 = fsub float -0.000000e+00, %55
  %123 = fadd float %119, %122
  %124 = fsub float -0.000000e+00, %56
  %125 = fadd float %120, %124
  %126 = fsub float -0.000000e+00, %57
  %127 = fadd float %121, %126
  %128 = fmul float %99, %58
  %129 = fadd float %128, %59
  %130 = fmul float %69, %45
  %131 = fadd float %130, %47
  %132 = fmul float %70, %46
  %133 = fadd float %132, %48
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float %44)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %125, float %127, float %129)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %131, float %133, float %110, float %112)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %97, float %98, float %99, float %100)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020123
c2028122
c2040121
c2048120
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202012d
c2028129
bf8c0070
7e020205
d2820001
04040904
c202012c
c2028128
bf8c007f
7e040205
d2820002
04080903
c2020112
bf8c007f
100e0404
c2020116
bf8c007f
d2820007
041e0204
c202012e
c202812a
bf8c007f
7e100205
d2820003
04200905
c202011a
bf8c007f
d2820004
041e0604
c202011e
bf8c007f
06080804
c2020134
c2028135
bf8c007f
7e0a0205
d2820006
04140904
c2020102
bf8c007f
100a0404
c2020106
bf8c007f
d2820005
04160204
c202010a
bf8c007f
d2820007
04160604
c202010e
bf8c007f
060e0e04
c2020132
bf8c007f
0a0e0e04
c2020101
bf8c007f
10100404
c2020105
bf8c007f
d2820008
04220204
c2020109
bf8c007f
d2820008
04220604
c202010d
bf8c007f
06101004
c2020131
bf8c007f
0a101004
c2020100
bf8c007f
10120404
c2020104
bf8c007f
d2820009
04260204
c2020108
bf8c007f
d2820009
04260604
c202010c
bf8c007f
06121204
c2020130
bf8c007f
0a121204
f800021f
06070809
c2020103
bf8c000f
100c0404
c2020107
bf8c007f
d2820006
041a0204
c0820704
bf8c007f
e00c2000
80010700
c2020125
c2028127
bf8c0070
7e000205
d2820000
04000908
c2020124
c2028126
bf8c007f
7e160205
d2820007
042c0907
f800022f
06050007
c2020113
bf8c000f
10000404
c2020117
bf8c007f
d2820000
04020204
c202011b
bf8c007f
d2820000
04020604
c202011f
bf8c007f
06000004
c2020111
bf8c007f
100a0404
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160604
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10040404
c2020114
bf8c007f
d2820001
040a0204
c2020118
bf8c007f
d2820001
04060604
c200011c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL CONST[4..5]
DCL CONST[2..3]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[5].xxxx, CONST[5].yyyy
  2: MOV TEMP[1].xy, IN[2].xyyy
  3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
  4: MUL TEMP[1], IN[1], TEMP[1]
  5: MAX TEMP[2].x, IN[3].zzzz, CONST[3].wwww
  6: MOV_SAT TEMP[2].x, TEMP[2].xxxx
  7: MUL TEMP[1], TEMP[1], TEMP[2].xxxx
  8: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[2].xyyy
  9: MOV TEMP[2].xy, TEMP[2].xyyy
 10: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
 11: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].zzzz, CONST[4].wwww
 12: RCP TEMP[2].x, TEMP[2].xxxx
 13: ADD TEMP[2].x, TEMP[2].xxxx, -IN[3].xxxx
 14: MUL_SAT TEMP[2].x, TEMP[2].xxxx, IN[3].yyyy
 15: MUL TEMP[2].x, TEMP[1].wwww, TEMP[2].xxxx
 16: MOV TEMP[1].w, TEMP[2].xxxx
 17: MOV OUT[0], TEMP[1]
 18: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 60)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %29 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %30 = load <32 x i8> addrspace(2)* %29, !tbaa !0
  %31 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0
  %33 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %34 = load <32 x i8> addrspace(2)* %33, !tbaa !0
  %35 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %43 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %44 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %45 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %46 = fmul float %13, %27
  %47 = fadd float %46, %28
  %48 = bitcast float %41 to i32
  %49 = bitcast float %42 to i32
  %50 = insertelement <2 x i32> undef, i32 %48, i32 0
  %51 = insertelement <2 x i32> %50, i32 %49, i32 1
  %52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %30, <16 x i8> %32, i32 2)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = extractelement <4 x float> %52, i32 1
  %55 = extractelement <4 x float> %52, i32 2
  %56 = extractelement <4 x float> %52, i32 3
  %57 = fmul float %37, %53
  %58 = fmul float %38, %54
  %59 = fmul float %39, %55
  %60 = fmul float %40, %56
  %61 = fcmp uge float %45, %24
  %62 = select i1 %61, float %45, float %24
  %63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
  %64 = fmul float %57, %63
  %65 = fmul float %58, %63
  %66 = fmul float %59, %63
  %67 = fmul float %60, %63
  %68 = fmul float %12, %22
  %69 = fmul float %47, %23
  %70 = bitcast float %68 to i32
  %71 = bitcast float %69 to i32
  %72 = insertelement <2 x i32> undef, i32 %70, i32 0
  %73 = insertelement <2 x i32> %72, i32 %71, i32 1
  %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %73, <32 x i8> %34, <16 x i8> %36, i32 2)
  %75 = extractelement <4 x float> %74, i32 0
  %76 = fmul float %75, %25
  %77 = fadd float %76, %26
  %78 = fdiv float 1.000000e+00, %77
  %79 = fsub float -0.000000e+00, %43
  %80 = fadd float %78, %79
  %81 = fmul float %80, %44
  %82 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00)
  %83 = fmul float %67, %82
  %84 = fcmp ugt float %83, 0x3F80101020000000
  %85 = sext i1 %84 to i32
  %86 = trunc i32 %85 to i1
  %87 = select i1 %86, float 1.000000e+00, float -1.000000e+00
  call void @llvm.AMDGPU.kill(float %87)
  %88 = call i32 @llvm.SI.packf16(float %64, float %65)
  %89 = bitcast i32 %88 to float
  %90 = call i32 @llvm.SI.packf16(float %66, float %83)
  %91 = bitcast i32 %90 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %89, float %91, float %89, float %91)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

declare void @llvm.AMDGPU.kill(float)

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8140500
c8150501
c8100400
c8110401
c0840300
c0c60500
bf8c007f
f0800f00
00430404
c8200300
c8210301
bf8c0770
10120f08
c8200a00
c8210a01
c0840100
bf8c007f
c200090f
bf8c007f
d00c000c
02000108
7e140200
d2000008
0032110a
d2060808
02010108
10121109
c8280800
c8290801
c2000914
c2008915
bf8c007f
7e160201
d2820003
042c0103
c2000909
bf8c007f
10180600
c2000908
bf8c007f
10160400
c0800304
c0c60508
bf8c007f
f0800100
0003020b
c2000912
c2008913
bf8c0070
7e060201
d2820002
040c0102
7e045502
08041502
c80c0900
c80d0901
10040702
d2060802
02010102
10040509
7e0602ff
3c008081
d0080000
02020702
d2000003
0001e4f3
7c260680
c80c0100
c80d0101
10060b03
10061103
c8240000
c8250001
10120909
10121109
5e060709
c8240200
c8250201
10000d09
10001100
5e000500
f8001c0f
00030003
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL CONST[15..16]
DCL CONST[6..14]
DCL TEMP[0..1]
DCL TEMP[2..9], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {   -0.0100,    -0.5000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[16].xxxx, CONST[16].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[7].xxxx
  4: MOV TEMP[2].xy, IN[6].zwzz
  5: MOV TEMP[3].yz, IN[6].yxyy
  6: MOV TEMP[3].x, IN[5].wwww
  7: UIF TEMP[1].xxxx :3
  8:   MOV TEMP[4].x, IMM[0].xxxx
  9: ELSE :3
 10:   MOV TEMP[4].x, IMM[0].yyyy
 11: ENDIF
 12: MOV TEMP[5].xy, IN[4].xyyy
 13: TEX TEMP[5], TEMP[5], SAMP[0], 2D
 14: MOV TEMP[6].xy, IN[4].xyyy
 15: TEX TEMP[6], TEMP[6], SAMP[1], 2D
 16: MAD TEMP[6].yw, IMM[0].zzzz, TEMP[6], IMM[0].xxxx
 17: DP3 TEMP[7].x, IN[5].xyzz, IN[5].xyzz
 18: RSQ TEMP[7].x, TEMP[7].xxxx
 19: MUL TEMP[7].xyz, IN[5].xyzz, TEMP[7].xxxx
 20: DP2 TEMP[8].x, TEMP[6].ywww, TEMP[6].ywww
 21: ADD_SAT TEMP[8].x, IMM[0].yyyy, -TEMP[8].xxxx
 22: RSQ TEMP[9].x, TEMP[8].xxxx
 23: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
 24: CMP TEMP[9].x, -TEMP[8].xxxx, TEMP[9].xxxx, IMM[0].wwww
 25: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[9].xxxx
 26: DP3 TEMP[8].x, TEMP[3].xyzz, TEMP[3].xyzz
 27: RSQ TEMP[8].x, TEMP[8].xxxx
 28: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[8].xxxx
 29: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz
 30: RSQ TEMP[8].x, TEMP[8].xxxx
 31: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
 32: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].wwww
 33: MAD TEMP[2].xyz, TEMP[3].xyzz, TEMP[6].yyyy, TEMP[2].xyzz
 34: MAD TEMP[2].xyz, TEMP[7].xyzz, TEMP[4].xxxx, TEMP[2].xyzz
 35: DP3 TEMP[3].x, TEMP[2].xyzz, IN[3].xyzz
 36: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 37: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 38: ADD TEMP[2].xyz, IN[3].xyzz, -TEMP[2].xyzz
 39: MOV TEMP[2].xyz, TEMP[2].xyzz
 40: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 41: DP4 TEMP[3].x, TEMP[5], CONST[14]
 42: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[12].yyyy
 43: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 44: MUL TEMP[3], TEMP[5], TEMP[3]
 45: MUL TEMP[4].xyz, TEMP[2].xyzz, TEMP[2].wwww
 46: DP4 TEMP[5].x, TEMP[5], CONST[15]
 47: ADD_SAT TEMP[5].x, TEMP[5].xxxx, CONST[12].zzzz
 48: MAD TEMP[2].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[3].xyzz
 49: MAX TEMP[4].x, IN[3].wwww, CONST[8].wwww
 50: MOV_SAT TEMP[4].x, TEMP[4].xxxx
 51: LRP TEMP[2].xyz, TEMP[4].xxxx, TEMP[2].xyzz, CONST[8].xyzz
 52: MUL TEMP[4].xy, TEMP[0].xyyy, CONST[6].xyyy
 53: MOV TEMP[5].xy, TEMP[4].xyyy
 54: TEX TEMP[5].x, TEMP[5], SAMP[3], 2D
 55: MAD TEMP[5].x, TEMP[5].xxxx, CONST[7].zzzz, CONST[7].wwww
 56: RCP TEMP[5].x, TEMP[5].xxxx
 57: ADD TEMP[5].x, TEMP[5].xxxx, -IN[4].zzzz
 58: MUL_SAT TEMP[5].x, TEMP[5].xxxx, IN[4].wwww
 59: MUL TEMP[3].x, TEMP[3].wwww, TEMP[5].xxxx
 60: MOV TEMP[2].w, TEMP[3].xxxx
 61: MAD TEMP[5].xy, CONST[10].xzzz, TEMP[3].xxxx, CONST[10].ywww
 62: ADD TEMP[6].x, TEMP[5].xxxx, IMM[1].xxxx
 63: SLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
 64: F2I TEMP[6].x, -TEMP[6]
 65: UIF TEMP[6].xxxx :3
 66:   KILL
 67: ENDIF
 68: MUL TEMP[6].xy, TEMP[4].xyyy, CONST[9].yzzz
 69: MOV TEMP[6].xy, TEMP[6].xyyy
 70: TEX TEMP[6].xy, TEMP[6], SAMP[4], 2D
 71: ADD TEMP[6].xy, TEMP[6].xyyy, IMM[1].yyyy
 72: MUL TEMP[3].x, CONST[9].xxxx, TEMP[3].xxxx
 73: MAD TEMP[3].xy, TEMP[6].xyyy, TEMP[3].xxxx, TEMP[4].xyyy
 74: MOV TEMP[3].xy, TEMP[3].xyyy
 75: TEX TEMP[3], TEMP[3], SAMP[5], 2D
 76: MUL TEMP[2], TEMP[2], TEMP[5].xxxx
 77: MAD TEMP[2], TEMP[3], TEMP[5].yyyy, TEMP[2]
 78: MOV OUT[0], TEMP[2]
 79: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 124)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 224)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 228)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 232)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 236)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 240)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 244)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 248)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 252)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 256)
  %48 = call float @llvm.SI.load.const(<16 x i8> %21, i32 260)
  %49 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %50 = load <32 x i8> addrspace(2)* %49, !tbaa !0
  %51 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %52 = load <16 x i8> addrspace(2)* %51, !tbaa !0
  %53 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %54 = load <32 x i8> addrspace(2)* %53, !tbaa !0
  %55 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0
  %57 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %58 = load <32 x i8> addrspace(2)* %57, !tbaa !0
  %59 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %60 = load <16 x i8> addrspace(2)* %59, !tbaa !0
  %61 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
  %63 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
  %67 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = getelementptr <32 x i8> addrspace(2)* %2, i32 5
  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
  %71 = getelementptr <16 x i8> addrspace(2)* %1, i32 5
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = fcmp ugt float %16, 0.000000e+00
  %74 = select i1 %73, float 1.000000e+00, float 0.000000e+00
  %75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %91 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %92 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %93 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %94 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %95 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %96 = fmul float %13, %47
  %97 = fadd float %96, %48
  %98 = call float @llvm.AMDIL.clamp.(float %74, float 0.000000e+00, float 1.000000e+00)
  %99 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %100 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %101 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %102 = bitcast float %98 to i32
  %103 = icmp ne i32 %102, 0
  %. = select i1 %103, float -1.000000e+00, float 1.000000e+00
  %104 = bitcast float %83 to i32
  %105 = bitcast float %84 to i32
  %106 = insertelement <2 x i32> undef, i32 %104, i32 0
  %107 = insertelement <2 x i32> %106, i32 %105, i32 1
  %108 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %107, <32 x i8> %50, <16 x i8> %52, i32 2)
  %109 = extractelement <4 x float> %108, i32 0
  %110 = extractelement <4 x float> %108, i32 1
  %111 = extractelement <4 x float> %108, i32 2
  %112 = extractelement <4 x float> %108, i32 3
  %113 = bitcast float %83 to i32
  %114 = bitcast float %84 to i32
  %115 = insertelement <2 x i32> undef, i32 %113, i32 0
  %116 = insertelement <2 x i32> %115, i32 %114, i32 1
  %117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %54, <16 x i8> %56, i32 2)
  %118 = extractelement <4 x float> %117, i32 1
  %119 = extractelement <4 x float> %117, i32 3
  %120 = fmul float 2.000000e+00, %118
  %121 = fadd float %120, -1.000000e+00
  %122 = fmul float 2.000000e+00, %119
  %123 = fadd float %122, -1.000000e+00
  %124 = fmul float %87, %87
  %125 = fmul float %88, %88
  %126 = fadd float %125, %124
  %127 = fmul float %89, %89
  %128 = fadd float %126, %127
  %129 = call float @llvm.AMDGPU.rsq(float %128)
  %130 = fmul float %87, %129
  %131 = fmul float %88, %129
  %132 = fmul float %89, %129
  %133 = fmul float %121, %121
  %134 = fmul float %123, %123
  %135 = fadd float %133, %134
  %136 = fsub float -0.000000e+00, %135
  %137 = fadd float 1.000000e+00, %136
  %138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
  %139 = call float @llvm.AMDGPU.rsq(float %138)
  %140 = fmul float %139, %138
  %141 = fsub float -0.000000e+00, %138
  %142 = call float @llvm.AMDGPU.cndlt(float %141, float %140, float 0.000000e+00)
  %143 = fmul float %130, %142
  %144 = fmul float %131, %142
  %145 = fmul float %132, %142
  %146 = fmul float %90, %90
  %147 = fmul float %91, %91
  %148 = fadd float %147, %146
  %149 = fmul float %92, %92
  %150 = fadd float %148, %149
  %151 = call float @llvm.AMDGPU.rsq(float %150)
  %152 = fmul float %90, %151
  %153 = fmul float %91, %151
  %154 = fmul float %92, %151
  %155 = fmul float %93, %93
  %156 = fmul float %94, %94
  %157 = fadd float %156, %155
  %158 = fmul float %95, %95
  %159 = fadd float %157, %158
  %160 = call float @llvm.AMDGPU.rsq(float %159)
  %161 = fmul float %93, %160
  %162 = fmul float %94, %160
  %163 = fmul float %95, %160
  %164 = fmul float %161, %123
  %165 = fmul float %162, %123
  %166 = fmul float %163, %123
  %167 = fmul float %152, %121
  %168 = fadd float %167, %164
  %169 = fmul float %153, %121
  %170 = fadd float %169, %165
  %171 = fmul float %154, %121
  %172 = fadd float %171, %166
  %173 = fmul float %143, %.
  %174 = fadd float %173, %168
  %175 = fmul float %144, %.
  %176 = fadd float %175, %170
  %177 = fmul float %145, %.
  %178 = fadd float %177, %172
  %179 = fmul float %174, %79
  %180 = fmul float %176, %80
  %181 = fadd float %180, %179
  %182 = fmul float %178, %81
  %183 = fadd float %181, %182
  %184 = fmul float %183, %174
  %185 = fmul float %183, %176
  %186 = fmul float %183, %178
  %187 = fmul float 2.000000e+00, %184
  %188 = fmul float 2.000000e+00, %185
  %189 = fmul float 2.000000e+00, %186
  %190 = fsub float -0.000000e+00, %187
  %191 = fadd float %79, %190
  %192 = fsub float -0.000000e+00, %188
  %193 = fadd float %80, %192
  %194 = fsub float -0.000000e+00, %189
  %195 = fadd float %81, %194
  %196 = insertelement <4 x float> undef, float %191, i32 0
  %197 = insertelement <4 x float> %196, float %193, i32 1
  %198 = insertelement <4 x float> %197, float %195, i32 2
  %199 = insertelement <4 x float> %198, float 0.000000e+00, i32 3
  %200 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %199)
  %201 = extractelement <4 x float> %200, i32 0
  %202 = extractelement <4 x float> %200, i32 1
  %203 = extractelement <4 x float> %200, i32 2
  %204 = extractelement <4 x float> %200, i32 3
  %205 = call float @fabs(float %203)
  %206 = fdiv float 1.000000e+00, %205
  %207 = fmul float %201, %206
  %208 = fadd float %207, 1.500000e+00
  %209 = fmul float %202, %206
  %210 = fadd float %209, 1.500000e+00
  %211 = bitcast float %210 to i32
  %212 = bitcast float %208 to i32
  %213 = bitcast float %204 to i32
  %214 = insertelement <4 x i32> undef, i32 %211, i32 0
  %215 = insertelement <4 x i32> %214, i32 %212, i32 1
  %216 = insertelement <4 x i32> %215, i32 %213, i32 2
  %217 = insertelement <4 x i32> %216, i32 undef, i32 3
  %218 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %217, <32 x i8> %58, <16 x i8> %60, i32 4)
  %219 = extractelement <4 x float> %218, i32 0
  %220 = extractelement <4 x float> %218, i32 1
  %221 = extractelement <4 x float> %218, i32 2
  %222 = extractelement <4 x float> %218, i32 3
  %223 = fmul float %109, %39
  %224 = fmul float %110, %40
  %225 = fadd float %223, %224
  %226 = fmul float %111, %41
  %227 = fadd float %225, %226
  %228 = fmul float %112, %42
  %229 = fadd float %227, %228
  %230 = fadd float %229, %37
  %231 = call float @llvm.AMDIL.clamp.(float %230, float 0.000000e+00, float 1.000000e+00)
  %232 = call float @llvm.AMDGPU.lrp(float %231, float %75, float 1.000000e+00)
  %233 = call float @llvm.AMDGPU.lrp(float %231, float %76, float 1.000000e+00)
  %234 = call float @llvm.AMDGPU.lrp(float %231, float %77, float 1.000000e+00)
  %235 = call float @llvm.AMDGPU.lrp(float %231, float %78, float 1.000000e+00)
  %236 = fmul float %109, %232
  %237 = fmul float %110, %233
  %238 = fmul float %111, %234
  %239 = fmul float %112, %235
  %240 = fmul float %219, %222
  %241 = fmul float %220, %222
  %242 = fmul float %221, %222
  %243 = fmul float %109, %43
  %244 = fmul float %110, %44
  %245 = fadd float %243, %244
  %246 = fmul float %111, %45
  %247 = fadd float %245, %246
  %248 = fmul float %112, %46
  %249 = fadd float %247, %248
  %250 = fadd float %249, %38
  %251 = call float @llvm.AMDIL.clamp.(float %250, float 0.000000e+00, float 1.000000e+00)
  %252 = fmul float %240, %251
  %253 = fadd float %252, %236
  %254 = fmul float %241, %251
  %255 = fadd float %254, %237
  %256 = fmul float %242, %251
  %257 = fadd float %256, %238
  %258 = fcmp uge float %82, %29
  %259 = select i1 %258, float %82, float %29
  %260 = call float @llvm.AMDIL.clamp.(float %259, float 0.000000e+00, float 1.000000e+00)
  %261 = call float @llvm.AMDGPU.lrp(float %260, float %253, float %26)
  %262 = call float @llvm.AMDGPU.lrp(float %260, float %255, float %27)
  %263 = call float @llvm.AMDGPU.lrp(float %260, float %257, float %28)
  %264 = fmul float %12, %22
  %265 = fmul float %97, %23
  %266 = bitcast float %264 to i32
  %267 = bitcast float %265 to i32
  %268 = insertelement <2 x i32> undef, i32 %266, i32 0
  %269 = insertelement <2 x i32> %268, i32 %267, i32 1
  %270 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %269, <32 x i8> %62, <16 x i8> %64, i32 2)
  %271 = extractelement <4 x float> %270, i32 0
  %272 = fmul float %271, %24
  %273 = fadd float %272, %25
  %274 = fdiv float 1.000000e+00, %273
  %275 = fsub float -0.000000e+00, %85
  %276 = fadd float %274, %275
  %277 = fmul float %276, %86
  %278 = call float @llvm.AMDIL.clamp.(float %277, float 0.000000e+00, float 1.000000e+00)
  %279 = fmul float %239, %278
  %280 = fmul float %33, %279
  %281 = fadd float %280, %34
  %282 = fmul float %35, %279
  %283 = fadd float %282, %36
  %284 = fadd float %281, 0xBF847AE140000000
  %285 = fcmp ult float %284, 0.000000e+00
  %286 = select i1 %285, float 1.000000e+00, float 0.000000e+00
  %287 = fsub float -0.000000e+00, %286
  %288 = fptosi float %287 to i32
  %289 = bitcast i32 %288 to float
  %290 = bitcast float %289 to i32
  %291 = icmp ne i32 %290, 0
  br i1 %291, label %IF41, label %ENDIF40

IF41:                                             ; preds = %main_body
  call void @llvm.AMDGPU.kilp()
  br label %ENDIF40

ENDIF40:                                          ; preds = %main_body, %IF41
  %292 = fmul float %264, %31
  %293 = fmul float %265, %32
  %294 = bitcast float %292 to i32
  %295 = bitcast float %293 to i32
  %296 = insertelement <2 x i32> undef, i32 %294, i32 0
  %297 = insertelement <2 x i32> %296, i32 %295, i32 1
  %298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %297, <32 x i8> %66, <16 x i8> %68, i32 2)
  %299 = extractelement <4 x float> %298, i32 0
  %300 = extractelement <4 x float> %298, i32 1
  %301 = fadd float %299, -5.000000e-01
  %302 = fadd float %300, -5.000000e-01
  %303 = fmul float %30, %279
  %304 = fmul float %301, %303
  %305 = fadd float %304, %264
  %306 = fmul float %302, %303
  %307 = fadd float %306, %265
  %308 = bitcast float %305 to i32
  %309 = bitcast float %307 to i32
  %310 = insertelement <2 x i32> undef, i32 %308, i32 0
  %311 = insertelement <2 x i32> %310, i32 %309, i32 1
  %312 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %311, <32 x i8> %70, <16 x i8> %72, i32 2)
  %313 = extractelement <4 x float> %312, i32 0
  %314 = extractelement <4 x float> %312, i32 1
  %315 = extractelement <4 x float> %312, i32 2
  %316 = extractelement <4 x float> %312, i32 3
  %317 = fmul float %261, %281
  %318 = fmul float %262, %281
  %319 = fmul float %263, %281
  %320 = fmul float %279, %281
  %321 = fmul float %313, %283
  %322 = fadd float %321, %317
  %323 = fmul float %314, %283
  %324 = fadd float %323, %318
  %325 = fmul float %315, %283
  %326 = fadd float %325, %319
  %327 = fmul float %316, %283
  %328 = fadd float %327, %320
  %329 = call i32 @llvm.SI.packf16(float %322, float %324)
  %330 = bitcast i32 %329 to float
  %331 = call i32 @llvm.SI.packf16(float %326, float %328)
  %332 = bitcast i32 %331 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %330, float %332, float %330, float %332)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

declare void @llvm.AMDGPU.kilp()

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180900
c8190901
c8140800
c8150801
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d2060809
02010109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340d00
c8350d01
c8300c00
c8310c01
1014190c
d282000b
042a1b0d
c8280e00
c8290e01
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441300
c8451301
c8401200
c8411201
101a2110
d282000e
04362311
c8341400
c8351401
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541000
c8551001
c8500f00
c8510f01
10222914
d2820012
04462b15
c8441100
c8451101
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0400
c83d0401
10281f0c
c8400500
c8410501
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200600
c8210601
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430b08
bf8c0770
10081d0d
c0840300
c0c60500
bf8c007f
f0800f00
00430705
c0840100
bf8c0070
c2000939
bf8c007f
100a1000
c2000938
bf8c007f
d2820005
04140107
c200093a
bf8c007f
d2820005
04140109
c200093b
bf8c007f
d2820005
0414010a
c2000931
bf8c007f
060a0a00
d206080f
02010105
08201ef2
c8140200
c8150201
d2820005
04420b0f
100a0b09
c200093d
bf8c007f
100c1000
c200093c
bf8c007f
d2820006
04180107
c200093e
bf8c007f
d2820006
04180109
c200093f
bf8c007f
d2820006
0418010a
c2000932
bf8c007f
060c0c00
d2060806
02010106
d2820004
04160d04
c8140700
c8150701
c2000923
bf8c007f
d00c000c
02000105
7e220200
d2000005
00320b11
d2060811
02010105
082422f2
c2000922
bf8c007f
100a2400
d2820004
04160911
100a1d0c
c84c0100
c84d0101
d2820013
0442270f
10262708
d2820005
044e0d05
c2000921
bf8c007f
10262400
d2820005
044e0b11
10161d0b
c8300000
c8310001
d282000c
0442190f
10181907
d2820006
04320d0b
c2000920
bf8c007f
10162400
d2820006
042e0d11
c82c0300
c82d0301
d282000b
0442170f
1012170a
c8280a00
c8290a01
c2000940
c2008941
bf8c007f
7e0e0201
d2820003
041c0103
c2000919
bf8c007f
10100600
c2000918
bf8c007f
100e0400
c086030c
c0c80518
bf8c007f
f0800100
00640207
c200091e
c200891f
bf8c0070
7e060201
d2820002
040c0102
7e045502
08041502
c80c0b00
c80d0b01
10000702
d2060800
02010100
10040109
c200092a
c200892b
bf8c007f
7e000201
d2820000
04020400
c2000928
c2008929
bf8c007f
7e020201
d2820001
04060400
060602ff
bc23d70a
d0020000
02010103
d2000003
0001e480
d2060003
22010103
7e061103
d10a0006
02010103
c0860314
c0c80528
c0800310
c0cc0520
c2020926
c2028925
c2040924
bf8c007f
7e140204
7e120205
7e060208
be842406
8984047e
7e1602f3
7c261680
88fe047e
10161508
10141307
f0800300
00060b0a
bf8c0770
061218f1
10060503
d282000a
04220709
061616f1
d2820009
041e070b
f0800f00
00640709
10040302
bf8c0770
d2820002
040a010a
10060304
d2820003
040e0109
5e040503
10060305
d2820003
040e0108
10020306
d2820000
04060107
5e000700
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..18]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
  0: MAD TEMP[0].xyz, IN[0].xyzz, CONST[15].xyzz, CONST[14].xyzz
  1: MUL TEMP[1], CONST[8], TEMP[0].xxxx
  2: MAD TEMP[1], CONST[9], TEMP[0].yyyy, TEMP[1]
  3: MAD TEMP[1], CONST[10], TEMP[0].zzzz, TEMP[1]
  4: ADD TEMP[1], TEMP[1], CONST[11]
  5: MUL TEMP[2].xyz, IN[1].xyzz, CONST[18].wwww
  6: MUL TEMP[3], CONST[0], TEMP[2].xxxx
  7: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
  8: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
  9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
 10: RSQ TEMP[3].x, TEMP[3].xxxx
 11: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 12: MUL TEMP[3].xyz, IN[3].xyzz, CONST[18].wwww
 13: MUL TEMP[4], CONST[0], TEMP[3].xxxx
 14: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 15: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 16: MUL TEMP[4].xyz, TEMP[3].zxyy, TEMP[2].yzxx
 17: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[2].zxyy, -TEMP[4].xyzz
 18: MUL TEMP[5], CONST[0], TEMP[0].xxxx
 19: MAD TEMP[5], CONST[1], TEMP[0].yyyy, TEMP[5]
 20: MAD TEMP[5], CONST[2], TEMP[0].zzzz, TEMP[5]
 21: ADD TEMP[5].xyz, TEMP[5], CONST[3]
 22: ADD TEMP[5].xyz, TEMP[5].xyzz, -CONST[16].xyzz
 23: MAD TEMP[6].x, TEMP[1].zzzz, CONST[17].xxxx, CONST[17].yyyy
 24: MOV TEMP[5].w, TEMP[6].xxxx
 25: MUL TEMP[6], CONST[4], TEMP[0].xxxx
 26: MAD TEMP[6], CONST[5], TEMP[0].yyyy, TEMP[6]
 27: MAD TEMP[0], CONST[6], TEMP[0].zzzz, TEMP[6]
 28: ADD TEMP[0].z, TEMP[0], CONST[7]
 29: MAD TEMP[0].x, TEMP[0].zzzz, CONST[18].xxxx, CONST[18].yyyy
 30: MOV TEMP[0].y, CONST[18].zzzz
 31: MAD TEMP[6].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
 32: MOV TEMP[2].xyz, TEMP[2].xyzx
 33: MOV TEMP[2].w, TEMP[4].xxxx
 34: MOV TEMP[4].xy, TEMP[4].yzyy
 35: MOV TEMP[4].zw, TEMP[3].yyxy
 36: MOV TEMP[3].x, TEMP[3].zzzz
 37: MOV TEMP[6].zw, TEMP[0].yyxy
 38: MOV OUT[6], TEMP[3]
 39: MOV OUT[3], TEMP[6]
 40: MOV OUT[1], CONST[12]
 41: MOV OUT[4], TEMP[2]
 42: MOV OUT[5], TEMP[4]
 43: MOV OUT[2], TEMP[5]
 44: MOV OUT[0], TEMP[1]
 45: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 12)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 28)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 188)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 204)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 216)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 220)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 240)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 244)
  %58 = call float @llvm.SI.load.const(<16 x i8> %10, i32 248)
  %59 = call float @llvm.SI.load.const(<16 x i8> %10, i32 256)
  %60 = call float @llvm.SI.load.const(<16 x i8> %10, i32 260)
  %61 = call float @llvm.SI.load.const(<16 x i8> %10, i32 264)
  %62 = call float @llvm.SI.load.const(<16 x i8> %10, i32 272)
  %63 = call float @llvm.SI.load.const(<16 x i8> %10, i32 276)
  %64 = call float @llvm.SI.load.const(<16 x i8> %10, i32 288)
  %65 = call float @llvm.SI.load.const(<16 x i8> %10, i32 292)
  %66 = call float @llvm.SI.load.const(<16 x i8> %10, i32 296)
  %67 = call float @llvm.SI.load.const(<16 x i8> %10, i32 300)
  %68 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0
  %70 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %5)
  %71 = extractelement <4 x float> %70, i32 0
  %72 = extractelement <4 x float> %70, i32 1
  %73 = extractelement <4 x float> %70, i32 2
  %74 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0
  %76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %5)
  %77 = extractelement <4 x float> %76, i32 0
  %78 = extractelement <4 x float> %76, i32 1
  %79 = extractelement <4 x float> %76, i32 2
  %80 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0
  %82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %5)
  %83 = extractelement <4 x float> %82, i32 0
  %84 = extractelement <4 x float> %82, i32 1
  %85 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %86 = load <16 x i8> addrspace(2)* %85, !tbaa !0
  %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %86, i32 0, i32 %5)
  %88 = extractelement <4 x float> %87, i32 0
  %89 = extractelement <4 x float> %87, i32 1
  %90 = extractelement <4 x float> %87, i32 2
  %91 = fmul float %71, %56
  %92 = fadd float %91, %53
  %93 = fmul float %72, %57
  %94 = fadd float %93, %54
  %95 = fmul float %73, %58
  %96 = fadd float %95, %55
  %97 = fmul float %29, %92
  %98 = fmul float %30, %92
  %99 = fmul float %31, %92
  %100 = fmul float %32, %92
  %101 = fmul float %33, %94
  %102 = fadd float %101, %97
  %103 = fmul float %34, %94
  %104 = fadd float %103, %98
  %105 = fmul float %35, %94
  %106 = fadd float %105, %99
  %107 = fmul float %36, %94
  %108 = fadd float %107, %100
  %109 = fmul float %37, %96
  %110 = fadd float %109, %102
  %111 = fmul float %38, %96
  %112 = fadd float %111, %104
  %113 = fmul float %39, %96
  %114 = fadd float %113, %106
  %115 = fmul float %40, %96
  %116 = fadd float %115, %108
  %117 = fadd float %110, %41
  %118 = fadd float %112, %42
  %119 = fadd float %114, %43
  %120 = fadd float %116, %44
  %121 = fmul float %77, %67
  %122 = fmul float %78, %67
  %123 = fmul float %79, %67
  %124 = fmul float %11, %121
  %125 = fmul float %12, %121
  %126 = fmul float %13, %121
  %127 = fmul float %14, %121
  %128 = fmul float %15, %122
  %129 = fadd float %128, %124
  %130 = fmul float %16, %122
  %131 = fadd float %130, %125
  %132 = fmul float %17, %122
  %133 = fadd float %132, %126
  %134 = fmul float %18, %122
  %135 = fadd float %134, %127
  %136 = fmul float %19, %123
  %137 = fadd float %136, %129
  %138 = fmul float %20, %123
  %139 = fadd float %138, %131
  %140 = fmul float %21, %123
  %141 = fadd float %140, %133
  %142 = fmul float %137, %137
  %143 = fmul float %139, %139
  %144 = fadd float %143, %142
  %145 = fmul float %141, %141
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %137, %147
  %149 = fmul float %139, %147
  %150 = fmul float %141, %147
  %151 = fmul float %88, %67
  %152 = fmul float %89, %67
  %153 = fmul float %90, %67
  %154 = fmul float %11, %151
  %155 = fmul float %12, %151
  %156 = fmul float %13, %151
  %157 = fmul float %15, %152
  %158 = fadd float %157, %154
  %159 = fmul float %16, %152
  %160 = fadd float %159, %155
  %161 = fmul float %17, %152
  %162 = fadd float %161, %156
  %163 = fmul float %19, %153
  %164 = fadd float %163, %158
  %165 = fmul float %20, %153
  %166 = fadd float %165, %160
  %167 = fmul float %21, %153
  %168 = fadd float %167, %162
  %169 = fmul float %168, %149
  %170 = fmul float %164, %150
  %171 = fmul float %166, %148
  %172 = fsub float -0.000000e+00, %169
  %173 = fmul float %166, %150
  %174 = fadd float %173, %172
  %175 = fsub float -0.000000e+00, %170
  %176 = fmul float %168, %148
  %177 = fadd float %176, %175
  %178 = fsub float -0.000000e+00, %171
  %179 = fmul float %164, %149
  %180 = fadd float %179, %178
  %181 = fmul float %11, %92
  %182 = fmul float %12, %92
  %183 = fmul float %13, %92
  %184 = fmul float %15, %94
  %185 = fadd float %184, %181
  %186 = fmul float %16, %94
  %187 = fadd float %186, %182
  %188 = fmul float %17, %94
  %189 = fadd float %188, %183
  %190 = fmul float %19, %96
  %191 = fadd float %190, %185
  %192 = fmul float %20, %96
  %193 = fadd float %192, %187
  %194 = fmul float %21, %96
  %195 = fadd float %194, %189
  %196 = fadd float %191, %22
  %197 = fadd float %193, %23
  %198 = fadd float %195, %24
  %199 = fsub float -0.000000e+00, %59
  %200 = fadd float %196, %199
  %201 = fsub float -0.000000e+00, %60
  %202 = fadd float %197, %201
  %203 = fsub float -0.000000e+00, %61
  %204 = fadd float %198, %203
  %205 = fmul float %119, %62
  %206 = fadd float %205, %63
  %207 = fmul float %25, %92
  %208 = fmul float %26, %94
  %209 = fadd float %208, %207
  %210 = fmul float %27, %96
  %211 = fadd float %210, %209
  %212 = fadd float %211, %28
  %213 = fmul float %212, %64
  %214 = fadd float %213, %65
  %215 = fmul float %83, %49
  %216 = fadd float %215, %51
  %217 = fmul float %84, %50
  %218 = fadd float %217, %52
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %200, float %202, float %204, float %206)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %216, float %218, float %214, float %66)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %148, float %149, float %150, float %174)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %177, float %180, float %164, float %166)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %168, float %166, float %168, float %135)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %118, float %119, float %120)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020133
c2028132
c2040131
c2048130
bf8c007f
7e020209
7e040208
7e060205
7e080204
f800020f
04030201
c0840700
bf8c000f
e00c2000
80020300
c202013d
c2028139
bf8c0070
7e020205
d2820001
04040904
c202013c
c2028138
bf8c007f
7e040205
d2820002
04080903
c2020122
bf8c007f
100e0404
c2020126
bf8c007f
d2820007
041e0204
c202013e
c202813a
bf8c007f
7e100205
d2820003
04200905
c202012a
bf8c007f
d2820004
041e0604
c202012e
bf8c007f
06080804
c2020144
c2028145
bf8c007f
7e0a0205
d2820005
04140904
c2020102
bf8c007f
100c0404
c2040106
bf8c007f
d2820006
041a0208
c202810a
bf8c007f
d2820006
041a0605
c204810e
bf8c007f
060c0c09
c2048142
bf8c007f
0a0c0c09
c2048101
bf8c007f
100e0409
c2050105
bf8c007f
d2820007
041e020a
c2058109
bf8c007f
d2820007
041e060b
c206010d
bf8c007f
060e0e0c
c2060141
bf8c007f
0a0e0e0c
c2060100
bf8c007f
1010040c
c2068104
bf8c007f
d2820008
0422020d
c2070108
bf8c007f
d2820008
0422060e
c207810c
bf8c007f
0610100f
c2078140
bf8c007f
0a10100f
f800021f
05060708
c2078112
bf8c000f
100a040f
c2078116
bf8c007f
d2820005
0416020f
c207811a
bf8c007f
d2820005
0416060f
c207811e
bf8c007f
060a0a0f
c2078148
c2080149
bf8c007f
7e0c0210
d2820005
04181f05
c0880708
bf8c007f
e00c2000
80040600
c2078135
c2080137
bf8c0070
7e140210
d282000a
04281f07
c2078134
c2080136
bf8c007f
7e160210
d2820006
042c1f06
c207814a
bf8c007f
7e0e020f
f800022f
07050a06
c0880704
bf8c000f
e00c2000
80040700
c207814b
bf8c0070
100a100f
100c0e0f
10160c09
d282000b
042e0a0a
1012120f
d2820008
042e120b
100e0c0c
d2820007
041e0a0d
d2820007
041e120e
10140f07
d282000a
042a1108
10160c04
d282000b
042e0a08
d282000b
042e1205
d2820009
042a170b
7e125b09
10101308
c088070c
bf8c007f
e00c2000
80040d00
bf8c0770
10141c0f
10181a0f
10001804
d2820000
04021408
101a1e0f
d2820000
04021a05
101e1100
101c130b
10161809
d282000b
042e140a
d282000b
042e1a0b
10201d0b
081e1f10
100e1307
f800023f
0f0e0807
bf8c070f
101e0f0b
1012180c
d2820009
0426140d
d2820009
04261a0e
10101109
08101f08
10141d09
100e0f00
080e1507
f800024f
0b090807
c2020103
bf8c000f
100c0c04
c2020107
bf8c007f
d2820005
041a0a04
f800025f
05000b00
c2020123
bf8c000f
10000404
c2020127
bf8c007f
d2820000
04020204
c202012b
bf8c007f
d2820000
04020604
c202012f
bf8c007f
06000004
c2020121
bf8c007f
100a0404
c2020125
bf8c007f
d2820005
04160204
c2020129
bf8c007f
d2820005
04160604
c202012d
bf8c007f
060a0a04
c2020120
bf8c007f
10040404
c2020124
bf8c007f
d2820001
040a0204
c2020128
bf8c007f
d2820001
04060604
c200012c
bf8c007f
06020200
f80008cf
00040501
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL IN[2], GENERIC[20], PERSPECTIVE
DCL IN[3], GENERIC[21], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL CONST[1..6]
DCL TEMP[0]
DCL TEMP[1..5], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.5000,     0.0010,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: MOV TEMP[1].z, IN[2].xxxx
  2: MOV TEMP[1].xy, IN[1].zwzz
  3: UIF TEMP[0].xxxx :1
  4:   MOV TEMP[2].x, IMM[0].xxxx
  5: ELSE :1
  6:   MOV TEMP[2].x, IMM[0].yyyy
  7: ENDIF
  8: MOV TEMP[3].xy, IN[1].xyyy
  9: TEX TEMP[3], TEMP[3], SAMP[0], 2D
 10: MAD TEMP[3].yw, IMM[0].zzzz, TEMP[3], IMM[0].xxxx
 11: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
 12: RSQ TEMP[4].x, TEMP[4].xxxx
 13: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
 14: DP2 TEMP[4].x, TEMP[3].ywww, TEMP[3].ywww
 15: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
 16: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
 17: RSQ TEMP[5].x, TEMP[4].xxxx
 18: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
 19: CMP TEMP[5].x, -TEMP[4].xxxx, TEMP[5].xxxx, IMM[0].wwww
 20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
 21: DP3 TEMP[4].x, IN[2].yzww, IN[2].yzww
 22: RSQ TEMP[4].x, TEMP[4].xxxx
 23: MUL TEMP[4].xyz, IN[2].yzww, TEMP[4].xxxx
 24: DP3 TEMP[5].x, IN[3].xyzz, IN[3].xyzz
 25: RSQ TEMP[5].x, TEMP[5].xxxx
 26: MUL TEMP[5].xyz, IN[3].xyzz, TEMP[5].xxxx
 27: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[3].wwww
 28: MAD TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
 29: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
 30: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[1].xxxx, IMM[1].xxxx
 31: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 32: F2I TEMP[1].x, -TEMP[1]
 33: UIF TEMP[1].xxxx :1
 34:   MOV TEMP[1].x, IMM[0].yyyy
 35: ELSE :1
 36:   MOV TEMP[1].x, IMM[0].wwww
 37: ENDIF
 38: MOV TEMP[2].z, TEMP[1].xxxx
 39: MUL TEMP[1].x, CONST[1].xxxx, IMM[1].yyyy
 40: MOV TEMP[1].yzw, TEMP[2].yxyz
 41: MOV OUT[0], TEMP[1]
 42: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
  %23 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !0
  %25 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0
  %27 = fcmp ugt float %16, 0.000000e+00
  %28 = select i1 %27, float 1.000000e+00, float 0.000000e+00
  %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %32 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
  %34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %3, <2 x i32> %5)
  %35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %36 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %37 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %38 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %39 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %40 = call float @llvm.AMDIL.clamp.(float %28, float 0.000000e+00, float 1.000000e+00)
  %41 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %42 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %43 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %44 = bitcast float %40 to i32
  %45 = icmp ne i32 %44, 0
  %. = select i1 %45, float -1.000000e+00, float 1.000000e+00
  %46 = bitcast float %29 to i32
  %47 = bitcast float %30 to i32
  %48 = insertelement <2 x i32> undef, i32 %46, i32 0
  %49 = insertelement <2 x i32> %48, i32 %47, i32 1
  %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %24, <16 x i8> %26, i32 2)
  %51 = extractelement <4 x float> %50, i32 1
  %52 = extractelement <4 x float> %50, i32 3
  %53 = fmul float 2.000000e+00, %51
  %54 = fadd float %53, -1.000000e+00
  %55 = fmul float 2.000000e+00, %52
  %56 = fadd float %55, -1.000000e+00
  %57 = fmul float %31, %31
  %58 = fmul float %32, %32
  %59 = fadd float %58, %57
  %60 = fmul float %33, %33
  %61 = fadd float %59, %60
  %62 = call float @llvm.AMDGPU.rsq(float %61)
  %63 = fmul float %31, %62
  %64 = fmul float %32, %62
  %65 = fmul float %33, %62
  %66 = fmul float %54, %54
  %67 = fmul float %56, %56
  %68 = fadd float %66, %67
  %69 = fsub float -0.000000e+00, %68
  %70 = fadd float 1.000000e+00, %69
  %71 = fcmp uge float 0.000000e+00, %70
  %72 = select i1 %71, float 0.000000e+00, float %70
  %73 = call float @llvm.AMDGPU.rsq(float %72)
  %74 = fmul float %73, %72
  %75 = fsub float -0.000000e+00, %72
  %76 = call float @llvm.AMDGPU.cndlt(float %75, float %74, float 0.000000e+00)
  %77 = fmul float %63, %76
  %78 = fmul float %64, %76
  %79 = fmul float %65, %76
  %80 = fmul float %34, %34
  %81 = fmul float %35, %35
  %82 = fadd float %81, %80
  %83 = fmul float %36, %36
  %84 = fadd float %82, %83
  %85 = call float @llvm.AMDGPU.rsq(float %84)
  %86 = fmul float %34, %85
  %87 = fmul float %35, %85
  %88 = fmul float %36, %85
  %89 = fmul float %37, %37
  %90 = fmul float %38, %38
  %91 = fadd float %90, %89
  %92 = fmul float %39, %39
  %93 = fadd float %91, %92
  %94 = call float @llvm.AMDGPU.rsq(float %93)
  %95 = fmul float %37, %94
  %96 = fmul float %38, %94
  %97 = fmul float %39, %94
  %98 = fmul float %95, %56
  %99 = fmul float %96, %56
  %100 = fmul float %97, %56
  %101 = fmul float %86, %54
  %102 = fadd float %101, %98
  %103 = fmul float %87, %54
  %104 = fadd float %103, %99
  %105 = fmul float %88, %54
  %106 = fadd float %105, %100
  %107 = fmul float %77, %.
  %108 = fadd float %107, %102
  %109 = fmul float %78, %.
  %110 = fadd float %109, %104
  %111 = fmul float %79, %.
  %112 = fadd float %111, %106
  %113 = fmul float %108, 5.000000e-01
  %114 = fadd float %113, 5.000000e-01
  %115 = fmul float %110, 5.000000e-01
  %116 = fadd float %115, 5.000000e-01
  %117 = fcmp uge float %112, 0.000000e+00
  %118 = select i1 %117, float 1.000000e+00, float 0.000000e+00
  %119 = fsub float -0.000000e+00, %118
  %120 = fptosi float %119 to i32
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = icmp ne i32 %122, 0
  %temp4.0 = select i1 %123, float 1.000000e+00, float 0.000000e+00
  %124 = fmul float %22, 9.765625e-04
  %125 = call i32 @llvm.SI.packf16(float %124, float %114)
  %126 = bitcast i32 %125 to float
  %127 = call i32 @llvm.SI.packf16(float %116, float %temp4.0)
  %128 = bitcast i32 %127 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %126, float %128, float %126, float %128)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
c0840300
c0c60500
bf8c007f
f0800a00
00430403
bf8c0770
06060904
060606f3
06080b05
060808f3
100a0904
d2820005
04160703
080a0af2
d0060002
02010105
d2000005
00090105
7e0c5b05
100c0b06
d2060005
22010105
d0080002
02020a80
d2000005
000a0c80
c8200300
c8210301
c8180200
c8190201
100e0d06
d2820007
041e1108
c8240400
c8250401
d2820007
041e1309
7e0e5b07
10100f08
10140b08
c8300900
c8310901
c8200800
c8210801
10161108
d282000b
042e190c
c8340a00
c8350a01
d282000b
042e1b0d
7e165b0b
1018170c
101c090c
c8400600
c8410601
c8300500
c8310501
101e190c
d2820011
043e2110
c83c0700
c83d0701
d2820000
04461f0f
7e005b00
10020110
d282000e
043a0701
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
d2820002
043a030a
d2820002
03c1e102
10120f09
10120b09
1014170d
1014090a
101a010f
d282000a
042a070d
d2820009
042a0309
d00c0002
02010109
d2000009
0009e480
d2060009
22010109
7e121109
d10a0002
02010109
d2000009
0009e480
5e041302
100c0f06
100a0b06
100c1708
10080906
1000010c
d2820000
04120700
d2820000
04020305
d2820000
03c1e100
c0800100
bf8c007f
c2000104
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL CONST[0..203]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[11].xyzz, CONST[10].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: MOV TEMP[3].w, IMM[0].xxxx
  6: MOV TEMP[3].xyz, IN[5].xyzx
  7: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww
  8: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz
  9: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy
 10: UMUL TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx
 11: UARL ADDR[0].x, TEMP[7].xxxx
 12: MUL TEMP[7], CONST[ADDR[0].x+12], IN[1].xxxx
 13: UARL ADDR[0].x, TEMP[6].xxxx
 14: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[7]
 15: UARL ADDR[0].x, TEMP[5].xxxx
 16: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[6]
 17: UARL ADDR[0].x, TEMP[4].xxxx
 18: UARL ADDR[0].x, TEMP[4].xxxx
 19: MAD TEMP[4], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[5]
 20: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 21: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 22: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 23: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 24: UARL ADDR[0].x, TEMP[8].xxxx
 25: MUL TEMP[8], CONST[ADDR[0].x+12], IN[1].xxxx
 26: UARL ADDR[0].x, TEMP[7].xxxx
 27: MAD TEMP[7], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[8]
 28: UARL ADDR[0].x, TEMP[6].xxxx
 29: MAD TEMP[6], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[7]
 30: UARL ADDR[0].x, TEMP[5].xxxx
 31: UARL ADDR[0].x, TEMP[5].xxxx
 32: MAD TEMP[5], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[6]
 33: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 34: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 35: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 36: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 37: UARL ADDR[0].x, TEMP[0].xxxx
 38: MUL TEMP[0], CONST[ADDR[0].x+12], IN[1].xxxx
 39: UARL ADDR[0].x, TEMP[8].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].yyyy, TEMP[0]
 41: UARL ADDR[0].x, TEMP[7].xxxx
 42: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].zzzz, TEMP[0]
 43: UARL ADDR[0].x, TEMP[6].xxxx
 44: UARL ADDR[0].x, TEMP[6].xxxx
 45: MAD TEMP[0], CONST[ADDR[0].x+12], IN[1].wwww, TEMP[0]
 46: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 47: DP4 TEMP[7].x, TEMP[1], TEMP[5]
 48: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 49: DP4 TEMP[8].x, TEMP[2], TEMP[4]
 50: DP4 TEMP[9].x, TEMP[2], TEMP[5]
 51: MOV TEMP[8].y, TEMP[9].xxxx
 52: DP4 TEMP[2].x, TEMP[2], TEMP[0]
 53: MOV TEMP[8].z, TEMP[2].xxxx
 54: DP4 TEMP[2].x, TEMP[3], TEMP[4]
 55: DP4 TEMP[4].x, TEMP[3], TEMP[5]
 56: MOV TEMP[2].y, TEMP[4].xxxx
 57: DP4 TEMP[0].x, TEMP[3], TEMP[0]
 58: MOV TEMP[2].z, TEMP[0].xxxx
 59: MUL TEMP[0], CONST[4], TEMP[6].xxxx
 60: MAD TEMP[0], CONST[5], TEMP[7].xxxx, TEMP[0]
 61: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 62: ADD TEMP[0], TEMP[0], CONST[7]
 63: MUL TEMP[1].xyz, TEMP[8].xyzz, CONST[9].wwww
 64: MUL TEMP[3], CONST[0], TEMP[1].xxxx
 65: MAD TEMP[3], CONST[1], TEMP[1].yyyy, TEMP[3]
 66: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[3]
 67: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
 68: RSQ TEMP[3].x, TEMP[3].xxxx
 69: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
 70: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[9].wwww
 71: MUL TEMP[3], CONST[0], TEMP[2].xxxx
 72: MAD TEMP[3], CONST[1], TEMP[2].yyyy, TEMP[3]
 73: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[3]
 74: MAD TEMP[3].xy, IN[4].xyyy, CONST[8].xyyy, CONST[8].zwww
 75: MOV TEMP[3].zw, TEMP[1].yyxy
 76: MOV TEMP[4].x, TEMP[1].zzzz
 77: MUL TEMP[5].xyz, TEMP[2].zxyy, TEMP[1].yzxx
 78: MAD TEMP[1].xyz, TEMP[2].yzxx, TEMP[1].zxyy, -TEMP[5].xyzz
 79: MOV TEMP[4].yzw, TEMP[1].yxyz
 80: MOV TEMP[1].xyz, TEMP[2].xyzx
 81: MOV OUT[1], TEMP[3]
 82: MOV OUT[3], TEMP[1]
 83: MOV OUT[2], TEMP[4]
 84: MOV OUT[0], TEMP[0]
 85: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %47 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0
  %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %5)
  %50 = extractelement <4 x float> %49, i32 0
  %51 = extractelement <4 x float> %49, i32 1
  %52 = extractelement <4 x float> %49, i32 2
  %53 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0
  %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %5)
  %56 = extractelement <4 x float> %55, i32 0
  %57 = extractelement <4 x float> %55, i32 1
  %58 = extractelement <4 x float> %55, i32 2
  %59 = extractelement <4 x float> %55, i32 3
  %60 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0
  %62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %5)
  %63 = extractelement <4 x float> %62, i32 0
  %64 = extractelement <4 x float> %62, i32 1
  %65 = extractelement <4 x float> %62, i32 2
  %66 = extractelement <4 x float> %62, i32 3
  %67 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
  %69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %68, i32 0, i32 %5)
  %70 = extractelement <4 x float> %69, i32 0
  %71 = extractelement <4 x float> %69, i32 1
  %72 = extractelement <4 x float> %69, i32 2
  %73 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %74 = load <16 x i8> addrspace(2)* %73, !tbaa !0
  %75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %5)
  %76 = extractelement <4 x float> %75, i32 0
  %77 = extractelement <4 x float> %75, i32 1
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = fptosi float %63 to i32
  %85 = fptosi float %64 to i32
  %86 = fptosi float %65 to i32
  %87 = fptosi float %66 to i32
  %88 = bitcast i32 %84 to float
  %89 = bitcast i32 %85 to float
  %90 = bitcast i32 %86 to float
  %91 = bitcast i32 %87 to float
  %92 = fmul float %50, %44
  %93 = fadd float %92, %41
  %94 = fmul float %51, %45
  %95 = fadd float %94, %42
  %96 = fmul float %52, %46
  %97 = fadd float %96, %43
  %98 = bitcast float %91 to i32
  %99 = mul i32 3, %98
  %100 = bitcast i32 %99 to float
  %101 = bitcast float %90 to i32
  %102 = mul i32 3, %101
  %103 = bitcast i32 %102 to float
  %104 = bitcast float %89 to i32
  %105 = mul i32 3, %104
  %106 = bitcast i32 %105 to float
  %107 = bitcast float %88 to i32
  %108 = mul i32 3, %107
  %109 = bitcast i32 %108 to float
  %110 = bitcast float %109 to i32
  %111 = shl i32 %110, 4
  %112 = add i32 %111, 192
  %113 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %112)
  %114 = fmul float %113, %56
  %115 = shl i32 %110, 4
  %116 = add i32 %115, 196
  %117 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %116)
  %118 = fmul float %117, %56
  %119 = shl i32 %110, 4
  %120 = add i32 %119, 200
  %121 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %120)
  %122 = fmul float %121, %56
  %123 = shl i32 %110, 4
  %124 = add i32 %123, 204
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %56
  %127 = bitcast float %106 to i32
  %128 = shl i32 %127, 4
  %129 = add i32 %128, 192
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %57
  %132 = fadd float %131, %114
  %133 = shl i32 %127, 4
  %134 = add i32 %133, 196
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %57
  %137 = fadd float %136, %118
  %138 = shl i32 %127, 4
  %139 = add i32 %138, 200
  %140 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %139)
  %141 = fmul float %140, %57
  %142 = fadd float %141, %122
  %143 = shl i32 %127, 4
  %144 = add i32 %143, 204
  %145 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %144)
  %146 = fmul float %145, %57
  %147 = fadd float %146, %126
  %148 = bitcast float %103 to i32
  %149 = shl i32 %148, 4
  %150 = add i32 %149, 192
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %58
  %153 = fadd float %152, %132
  %154 = shl i32 %148, 4
  %155 = add i32 %154, 196
  %156 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %155)
  %157 = fmul float %156, %58
  %158 = fadd float %157, %137
  %159 = shl i32 %148, 4
  %160 = add i32 %159, 200
  %161 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %160)
  %162 = fmul float %161, %58
  %163 = fadd float %162, %142
  %164 = shl i32 %148, 4
  %165 = add i32 %164, 204
  %166 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %165)
  %167 = fmul float %166, %58
  %168 = fadd float %167, %147
  %169 = bitcast float %100 to i32
  %170 = shl i32 %169, 4
  %171 = add i32 %170, 192
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %59
  %174 = fadd float %173, %153
  %175 = shl i32 %169, 4
  %176 = add i32 %175, 196
  %177 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %176)
  %178 = fmul float %177, %59
  %179 = fadd float %178, %158
  %180 = shl i32 %169, 4
  %181 = add i32 %180, 200
  %182 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %181)
  %183 = fmul float %182, %59
  %184 = fadd float %183, %163
  %185 = shl i32 %169, 4
  %186 = add i32 %185, 204
  %187 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %186)
  %188 = fmul float %187, %59
  %189 = fadd float %188, %168
  %190 = bitcast float %91 to i32
  %191 = mul i32 3, %190
  %192 = add i32 %191, 1
  %193 = bitcast i32 %192 to float
  %194 = bitcast float %90 to i32
  %195 = mul i32 3, %194
  %196 = add i32 %195, 1
  %197 = bitcast i32 %196 to float
  %198 = bitcast float %89 to i32
  %199 = mul i32 3, %198
  %200 = add i32 %199, 1
  %201 = bitcast i32 %200 to float
  %202 = bitcast float %88 to i32
  %203 = mul i32 3, %202
  %204 = add i32 %203, 1
  %205 = bitcast i32 %204 to float
  %206 = bitcast float %205 to i32
  %207 = shl i32 %206, 4
  %208 = add i32 %207, 192
  %209 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %208)
  %210 = fmul float %209, %56
  %211 = shl i32 %206, 4
  %212 = add i32 %211, 196
  %213 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %212)
  %214 = fmul float %213, %56
  %215 = shl i32 %206, 4
  %216 = add i32 %215, 200
  %217 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %216)
  %218 = fmul float %217, %56
  %219 = shl i32 %206, 4
  %220 = add i32 %219, 204
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %56
  %223 = bitcast float %201 to i32
  %224 = shl i32 %223, 4
  %225 = add i32 %224, 192
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %57
  %228 = fadd float %227, %210
  %229 = shl i32 %223, 4
  %230 = add i32 %229, 196
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %57
  %233 = fadd float %232, %214
  %234 = shl i32 %223, 4
  %235 = add i32 %234, 200
  %236 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %235)
  %237 = fmul float %236, %57
  %238 = fadd float %237, %218
  %239 = shl i32 %223, 4
  %240 = add i32 %239, 204
  %241 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %240)
  %242 = fmul float %241, %57
  %243 = fadd float %242, %222
  %244 = bitcast float %197 to i32
  %245 = shl i32 %244, 4
  %246 = add i32 %245, 192
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %58
  %249 = fadd float %248, %228
  %250 = shl i32 %244, 4
  %251 = add i32 %250, 196
  %252 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %251)
  %253 = fmul float %252, %58
  %254 = fadd float %253, %233
  %255 = shl i32 %244, 4
  %256 = add i32 %255, 200
  %257 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %256)
  %258 = fmul float %257, %58
  %259 = fadd float %258, %238
  %260 = shl i32 %244, 4
  %261 = add i32 %260, 204
  %262 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %261)
  %263 = fmul float %262, %58
  %264 = fadd float %263, %243
  %265 = bitcast float %193 to i32
  %266 = shl i32 %265, 4
  %267 = add i32 %266, 192
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %59
  %270 = fadd float %269, %249
  %271 = shl i32 %265, 4
  %272 = add i32 %271, 196
  %273 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %272)
  %274 = fmul float %273, %59
  %275 = fadd float %274, %254
  %276 = shl i32 %265, 4
  %277 = add i32 %276, 200
  %278 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %277)
  %279 = fmul float %278, %59
  %280 = fadd float %279, %259
  %281 = shl i32 %265, 4
  %282 = add i32 %281, 204
  %283 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %282)
  %284 = fmul float %283, %59
  %285 = fadd float %284, %264
  %286 = bitcast float %91 to i32
  %287 = mul i32 3, %286
  %288 = add i32 %287, 2
  %289 = bitcast i32 %288 to float
  %290 = bitcast float %90 to i32
  %291 = mul i32 3, %290
  %292 = add i32 %291, 2
  %293 = bitcast i32 %292 to float
  %294 = bitcast float %89 to i32
  %295 = mul i32 3, %294
  %296 = add i32 %295, 2
  %297 = bitcast i32 %296 to float
  %298 = bitcast float %88 to i32
  %299 = mul i32 3, %298
  %300 = add i32 %299, 2
  %301 = bitcast i32 %300 to float
  %302 = bitcast float %301 to i32
  %303 = shl i32 %302, 4
  %304 = add i32 %303, 192
  %305 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %304)
  %306 = fmul float %305, %56
  %307 = shl i32 %302, 4
  %308 = add i32 %307, 196
  %309 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %308)
  %310 = fmul float %309, %56
  %311 = shl i32 %302, 4
  %312 = add i32 %311, 200
  %313 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %312)
  %314 = fmul float %313, %56
  %315 = shl i32 %302, 4
  %316 = add i32 %315, 204
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %56
  %319 = bitcast float %297 to i32
  %320 = shl i32 %319, 4
  %321 = add i32 %320, 192
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %57
  %324 = fadd float %323, %306
  %325 = shl i32 %319, 4
  %326 = add i32 %325, 196
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %57
  %329 = fadd float %328, %310
  %330 = shl i32 %319, 4
  %331 = add i32 %330, 200
  %332 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %331)
  %333 = fmul float %332, %57
  %334 = fadd float %333, %314
  %335 = shl i32 %319, 4
  %336 = add i32 %335, 204
  %337 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %336)
  %338 = fmul float %337, %57
  %339 = fadd float %338, %318
  %340 = bitcast float %293 to i32
  %341 = shl i32 %340, 4
  %342 = add i32 %341, 192
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %58
  %345 = fadd float %344, %324
  %346 = shl i32 %340, 4
  %347 = add i32 %346, 196
  %348 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %347)
  %349 = fmul float %348, %58
  %350 = fadd float %349, %329
  %351 = shl i32 %340, 4
  %352 = add i32 %351, 200
  %353 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %352)
  %354 = fmul float %353, %58
  %355 = fadd float %354, %334
  %356 = shl i32 %340, 4
  %357 = add i32 %356, 204
  %358 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %357)
  %359 = fmul float %358, %58
  %360 = fadd float %359, %339
  %361 = bitcast float %289 to i32
  %362 = shl i32 %361, 4
  %363 = add i32 %362, 192
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %59
  %366 = fadd float %365, %345
  %367 = shl i32 %361, 4
  %368 = add i32 %367, 196
  %369 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %368)
  %370 = fmul float %369, %59
  %371 = fadd float %370, %350
  %372 = shl i32 %361, 4
  %373 = add i32 %372, 200
  %374 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %373)
  %375 = fmul float %374, %59
  %376 = fadd float %375, %355
  %377 = shl i32 %361, 4
  %378 = add i32 %377, 204
  %379 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %378)
  %380 = fmul float %379, %59
  %381 = fadd float %380, %360
  %382 = fmul float %93, %174
  %383 = fmul float %95, %179
  %384 = fadd float %382, %383
  %385 = fmul float %97, %184
  %386 = fadd float %384, %385
  %387 = fmul float 1.000000e+00, %189
  %388 = fadd float %386, %387
  %389 = fmul float %93, %270
  %390 = fmul float %95, %275
  %391 = fadd float %389, %390
  %392 = fmul float %97, %280
  %393 = fadd float %391, %392
  %394 = fmul float 1.000000e+00, %285
  %395 = fadd float %393, %394
  %396 = fmul float %93, %366
  %397 = fmul float %95, %371
  %398 = fadd float %396, %397
  %399 = fmul float %97, %376
  %400 = fadd float %398, %399
  %401 = fmul float 1.000000e+00, %381
  %402 = fadd float %400, %401
  %403 = fmul float %70, %174
  %404 = fmul float %71, %179
  %405 = fadd float %403, %404
  %406 = fmul float %72, %184
  %407 = fadd float %405, %406
  %408 = fmul float 1.000000e+00, %189
  %409 = fadd float %407, %408
  %410 = fmul float %70, %270
  %411 = fmul float %71, %275
  %412 = fadd float %410, %411
  %413 = fmul float %72, %280
  %414 = fadd float %412, %413
  %415 = fmul float 1.000000e+00, %285
  %416 = fadd float %414, %415
  %417 = fmul float %70, %366
  %418 = fmul float %71, %371
  %419 = fadd float %417, %418
  %420 = fmul float %72, %376
  %421 = fadd float %419, %420
  %422 = fmul float 1.000000e+00, %381
  %423 = fadd float %421, %422
  %424 = fmul float %81, %174
  %425 = fmul float %82, %179
  %426 = fadd float %424, %425
  %427 = fmul float %83, %184
  %428 = fadd float %426, %427
  %429 = fmul float 1.000000e+00, %189
  %430 = fadd float %428, %429
  %431 = fmul float %81, %270
  %432 = fmul float %82, %275
  %433 = fadd float %431, %432
  %434 = fmul float %83, %280
  %435 = fadd float %433, %434
  %436 = fmul float 1.000000e+00, %285
  %437 = fadd float %435, %436
  %438 = fmul float %81, %366
  %439 = fmul float %82, %371
  %440 = fadd float %438, %439
  %441 = fmul float %83, %376
  %442 = fadd float %440, %441
  %443 = fmul float 1.000000e+00, %381
  %444 = fadd float %442, %443
  %445 = fmul float %20, %388
  %446 = fmul float %21, %388
  %447 = fmul float %22, %388
  %448 = fmul float %23, %388
  %449 = fmul float %24, %395
  %450 = fadd float %449, %445
  %451 = fmul float %25, %395
  %452 = fadd float %451, %446
  %453 = fmul float %26, %395
  %454 = fadd float %453, %447
  %455 = fmul float %27, %395
  %456 = fadd float %455, %448
  %457 = fmul float %28, %402
  %458 = fadd float %457, %450
  %459 = fmul float %29, %402
  %460 = fadd float %459, %452
  %461 = fmul float %30, %402
  %462 = fadd float %461, %454
  %463 = fmul float %31, %402
  %464 = fadd float %463, %456
  %465 = fadd float %458, %32
  %466 = fadd float %460, %33
  %467 = fadd float %462, %34
  %468 = fadd float %464, %35
  %469 = fmul float %409, %40
  %470 = fmul float %416, %40
  %471 = fmul float %423, %40
  %472 = fmul float %11, %469
  %473 = fmul float %12, %469
  %474 = fmul float %13, %469
  %475 = fmul float %14, %470
  %476 = fadd float %475, %472
  %477 = fmul float %15, %470
  %478 = fadd float %477, %473
  %479 = fmul float %16, %470
  %480 = fadd float %479, %474
  %481 = fmul float %17, %471
  %482 = fadd float %481, %476
  %483 = fmul float %18, %471
  %484 = fadd float %483, %478
  %485 = fmul float %19, %471
  %486 = fadd float %485, %480
  %487 = fmul float %482, %482
  %488 = fmul float %484, %484
  %489 = fadd float %488, %487
  %490 = fmul float %486, %486
  %491 = fadd float %489, %490
  %492 = call float @llvm.AMDGPU.rsq(float %491)
  %493 = fmul float %482, %492
  %494 = fmul float %484, %492
  %495 = fmul float %486, %492
  %496 = fmul float %430, %40
  %497 = fmul float %437, %40
  %498 = fmul float %444, %40
  %499 = fmul float %11, %496
  %500 = fmul float %12, %496
  %501 = fmul float %13, %496
  %502 = fmul float %14, %497
  %503 = fadd float %502, %499
  %504 = fmul float %15, %497
  %505 = fadd float %504, %500
  %506 = fmul float %16, %497
  %507 = fadd float %506, %501
  %508 = fmul float %17, %498
  %509 = fadd float %508, %503
  %510 = fmul float %18, %498
  %511 = fadd float %510, %505
  %512 = fmul float %19, %498
  %513 = fadd float %512, %507
  %514 = fmul float %76, %36
  %515 = fadd float %514, %38
  %516 = fmul float %77, %37
  %517 = fadd float %516, %39
  %518 = fmul float %513, %494
  %519 = fmul float %509, %495
  %520 = fmul float %511, %493
  %521 = fsub float -0.000000e+00, %518
  %522 = fmul float %511, %495
  %523 = fadd float %522, %521
  %524 = fsub float -0.000000e+00, %519
  %525 = fmul float %513, %493
  %526 = fadd float %525, %524
  %527 = fsub float -0.000000e+00, %520
  %528 = fmul float %509, %494
  %529 = fadd float %528, %527
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %515, float %517, float %493, float %494)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %495, float %523, float %526, float %529)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %509, float %511, float %513, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %465, float %466, float %467, float %468)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020e00
bf8c0770
7e02110e
d2d6000d
02010701
34081a84
4a0208ff
000000c0
c0800100
bf8c007f
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020600
bf8c0770
10020d01
7e04110f
d2d60012
02010702
340a2484
4a040aff
000000c0
e0301000
80000202
bf8c0770
d2820001
04060f02
7e041110
d2d60014
02010702
34142884
4a0414ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061102
7e041111
d2d60015
02010702
34162a84
4a0416ff
000000c0
e0301000
80000202
bf8c0770
d2820001
04061302
4a0408ff
000000c4
e0301000
80000202
bf8c0770
10040d02
4a060aff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a0f03
4a0614ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1103
4a0616ff
000000c4
e0301000
80000303
bf8c0770
d2820002
040a1303
c084070c
bf8c007f
e00c2000
80020e00
bf8c0770
1006050f
d282000c
040e030e
4a0608ff
000000c8
e0301000
80000303
bf8c0770
10060d03
4a260aff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e0f13
4a2614ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1113
4a2616ff
000000c8
e0301000
80001313
bf8c0770
d2820003
040e1313
d282000c
04320710
4a0808ff
000000cc
e0301000
80000404
bf8c0770
10080d04
4a0a0aff
000000cc
e0301000
80000505
bf8c0770
d2820004
04120f05
4a0a14ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121105
4a0a16ff
000000cc
e0301000
80000505
bf8c0770
d2820004
04121305
060a090c
c2020127
bf8c007f
10260a04
c2028101
bf8c007f
102e2605
4a0a1a81
34180a84
4a0a18ff
000000c0
e0301000
80000505
bf8c0770
100a0d05
4a142481
342c1484
4a142cff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
04160f0a
4a142881
34301484
4a1430ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416110a
4a142a81
34321484
4a1432ff
000000c0
e0301000
80000a0a
bf8c0770
d2820005
0416130a
4a1418ff
000000c4
e0301000
80000a0a
bf8c0770
10140d0a
4a162cff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a0f0b
4a1630ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a110b
4a1632ff
000000c4
e0301000
80000b0b
bf8c0770
d282000a
042a130b
1016150f
d282001a
042e0b0e
4a1618ff
000000c8
e0301000
80000b0b
bf8c0770
10160d0b
4a362cff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e0f1b
4a3630ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e111b
4a3632ff
000000c8
e0301000
80001b1b
bf8c0770
d282000b
042e131b
d282001a
046a1710
4a1818ff
000000cc
e0301000
80000c0c
bf8c0770
10180d0c
4a2c2cff
000000cc
e0301000
80001616
bf8c0770
d282000c
04320f16
4a2c30ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321116
4a2c32ff
000000cc
e0301000
80001616
bf8c0770
d282000c
04321316
062c191a
102c2c04
c2040105
bf8c007f
d2820017
045e2c08
4a1a1a82
34301a84
4a1a30ff
000000c0
e0301000
80000d0d
bf8c0770
101a0d0d
4a242482
34322484
4a2432ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04360f12
4a242882
34342484
4a2434ff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361112
4a242a82
342a2484
4a242aff
000000c0
e0301000
80001212
bf8c0770
d282000d
04361312
4a2430ff
000000c4
e0301000
80001212
bf8c0770
10240d12
4a2832ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a0f14
4a2834ff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1114
4a282aff
000000c4
e0301000
80001414
bf8c0770
d2820012
044a1314
1028250f
d282001b
04521b0e
4a2830ff
000000c8
e0301000
80001414
bf8c0770
10280d14
4a3832ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
04520f1c
4a3834ff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452111c
4a382aff
000000c8
e0301000
80001c1c
bf8c0770
d2820014
0452131c
d282000e
046e2910
4a1e30ff
000000cc
e0301000
80000f0f
bf8c0770
101e0d0f
4a2032ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e0f10
4a2034ff
000000cc
e0301000
80001010
bf8c0770
d282000f
043e1110
4a202aff
000000cc
e0301000
80001010
bf8c0770
d2820006
043e1310
060e0d0e
10100e04
c2048109
bf8c007f
d2820007
045e1009
c2050100
bf8c007f
1012260a
c2058104
bf8c007f
d2820009
04262c0b
c2068108
bf8c007f
d282000f
0426100d
10121f0f
d282000e
04260f07
c2060102
bf8c007f
1012260c
c2070106
bf8c007f
d2820009
04262c0e
c207810a
bf8c007f
d2820009
0426100f
d2820008
043a1309
7e1c5b08
100e1d07
10101d0f
c0880710
bf8c007f
e00c2000
80041500
c2080121
c2088123
bf8c0070
7e1e0211
d282000f
043c2116
c2080120
c2088122
bf8c007f
7e200211
d2820010
04402115
f800020f
07080f10
c0880714
bf8c000f
e00c2000
80041500
bf8c0770
101e0516
d282000f
043e0315
d282000f
043e0717
061e090f
101e1e04
10201e05
10221516
d2820011
04460b15
d2820011
04461717
06221911
10222204
d2820010
04422208
10262516
d2820013
044e1b15
d2820013
044e2917
06260d13
102a2604
d2820010
04422a09
102c1110
10261e0a
d2820013
044e220b
d2820013
044e2a0d
102e0f13
082c2d17
10121d09
102e1313
101c1e0c
d282000e
043a220e
d282000e
043a2a0f
1010110e
08102f08
100e0f0e
101e1310
080e0f0f
f800021f
16080709
bf8c070f
7e0e02f2
f800022f
070e1013
c0820700
bf8c000f
e00c2000
80010e00
c202012d
c2028129
bf8c0070
7e000205
d2820007
0400090f
10000507
c202012c
c2028128
bf8c007f
7e040205
d2820002
0408090e
d2820000
04020302
c202012e
c202812a
bf8c007f
7e020205
d2820008
04040910
d2820000
04020708
06000900
c2020113
bf8c007f
10060004
10021507
d2820001
04060b02
d2820001
04061708
06021901
c2020117
bf8c007f
d2820003
040e0204
10082507
d2820002
04121b02
d2820002
040a2908
06040d02
c202011b
bf8c007f
d2820003
040e0404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080004
c2020116
bf8c007f
d2820004
04120204
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0004
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04020204
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], FACE, CONSTANT
DCL IN[1], GENERIC[19], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..5]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     0.5000,     0.0000}
IMM[1] FLT32 {    0.0010,     0.0000,     0.0000,     0.0000}
  0: MOV_SAT TEMP[0], IN[0]
  1: UIF TEMP[0].xxxx :1
  2:   MOV TEMP[1].x, IMM[0].xxxx
  3: ELSE :1
  4:   MOV TEMP[1].x, IMM[0].yyyy
  5: ENDIF
  6: DP3 TEMP[2].x, IN[1].xyzz, IN[1].xyzz
  7: RSQ TEMP[2].x, TEMP[2].xxxx
  8: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
  9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
 10: MAD TEMP[2].xy, TEMP[1].xyyy, IMM[0].zzzz, IMM[0].zzzz
 11: SGE TEMP[1].x, TEMP[1].zzzz, IMM[0].wwww
 12: F2I TEMP[1].x, -TEMP[1]
 13: UIF TEMP[1].xxxx :1
 14:   MOV TEMP[1].x, IMM[0].yyyy
 15: ELSE :1
 16:   MOV TEMP[1].x, IMM[0].wwww
 17: ENDIF
 18: MOV TEMP[2].z, TEMP[1].xxxx
 19: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
 20: MOV TEMP[1].yzw, TEMP[2].yxyz
 21: MOV OUT[0], TEMP[1]
 22: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
  %23 = fcmp ugt float %16, 0.000000e+00
  %24 = select i1 %23, float 1.000000e+00, float 0.000000e+00
  %25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %28 = call float @llvm.AMDIL.clamp.(float %24, float 0.000000e+00, float 1.000000e+00)
  %29 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %30 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %32 = bitcast float %28 to i32
  %33 = icmp ne i32 %32, 0
  %. = select i1 %33, float -1.000000e+00, float 1.000000e+00
  %34 = fmul float %25, %25
  %35 = fmul float %26, %26
  %36 = fadd float %35, %34
  %37 = fmul float %27, %27
  %38 = fadd float %36, %37
  %39 = call float @llvm.AMDGPU.rsq(float %38)
  %40 = fmul float %25, %39
  %41 = fmul float %26, %39
  %42 = fmul float %27, %39
  %43 = fmul float %40, %.
  %44 = fmul float %41, %.
  %45 = fmul float %42, %.
  %46 = fmul float %43, 5.000000e-01
  %47 = fadd float %46, 5.000000e-01
  %48 = fmul float %44, 5.000000e-01
  %49 = fadd float %48, 5.000000e-01
  %50 = fcmp uge float %45, 0.000000e+00
  %51 = select i1 %50, float 1.000000e+00, float 0.000000e+00
  %52 = fsub float -0.000000e+00, %51
  %53 = fptosi float %52 to i32
  %54 = bitcast i32 %53 to float
  %55 = bitcast float %54 to i32
  %56 = icmp ne i32 %55, 0
  %temp4.1 = select i1 %56, float 1.000000e+00, float 0.000000e+00
  %57 = fmul float %22, 9.765625e-04
  %58 = call i32 @llvm.SI.packf16(float %57, float %47)
  %59 = bitcast i32 %58 to float
  %60 = call i32 @llvm.SI.packf16(float %49, float %temp4.1)
  %61 = bitcast i32 %60 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8100100
c8110101
c80c0000
c80d0001
100a0703
d2820006
04160904
c8140200
c8150201
d2820000
041a0b05
7e005b00
10080104
d0080002
02010102
d2000001
0009e480
d2060801
02010101
d10a0002
02010101
d2000001
0009e6f2
10040304
d2820002
03c1e102
10080105
10080304
d00c0002
02010104
d2000004
0009e480
d2060004
22010104
7e081104
d10a0002
02010104
d2000004
0009e480
5e040902
10000103
10000300
d2820000
03c1e100
c0800100
bf8c007f
c2000100
7e0202ff
3a800000
bf8c007f
10020200
5e000101
f8001c0f
02000200
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL CONST[0..202]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[10].xyzz, CONST[9].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+11], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+11], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+11], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+11], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MUL TEMP[1].xyz, TEMP[3].xyzz, CONST[8].wwww
 57: MUL TEMP[2], CONST[0], TEMP[1].xxxx
 58: MAD TEMP[2], CONST[1], TEMP[1].yyyy, TEMP[2]
 59: MAD TEMP[1].xyz, CONST[2], TEMP[1].zzzz, TEMP[2]
 60: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
 61: RSQ TEMP[2].x, TEMP[2].xxxx
 62: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
 63: MOV OUT[1], TEMP[1]
 64: MOV OUT[0], TEMP[0]
 65: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 140)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 160)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 164)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 168)
  %43 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0
  %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %5)
  %46 = extractelement <4 x float> %45, i32 0
  %47 = extractelement <4 x float> %45, i32 1
  %48 = extractelement <4 x float> %45, i32 2
  %49 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %50 = load <16 x i8> addrspace(2)* %49, !tbaa !0
  %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %5)
  %52 = extractelement <4 x float> %51, i32 0
  %53 = extractelement <4 x float> %51, i32 1
  %54 = extractelement <4 x float> %51, i32 2
  %55 = extractelement <4 x float> %51, i32 3
  %56 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0
  %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %5)
  %59 = extractelement <4 x float> %58, i32 0
  %60 = extractelement <4 x float> %58, i32 1
  %61 = extractelement <4 x float> %58, i32 2
  %62 = extractelement <4 x float> %58, i32 3
  %63 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
  %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %5)
  %66 = extractelement <4 x float> %65, i32 0
  %67 = extractelement <4 x float> %65, i32 1
  %68 = extractelement <4 x float> %65, i32 2
  %69 = fptosi float %59 to i32
  %70 = fptosi float %60 to i32
  %71 = fptosi float %61 to i32
  %72 = fptosi float %62 to i32
  %73 = bitcast i32 %69 to float
  %74 = bitcast i32 %70 to float
  %75 = bitcast i32 %71 to float
  %76 = bitcast i32 %72 to float
  %77 = fmul float %46, %40
  %78 = fadd float %77, %37
  %79 = fmul float %47, %41
  %80 = fadd float %79, %38
  %81 = fmul float %48, %42
  %82 = fadd float %81, %39
  %83 = bitcast float %76 to i32
  %84 = mul i32 3, %83
  %85 = bitcast i32 %84 to float
  %86 = bitcast float %75 to i32
  %87 = mul i32 3, %86
  %88 = bitcast i32 %87 to float
  %89 = bitcast float %74 to i32
  %90 = mul i32 3, %89
  %91 = bitcast i32 %90 to float
  %92 = bitcast float %73 to i32
  %93 = mul i32 3, %92
  %94 = bitcast i32 %93 to float
  %95 = bitcast float %94 to i32
  %96 = shl i32 %95, 4
  %97 = add i32 %96, 176
  %98 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %97)
  %99 = fmul float %98, %52
  %100 = shl i32 %95, 4
  %101 = add i32 %100, 180
  %102 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %101)
  %103 = fmul float %102, %52
  %104 = shl i32 %95, 4
  %105 = add i32 %104, 184
  %106 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %105)
  %107 = fmul float %106, %52
  %108 = shl i32 %95, 4
  %109 = add i32 %108, 188
  %110 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %109)
  %111 = fmul float %110, %52
  %112 = bitcast float %91 to i32
  %113 = shl i32 %112, 4
  %114 = add i32 %113, 176
  %115 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %114)
  %116 = fmul float %115, %53
  %117 = fadd float %116, %99
  %118 = shl i32 %112, 4
  %119 = add i32 %118, 180
  %120 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %119)
  %121 = fmul float %120, %53
  %122 = fadd float %121, %103
  %123 = shl i32 %112, 4
  %124 = add i32 %123, 184
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %53
  %127 = fadd float %126, %107
  %128 = shl i32 %112, 4
  %129 = add i32 %128, 188
  %130 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %129)
  %131 = fmul float %130, %53
  %132 = fadd float %131, %111
  %133 = bitcast float %88 to i32
  %134 = shl i32 %133, 4
  %135 = add i32 %134, 176
  %136 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %135)
  %137 = fmul float %136, %54
  %138 = fadd float %137, %117
  %139 = shl i32 %133, 4
  %140 = add i32 %139, 180
  %141 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %140)
  %142 = fmul float %141, %54
  %143 = fadd float %142, %122
  %144 = shl i32 %133, 4
  %145 = add i32 %144, 184
  %146 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %145)
  %147 = fmul float %146, %54
  %148 = fadd float %147, %127
  %149 = shl i32 %133, 4
  %150 = add i32 %149, 188
  %151 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %150)
  %152 = fmul float %151, %54
  %153 = fadd float %152, %132
  %154 = bitcast float %85 to i32
  %155 = shl i32 %154, 4
  %156 = add i32 %155, 176
  %157 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %156)
  %158 = fmul float %157, %55
  %159 = fadd float %158, %138
  %160 = shl i32 %154, 4
  %161 = add i32 %160, 180
  %162 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %161)
  %163 = fmul float %162, %55
  %164 = fadd float %163, %143
  %165 = shl i32 %154, 4
  %166 = add i32 %165, 184
  %167 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %166)
  %168 = fmul float %167, %55
  %169 = fadd float %168, %148
  %170 = shl i32 %154, 4
  %171 = add i32 %170, 188
  %172 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %171)
  %173 = fmul float %172, %55
  %174 = fadd float %173, %153
  %175 = bitcast float %76 to i32
  %176 = mul i32 3, %175
  %177 = add i32 %176, 1
  %178 = bitcast i32 %177 to float
  %179 = bitcast float %75 to i32
  %180 = mul i32 3, %179
  %181 = add i32 %180, 1
  %182 = bitcast i32 %181 to float
  %183 = bitcast float %74 to i32
  %184 = mul i32 3, %183
  %185 = add i32 %184, 1
  %186 = bitcast i32 %185 to float
  %187 = bitcast float %73 to i32
  %188 = mul i32 3, %187
  %189 = add i32 %188, 1
  %190 = bitcast i32 %189 to float
  %191 = bitcast float %190 to i32
  %192 = shl i32 %191, 4
  %193 = add i32 %192, 176
  %194 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %193)
  %195 = fmul float %194, %52
  %196 = shl i32 %191, 4
  %197 = add i32 %196, 180
  %198 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %197)
  %199 = fmul float %198, %52
  %200 = shl i32 %191, 4
  %201 = add i32 %200, 184
  %202 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %201)
  %203 = fmul float %202, %52
  %204 = shl i32 %191, 4
  %205 = add i32 %204, 188
  %206 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %205)
  %207 = fmul float %206, %52
  %208 = bitcast float %186 to i32
  %209 = shl i32 %208, 4
  %210 = add i32 %209, 176
  %211 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %210)
  %212 = fmul float %211, %53
  %213 = fadd float %212, %195
  %214 = shl i32 %208, 4
  %215 = add i32 %214, 180
  %216 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %215)
  %217 = fmul float %216, %53
  %218 = fadd float %217, %199
  %219 = shl i32 %208, 4
  %220 = add i32 %219, 184
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %53
  %223 = fadd float %222, %203
  %224 = shl i32 %208, 4
  %225 = add i32 %224, 188
  %226 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %225)
  %227 = fmul float %226, %53
  %228 = fadd float %227, %207
  %229 = bitcast float %182 to i32
  %230 = shl i32 %229, 4
  %231 = add i32 %230, 176
  %232 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %231)
  %233 = fmul float %232, %54
  %234 = fadd float %233, %213
  %235 = shl i32 %229, 4
  %236 = add i32 %235, 180
  %237 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %236)
  %238 = fmul float %237, %54
  %239 = fadd float %238, %218
  %240 = shl i32 %229, 4
  %241 = add i32 %240, 184
  %242 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %241)
  %243 = fmul float %242, %54
  %244 = fadd float %243, %223
  %245 = shl i32 %229, 4
  %246 = add i32 %245, 188
  %247 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %246)
  %248 = fmul float %247, %54
  %249 = fadd float %248, %228
  %250 = bitcast float %178 to i32
  %251 = shl i32 %250, 4
  %252 = add i32 %251, 176
  %253 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %252)
  %254 = fmul float %253, %55
  %255 = fadd float %254, %234
  %256 = shl i32 %250, 4
  %257 = add i32 %256, 180
  %258 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %257)
  %259 = fmul float %258, %55
  %260 = fadd float %259, %239
  %261 = shl i32 %250, 4
  %262 = add i32 %261, 184
  %263 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %262)
  %264 = fmul float %263, %55
  %265 = fadd float %264, %244
  %266 = shl i32 %250, 4
  %267 = add i32 %266, 188
  %268 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %267)
  %269 = fmul float %268, %55
  %270 = fadd float %269, %249
  %271 = bitcast float %76 to i32
  %272 = mul i32 3, %271
  %273 = add i32 %272, 2
  %274 = bitcast i32 %273 to float
  %275 = bitcast float %75 to i32
  %276 = mul i32 3, %275
  %277 = add i32 %276, 2
  %278 = bitcast i32 %277 to float
  %279 = bitcast float %74 to i32
  %280 = mul i32 3, %279
  %281 = add i32 %280, 2
  %282 = bitcast i32 %281 to float
  %283 = bitcast float %73 to i32
  %284 = mul i32 3, %283
  %285 = add i32 %284, 2
  %286 = bitcast i32 %285 to float
  %287 = bitcast float %286 to i32
  %288 = shl i32 %287, 4
  %289 = add i32 %288, 176
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %52
  %292 = shl i32 %287, 4
  %293 = add i32 %292, 180
  %294 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %293)
  %295 = fmul float %294, %52
  %296 = shl i32 %287, 4
  %297 = add i32 %296, 184
  %298 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %297)
  %299 = fmul float %298, %52
  %300 = shl i32 %287, 4
  %301 = add i32 %300, 188
  %302 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %301)
  %303 = fmul float %302, %52
  %304 = bitcast float %282 to i32
  %305 = shl i32 %304, 4
  %306 = add i32 %305, 176
  %307 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %306)
  %308 = fmul float %307, %53
  %309 = fadd float %308, %291
  %310 = shl i32 %304, 4
  %311 = add i32 %310, 180
  %312 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %311)
  %313 = fmul float %312, %53
  %314 = fadd float %313, %295
  %315 = shl i32 %304, 4
  %316 = add i32 %315, 184
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %53
  %319 = fadd float %318, %299
  %320 = shl i32 %304, 4
  %321 = add i32 %320, 188
  %322 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %321)
  %323 = fmul float %322, %53
  %324 = fadd float %323, %303
  %325 = bitcast float %278 to i32
  %326 = shl i32 %325, 4
  %327 = add i32 %326, 176
  %328 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %327)
  %329 = fmul float %328, %54
  %330 = fadd float %329, %309
  %331 = shl i32 %325, 4
  %332 = add i32 %331, 180
  %333 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %332)
  %334 = fmul float %333, %54
  %335 = fadd float %334, %314
  %336 = shl i32 %325, 4
  %337 = add i32 %336, 184
  %338 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %337)
  %339 = fmul float %338, %54
  %340 = fadd float %339, %319
  %341 = shl i32 %325, 4
  %342 = add i32 %341, 188
  %343 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %342)
  %344 = fmul float %343, %54
  %345 = fadd float %344, %324
  %346 = bitcast float %274 to i32
  %347 = shl i32 %346, 4
  %348 = add i32 %347, 176
  %349 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %348)
  %350 = fmul float %349, %55
  %351 = fadd float %350, %330
  %352 = shl i32 %346, 4
  %353 = add i32 %352, 180
  %354 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %353)
  %355 = fmul float %354, %55
  %356 = fadd float %355, %335
  %357 = shl i32 %346, 4
  %358 = add i32 %357, 184
  %359 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %358)
  %360 = fmul float %359, %55
  %361 = fadd float %360, %340
  %362 = shl i32 %346, 4
  %363 = add i32 %362, 188
  %364 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %363)
  %365 = fmul float %364, %55
  %366 = fadd float %365, %345
  %367 = fmul float %78, %159
  %368 = fmul float %80, %164
  %369 = fadd float %367, %368
  %370 = fmul float %82, %169
  %371 = fadd float %369, %370
  %372 = fmul float 1.000000e+00, %174
  %373 = fadd float %371, %372
  %374 = fmul float %78, %255
  %375 = fmul float %80, %260
  %376 = fadd float %374, %375
  %377 = fmul float %82, %265
  %378 = fadd float %376, %377
  %379 = fmul float 1.000000e+00, %270
  %380 = fadd float %378, %379
  %381 = fmul float %78, %351
  %382 = fmul float %80, %356
  %383 = fadd float %381, %382
  %384 = fmul float %82, %361
  %385 = fadd float %383, %384
  %386 = fmul float 1.000000e+00, %366
  %387 = fadd float %385, %386
  %388 = fmul float %66, %159
  %389 = fmul float %67, %164
  %390 = fadd float %388, %389
  %391 = fmul float %68, %169
  %392 = fadd float %390, %391
  %393 = fmul float 1.000000e+00, %174
  %394 = fadd float %392, %393
  %395 = fmul float %66, %255
  %396 = fmul float %67, %260
  %397 = fadd float %395, %396
  %398 = fmul float %68, %265
  %399 = fadd float %397, %398
  %400 = fmul float 1.000000e+00, %270
  %401 = fadd float %399, %400
  %402 = fmul float %66, %351
  %403 = fmul float %67, %356
  %404 = fadd float %402, %403
  %405 = fmul float %68, %361
  %406 = fadd float %404, %405
  %407 = fmul float 1.000000e+00, %366
  %408 = fadd float %406, %407
  %409 = fmul float %20, %373
  %410 = fmul float %21, %373
  %411 = fmul float %22, %373
  %412 = fmul float %23, %373
  %413 = fmul float %24, %380
  %414 = fadd float %413, %409
  %415 = fmul float %25, %380
  %416 = fadd float %415, %410
  %417 = fmul float %26, %380
  %418 = fadd float %417, %411
  %419 = fmul float %27, %380
  %420 = fadd float %419, %412
  %421 = fmul float %28, %387
  %422 = fadd float %421, %414
  %423 = fmul float %29, %387
  %424 = fadd float %423, %416
  %425 = fmul float %30, %387
  %426 = fadd float %425, %418
  %427 = fmul float %31, %387
  %428 = fadd float %427, %420
  %429 = fadd float %422, %32
  %430 = fadd float %424, %33
  %431 = fadd float %426, %34
  %432 = fadd float %428, %35
  %433 = fmul float %394, %36
  %434 = fmul float %401, %36
  %435 = fmul float %408, %36
  %436 = fmul float %11, %433
  %437 = fmul float %12, %433
  %438 = fmul float %13, %433
  %439 = fmul float %14, %434
  %440 = fadd float %439, %436
  %441 = fmul float %15, %434
  %442 = fadd float %441, %437
  %443 = fmul float %16, %434
  %444 = fadd float %443, %438
  %445 = fmul float %17, %435
  %446 = fadd float %445, %440
  %447 = fmul float %18, %435
  %448 = fadd float %447, %442
  %449 = fmul float %19, %435
  %450 = fadd float %449, %444
  %451 = fmul float %446, %446
  %452 = fmul float %448, %448
  %453 = fadd float %452, %451
  %454 = fmul float %450, %450
  %455 = fadd float %453, %454
  %456 = call float @llvm.AMDGPU.rsq(float %455)
  %457 = fmul float %446, %456
  %458 = fmul float %448, %456
  %459 = fmul float %450, %456
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %457, float %458, float %459, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %429, float %430, float %431, float %432)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0840708
bf8c007f
e00c2000
80020b00
bf8c0770
7e02110b
d2d6000a
02010701
34101484
4a0210ff
000000b0
c0800100
bf8c007f
e0301000
80000101
c0840704
bf8c0070
e00c2000
80020200
bf8c0770
10020501
7e0c110c
d2d6000f
02010706
34121e84
4a0c12ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060706
7e0c110d
d2d60012
02010706
34202484
4a0c20ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060906
7e0c110e
d2d60013
02010706
34222684
4a0c22ff
000000b0
e0301000
80000606
bf8c0770
d2820001
04060b06
4a0c10ff
000000b4
e0301000
80000606
bf8c0770
100c0506
4a0e12ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0707
4a0e20ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0907
4a0e22ff
000000b4
e0301000
80000707
bf8c0770
d2820006
041a0b07
c084070c
bf8c007f
e00c2000
80020b00
bf8c0770
100e0d0c
d2820014
041e030b
4a0e10ff
000000b8
e0301000
80000707
bf8c0770
100e0507
4a2a12ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0715
4a2a20ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0915
4a2a22ff
000000b8
e0301000
80001515
bf8c0770
d2820007
041e0b15
d2820014
04520f0d
4a1010ff
000000bc
e0301000
80000808
bf8c0770
10100508
4a1212ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220709
4a1220ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220909
4a1222ff
000000bc
e0301000
80000909
bf8c0770
d2820008
04220b09
06121114
c2020123
bf8c007f
10221204
c2028101
bf8c007f
102e2205
4a121481
342a1284
4a122aff
000000b0
e0301000
80000909
bf8c0770
10120509
4a201e81
342c2084
4a202cff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260710
4a202481
34302084
4a2030ff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260910
4a202681
34322084
4a2032ff
000000b0
e0301000
80001010
bf8c0770
d2820009
04260b10
4a202aff
000000b4
e0301000
80001010
bf8c0770
10200510
4a282cff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420714
4a2830ff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420914
4a2832ff
000000b4
e0301000
80001414
bf8c0770
d2820010
04420b14
1028210c
d282001a
0452130b
4a282aff
000000b8
e0301000
80001414
bf8c0770
10280514
4a362cff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
0452071b
4a3630ff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
0452091b
4a3632ff
000000b8
e0301000
80001b1b
bf8c0770
d2820014
04520b1b
d282001a
046a290d
4a2a2aff
000000bc
e0301000
80001515
bf8c0770
102a0515
4a2c2cff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560716
4a2c30ff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560916
4a2c32ff
000000bc
e0301000
80001616
bf8c0770
d2820015
04560b16
062c2b1a
102c2c04
c2028105
bf8c007f
d2820017
045e2c05
4a141482
34301484
4a1430ff
000000b0
e0301000
80000a0a
bf8c0770
1014050a
4a1e1e82
34321e84
4a1e32ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a070f
4a1e2482
34341e84
4a1e34ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a090f
4a1e2682
34261e84
4a1e26ff
000000b0
e0301000
80000f0f
bf8c0770
d282000a
042a0b0f
4a1e30ff
000000b4
e0301000
80000f0f
bf8c0770
101e050f
4a2432ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0712
4a2434ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0912
4a2426ff
000000b4
e0301000
80001212
bf8c0770
d282000f
043e0b12
10241f0c
d282001b
044a150b
4a2430ff
000000b8
e0301000
80001212
bf8c0770
10240512
4a3832ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a071c
4a3834ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a091c
4a3826ff
000000b8
e0301000
80001c1c
bf8c0770
d2820012
044a0b1c
d282000b
046e250d
4a1830ff
000000bc
e0301000
80000c0c
bf8c0770
1018050c
4a1a32ff
000000bc
e0301000
80000d0d
bf8c0770
d282000c
0432070d
4a1a34ff
000000bc
e0301000
80000d0d
bf8c0770
d282000c
0432090d
4a1a26ff
000000bc
e0301000
80000d0d
bf8c0770
d2820002
04320b0d
0606050b
10080604
c2020109
bf8c007f
d2820003
045e0804
c2020100
bf8c007f
100a2204
c2020104
bf8c007f
d2820005
04162c04
c2020108
bf8c007f
d2820005
04160804
10160b05
d282000b
042e0703
c2020102
bf8c007f
10182204
c2020106
bf8c007f
d282000c
04322c04
c202010a
bf8c007f
d2820004
04320804
d282000b
042e0904
7e165b0b
10081704
10061703
100a1705
7e1602f2
f800020f
0b040305
c0820700
bf8c000f
e00c2000
80010b00
c2020129
c2028125
bf8c0070
7e000205
d2820003
0400090c
10000d03
c2020128
c2028124
bf8c007f
7e080205
d2820004
0410090b
d2820000
04020304
c202012a
c2028126
bf8c007f
7e020205
d2820005
0404090d
d2820000
04020f05
06001100
c2020113
bf8c007f
100c0004
10022103
d2820001
04061304
d2820001
04062905
06022b01
c2020117
bf8c007f
d2820006
041a0204
10061f03
d2820003
040e1504
d2820003
040e2505
06040503
c202011b
bf8c007f
d2820003
041a0404
c202011f
bf8c007f
06060604
c2020112
bf8c007f
10080004
c2020116
bf8c007f
d2820004
04120204
c202011a
bf8c007f
d2820004
04120404
c202011e
bf8c007f
06080804
c2020111
bf8c007f
100a0004
c2020115
bf8c007f
d2820005
04160204
c2020119
bf8c007f
d2820005
04160404
c202011d
bf8c007f
060a0a04
c2020110
bf8c007f
10000004
c2020114
bf8c007f
d2820000
04020204
c2020118
bf8c007f
d2820000
04020404
c200011c
bf8c007f
06000000
f80008cf
03040500
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL IN[7], GENERIC[24], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL CONST[12..13]
DCL CONST[5..11]
DCL TEMP[0..1]
DCL TEMP[2..7], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0000}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     0.0010}
IMM[2] FLT32 {    4.0000,     0.0000,     0.0000,     0.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[13].xxxx, CONST[13].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].xy, IN[5].xyyy
 13: TEX TEMP[5], TEMP[5], SAMP[1], 2D
 14: MAD TEMP[5].yw, IMM[0].zzzz, TEMP[5], IMM[0].xxxx
 15: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 16: RSQ TEMP[6].x, TEMP[6].xxxx
 17: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 18: DP2 TEMP[6].x, TEMP[5].ywww, TEMP[5].ywww
 19: ADD TEMP[6].x, IMM[0].yyyy, -TEMP[6].xxxx
 20: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
 21: RSQ TEMP[7].x, TEMP[6].xxxx
 22: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[6].xxxx
 23: CMP TEMP[7].x, -TEMP[6].xxxx, TEMP[7].xxxx, IMM[0].wwww
 24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
 25: DP3 TEMP[6].x, IN[6].yzww, IN[6].yzww
 26: RSQ TEMP[6].x, TEMP[6].xxxx
 27: MUL TEMP[6].xyz, IN[6].yzww, TEMP[6].xxxx
 28: DP3 TEMP[7].x, IN[7].xyzz, IN[7].xyzz
 29: RSQ TEMP[7].x, TEMP[7].xxxx
 30: MUL TEMP[7].xyz, IN[7].xyzz, TEMP[7].xxxx
 31: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].wwww
 32: MAD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].yyyy, TEMP[7].xyzz
 33: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
 34: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 35: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 36: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 37: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 38: MOV TEMP[2].xyz, TEMP[2].xyzz
 39: TEX TEMP[2], TEMP[2], SAMP[2], CUBE
 40: DP4 TEMP[3].x, TEMP[4], CONST[10]
 41: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[8].yyyy
 42: LRP TEMP[3], TEMP[3].xxxx, IN[2], IMM[0].yyyy
 43: MUL TEMP[3], TEMP[4], TEMP[3]
 44: MUL TEMP[5].xy, TEMP[0].xyyy, CONST[5].xyyy
 45: MOV TEMP[5].xy, TEMP[5].xyyy
 46: TEX TEMP[5], TEMP[5], SAMP[4], 2D
 47: DP4 TEMP[6].x, TEMP[4], CONST[9]
 48: ADD_SAT TEMP[6].x, TEMP[6].xxxx, CONST[8].xxxx
 49: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[5].wwww
 50: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[1].xyzz
 51: MAX TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
 52: RCP TEMP[7].x, TEMP[7].xxxx
 53: MUL TEMP[7].xyz, TEMP[5].xyzz, TEMP[7].xxxx
 54: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xyzz
 55: MAD TEMP[5].xyz, TEMP[6].xxxx, TEMP[7].xyzz, TEMP[5].xyzz
 56: MUL TEMP[3].xyz, TEMP[5].xyzz, IMM[2].xxxx
 57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[2].wwww
 58: DP4 TEMP[4].x, TEMP[4], CONST[11]
 59: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[8].zzzz
 60: MAD TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xxxx, TEMP[3].xyzz
 61: MOV TEMP[2].xy, IN[3].zwww
 62: TEX TEMP[2].xyz, TEMP[2], SAMP[3], 2D
 63: MAD TEMP[3].xyz, CONST[12].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
 64: MAX TEMP[2].x, IN[4].wwww, CONST[6].wwww
 65: MOV_SAT TEMP[2].x, TEMP[2].xxxx
 66: LRP TEMP[3].xyz, TEMP[2].xxxx, TEMP[3].xyzz, CONST[6].xyzz
 67: MOV OUT[0], TEMP[3]
 68: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 96)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 100)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 104)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 108)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 188)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 200)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 208)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 212)
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = getelementptr <32 x i8> addrspace(2)* %2, i32 4
  %65 = load <32 x i8> addrspace(2)* %64, !tbaa !0
  %66 = getelementptr <16 x i8> addrspace(2)* %1, i32 4
  %67 = load <16 x i8> addrspace(2)* %66, !tbaa !0
  %68 = fcmp ugt float %16, 0.000000e+00
  %69 = select i1 %68, float 1.000000e+00, float 0.000000e+00
  %70 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %81 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %82 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %83 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %84 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %85 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %3, <2 x i32> %5)
  %86 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %3, <2 x i32> %5)
  %87 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %3, <2 x i32> %5)
  %88 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %3, <2 x i32> %5)
  %89 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %3, <2 x i32> %5)
  %90 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %3, <2 x i32> %5)
  %91 = fmul float %13, %46
  %92 = fadd float %91, %47
  %93 = call float @llvm.AMDIL.clamp.(float %69, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %96 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %97 = bitcast float %93 to i32
  %98 = icmp ne i32 %97, 0
  %. = select i1 %98, float -1.000000e+00, float 1.000000e+00
  %99 = bitcast float %80 to i32
  %100 = bitcast float %81 to i32
  %101 = insertelement <2 x i32> undef, i32 %99, i32 0
  %102 = insertelement <2 x i32> %101, i32 %100, i32 1
  %103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %49, <16 x i8> %51, i32 2)
  %104 = extractelement <4 x float> %103, i32 0
  %105 = extractelement <4 x float> %103, i32 1
  %106 = extractelement <4 x float> %103, i32 2
  %107 = extractelement <4 x float> %103, i32 3
  %108 = bitcast float %80 to i32
  %109 = bitcast float %81 to i32
  %110 = insertelement <2 x i32> undef, i32 %108, i32 0
  %111 = insertelement <2 x i32> %110, i32 %109, i32 1
  %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %53, <16 x i8> %55, i32 2)
  %113 = extractelement <4 x float> %112, i32 1
  %114 = extractelement <4 x float> %112, i32 3
  %115 = fmul float 2.000000e+00, %113
  %116 = fadd float %115, -1.000000e+00
  %117 = fmul float 2.000000e+00, %114
  %118 = fadd float %117, -1.000000e+00
  %119 = fmul float %82, %82
  %120 = fmul float %83, %83
  %121 = fadd float %120, %119
  %122 = fmul float %84, %84
  %123 = fadd float %121, %122
  %124 = call float @llvm.AMDGPU.rsq(float %123)
  %125 = fmul float %82, %124
  %126 = fmul float %83, %124
  %127 = fmul float %84, %124
  %128 = fmul float %116, %116
  %129 = fmul float %118, %118
  %130 = fadd float %128, %129
  %131 = fsub float -0.000000e+00, %130
  %132 = fadd float 1.000000e+00, %131
  %133 = fcmp uge float 0.000000e+00, %132
  %134 = select i1 %133, float 0.000000e+00, float %132
  %135 = call float @llvm.AMDGPU.rsq(float %134)
  %136 = fmul float %135, %134
  %137 = fsub float -0.000000e+00, %134
  %138 = call float @llvm.AMDGPU.cndlt(float %137, float %136, float 0.000000e+00)
  %139 = fmul float %125, %138
  %140 = fmul float %126, %138
  %141 = fmul float %127, %138
  %142 = fmul float %85, %85
  %143 = fmul float %86, %86
  %144 = fadd float %143, %142
  %145 = fmul float %87, %87
  %146 = fadd float %144, %145
  %147 = call float @llvm.AMDGPU.rsq(float %146)
  %148 = fmul float %85, %147
  %149 = fmul float %86, %147
  %150 = fmul float %87, %147
  %151 = fmul float %88, %88
  %152 = fmul float %89, %89
  %153 = fadd float %152, %151
  %154 = fmul float %90, %90
  %155 = fadd float %153, %154
  %156 = call float @llvm.AMDGPU.rsq(float %155)
  %157 = fmul float %88, %156
  %158 = fmul float %89, %156
  %159 = fmul float %90, %156
  %160 = fmul float %157, %118
  %161 = fmul float %158, %118
  %162 = fmul float %159, %118
  %163 = fmul float %148, %116
  %164 = fadd float %163, %160
  %165 = fmul float %149, %116
  %166 = fadd float %165, %161
  %167 = fmul float %150, %116
  %168 = fadd float %167, %162
  %169 = fmul float %139, %.
  %170 = fadd float %169, %164
  %171 = fmul float %140, %.
  %172 = fadd float %171, %166
  %173 = fmul float %141, %.
  %174 = fadd float %173, %168
  %175 = fmul float %170, %76
  %176 = fmul float %172, %77
  %177 = fadd float %176, %175
  %178 = fmul float %174, %78
  %179 = fadd float %177, %178
  %180 = fmul float %179, %170
  %181 = fmul float %179, %172
  %182 = fmul float %179, %174
  %183 = fmul float 2.000000e+00, %180
  %184 = fmul float 2.000000e+00, %181
  %185 = fmul float 2.000000e+00, %182
  %186 = fsub float -0.000000e+00, %183
  %187 = fadd float %76, %186
  %188 = fsub float -0.000000e+00, %184
  %189 = fadd float %77, %188
  %190 = fsub float -0.000000e+00, %185
  %191 = fadd float %78, %190
  %192 = insertelement <4 x float> undef, float %187, i32 0
  %193 = insertelement <4 x float> %192, float %189, i32 1
  %194 = insertelement <4 x float> %193, float %191, i32 2
  %195 = insertelement <4 x float> %194, float 0.000000e+00, i32 3
  %196 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %195)
  %197 = extractelement <4 x float> %196, i32 0
  %198 = extractelement <4 x float> %196, i32 1
  %199 = extractelement <4 x float> %196, i32 2
  %200 = extractelement <4 x float> %196, i32 3
  %201 = call float @fabs(float %199)
  %202 = fdiv float 1.000000e+00, %201
  %203 = fmul float %197, %202
  %204 = fadd float %203, 1.500000e+00
  %205 = fmul float %198, %202
  %206 = fadd float %205, 1.500000e+00
  %207 = bitcast float %206 to i32
  %208 = bitcast float %204 to i32
  %209 = bitcast float %200 to i32
  %210 = insertelement <4 x i32> undef, i32 %207, i32 0
  %211 = insertelement <4 x i32> %210, i32 %208, i32 1
  %212 = insertelement <4 x i32> %211, i32 %209, i32 2
  %213 = insertelement <4 x i32> %212, i32 undef, i32 3
  %214 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %213, <32 x i8> %57, <16 x i8> %59, i32 4)
  %215 = extractelement <4 x float> %214, i32 0
  %216 = extractelement <4 x float> %214, i32 1
  %217 = extractelement <4 x float> %214, i32 2
  %218 = extractelement <4 x float> %214, i32 3
  %219 = fmul float %104, %35
  %220 = fmul float %105, %36
  %221 = fadd float %219, %220
  %222 = fmul float %106, %37
  %223 = fadd float %221, %222
  %224 = fmul float %107, %38
  %225 = fadd float %223, %224
  %226 = fadd float %225, %29
  %227 = call float @llvm.AMDIL.clamp.(float %226, float 0.000000e+00, float 1.000000e+00)
  %228 = call float @llvm.AMDGPU.lrp(float %227, float %70, float 1.000000e+00)
  %229 = call float @llvm.AMDGPU.lrp(float %227, float %71, float 1.000000e+00)
  %230 = call float @llvm.AMDGPU.lrp(float %227, float %72, float 1.000000e+00)
  %231 = call float @llvm.AMDGPU.lrp(float %227, float %73, float 1.000000e+00)
  %232 = fmul float %104, %228
  %233 = fmul float %105, %229
  %234 = fmul float %106, %230
  %235 = fmul float %107, %231
  %236 = fmul float %12, %22
  %237 = fmul float %92, %23
  %238 = bitcast float %236 to i32
  %239 = bitcast float %237 to i32
  %240 = insertelement <2 x i32> undef, i32 %238, i32 0
  %241 = insertelement <2 x i32> %240, i32 %239, i32 1
  %242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %65, <16 x i8> %67, i32 2)
  %243 = extractelement <4 x float> %242, i32 0
  %244 = extractelement <4 x float> %242, i32 1
  %245 = extractelement <4 x float> %242, i32 2
  %246 = extractelement <4 x float> %242, i32 3
  %247 = fmul float %104, %31
  %248 = fmul float %105, %32
  %249 = fadd float %247, %248
  %250 = fmul float %106, %33
  %251 = fadd float %249, %250
  %252 = fmul float %107, %34
  %253 = fadd float %251, %252
  %254 = fadd float %253, %28
  %255 = call float @llvm.AMDIL.clamp.(float %254, float 0.000000e+00, float 1.000000e+00)
  %256 = fmul float %255, %246
  %257 = fmul float %243, 0x3FCB367A00000000
  %258 = fmul float %244, 0x3FE6E2EB20000000
  %259 = fadd float %258, %257
  %260 = fmul float %245, 0x3FB27BB300000000
  %261 = fadd float %259, %260
  %262 = fcmp uge float %261, 0x3F50624DE0000000
  %263 = select i1 %262, float %261, float 0x3F50624DE0000000
  %264 = fdiv float 1.000000e+00, %263
  %265 = fmul float %243, %264
  %266 = fmul float %244, %264
  %267 = fmul float %245, %264
  %268 = fmul float %232, %243
  %269 = fmul float %233, %244
  %270 = fmul float %234, %245
  %271 = fmul float %256, %265
  %272 = fadd float %271, %268
  %273 = fmul float %256, %266
  %274 = fadd float %273, %269
  %275 = fmul float %256, %267
  %276 = fadd float %275, %270
  %277 = fmul float %272, 4.000000e+00
  %278 = fmul float %274, 4.000000e+00
  %279 = fmul float %276, 4.000000e+00
  %280 = fmul float %215, %218
  %281 = fmul float %216, %218
  %282 = fmul float %217, %218
  %283 = fmul float %104, %39
  %284 = fmul float %105, %40
  %285 = fadd float %283, %284
  %286 = fmul float %106, %41
  %287 = fadd float %285, %286
  %288 = fmul float %107, %42
  %289 = fadd float %287, %288
  %290 = fadd float %289, %30
  %291 = call float @llvm.AMDIL.clamp.(float %290, float 0.000000e+00, float 1.000000e+00)
  %292 = fmul float %280, %291
  %293 = fadd float %292, %277
  %294 = fmul float %281, %291
  %295 = fadd float %294, %278
  %296 = fmul float %282, %291
  %297 = fadd float %296, %279
  %298 = bitcast float %74 to i32
  %299 = bitcast float %75 to i32
  %300 = insertelement <2 x i32> undef, i32 %298, i32 0
  %301 = insertelement <2 x i32> %300, i32 %299, i32 1
  %302 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %301, <32 x i8> %61, <16 x i8> %63, i32 2)
  %303 = extractelement <4 x float> %302, i32 0
  %304 = extractelement <4 x float> %302, i32 1
  %305 = extractelement <4 x float> %302, i32 2
  %306 = fmul float %43, %303
  %307 = fadd float %306, %293
  %308 = fmul float %44, %304
  %309 = fadd float %308, %295
  %310 = fmul float %45, %305
  %311 = fadd float %310, %297
  %312 = fcmp uge float %79, %27
  %313 = select i1 %312, float %79, float %27
  %314 = call float @llvm.AMDIL.clamp.(float %313, float 0.000000e+00, float 1.000000e+00)
  %315 = call float @llvm.AMDGPU.lrp(float %314, float %307, float %24)
  %316 = call float @llvm.AMDGPU.lrp(float %314, float %309, float %25)
  %317 = call float @llvm.AMDGPU.lrp(float %314, float %311, float %26)
  %318 = call i32 @llvm.SI.packf16(float %315, float %316)
  %319 = bitcast i32 %318 to float
  %320 = call i32 @llvm.SI.packf16(float %317, float %235)
  %321 = bitcast i32 %320 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %319, float %321, float %319, float %321)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.cndlt(float, float, float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180d00
c8190d01
c8140c00
c8150c01
c0840304
c0c60508
bf8c007f
f0800a00
00430805
bf8c0770
060e1108
060e0ef3
06101309
061010f3
10121108
d2820009
04260f07
081212f2
d0060008
02010109
d2000009
00210109
7e145b09
1014130a
d2060009
22010109
d0080008
02021280
d2000009
00221480
c8340f00
c8350f01
c8300e00
c8310e01
1014190c
d282000b
042a1b0d
c8281000
c8291001
d282000b
042e150a
7e165b0b
101a170d
101e130d
c8441500
c8451501
c8401400
c8411401
101a2110
d282000e
04362311
c8341600
c8351601
d282000e
043a1b0d
7e1c5b0e
10221d11
10261111
c8541200
c8551201
c8501100
c8511101
10222914
d2820012
04462b15
c8441300
c8451301
d2820012
044a2311
7e245b12
102a2515
d2820015
044e0f15
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000013
0021e6f2
d2820004
0456270f
1018170c
1018130c
101e1d10
101e110f
10202514
d282000f
043e0f10
d282000c
043e270c
c83c0800
c83d0801
10281f0c
c8400900
c8410901
d2820014
04522104
1014170a
1012130a
10141d0d
1010110a
10142511
d2820007
04220f0a
d2820007
041e2709
c8200a00
c8210a01
d2820009
04521107
10140909
d2820004
042a0909
08220910
10081909
d2820004
04121909
0820090f
10080f09
d2820004
04120f09
08240908
7e260280
d28a0008
044a2310
d28c0007
044a2310
d28e0009
044a2310
d288000a
044a2310
d2060104
02010109
7e085504
7e1e02ff
3fc00000
d2820009
043e0907
d2820008
043e0908
c0840308
c0c60510
bf8c007f
f0800f00
00430a08
bf8c0770
10241b0b
c0840300
c0c60500
bf8c007f
f0800f00
00430405
c0840100
bf8c0070
c2000929
bf8c007f
10100a00
c2000928
bf8c007f
d2820008
04200104
c200092a
bf8c007f
d2820008
04200106
c200092b
bf8c007f
d2820008
04200107
c2000921
bf8c007f
06101000
d2060808
02010108
081210f2
c8380100
c8390101
d282000e
04261d08
10261d05
c2000934
c2008935
bf8c007f
7e1c0201
d2820003
04380103
c2000915
bf8c007f
101e0600
c2000914
bf8c007f
101c0400
c0860310
c0c80520
bf8c007f
f0800f00
00640e0e
bf8c0770
10261f13
10041cff
3e59b3d0
7e0602ff
3f371759
d2820002
040a070f
7e0602ff
3d93dd98
d2820002
040a0710
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
1028050f
c2000925
bf8c007f
10060a00
c2000924
bf8c007f
d2820003
040c0104
c2000926
bf8c007f
d2820003
040c0106
c2000927
bf8c007f
d2820003
040c0107
c2000920
bf8c007f
06060600
d2060803
02010103
10062303
d2820013
044e2903
102826f6
c200092d
bf8c007f
10260a00
c200092c
bf8c007f
d2820013
044c0104
c200092e
bf8c007f
d2820013
044c0106
c200092f
bf8c007f
d2820013
044c0107
c2000922
bf8c007f
06262600
d2060813
02010113
d2820012
04522712
c8540700
c8550701
c8500600
c8510601
c080030c
c0c60518
bf8c007f
f0800700
00031414
c2000931
bf8c0070
d2820018
044a2a00
c8480b00
c8490b01
c200091b
bf8c007f
d00c0002
02000112
7e2e0200
d2000012
000a2517
d2060812
02010112
082e24f2
c2000919
bf8c007f
10322e00
d2820018
04663112
10321b0a
c8680000
c8690001
d282001a
04263508
10343504
10341d1a
1036050e
d282001a
046a3703
103434f6
d2820019
046a2719
c2000930
bf8c007f
d2820019
04662800
c2000918
bf8c007f
10342e00
d2820019
046a3312
5e303119
10141b0c
c82c0200
c82d0201
d282000b
04261708
10161706
1016210b
10040510
d2820002
042e0503
100404f6
d2820002
040a270a
c2000932
bf8c007f
d2820002
040a2c00
c200091a
bf8c007f
10062e00
d2820002
040e0512
c80c0300
c80d0301
d2820000
04260708
10000107
5e000102
f8001c0f
00180018
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL OUT[6], GENERIC[24]
DCL CONST[0..206]
DCL TEMP[0..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: MOV TEMP[3].w, IMM[0].xxxx
  6: MOV TEMP[3].xyz, IN[6].xyzx
  7: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww
  8: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz
  9: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy
 10: UMUL TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx
 11: UARL ADDR[0].x, TEMP[7].xxxx
 12: MUL TEMP[7], CONST[ADDR[0].x+15], IN[1].xxxx
 13: UARL ADDR[0].x, TEMP[6].xxxx
 14: MAD TEMP[6], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[7]
 15: UARL ADDR[0].x, TEMP[5].xxxx
 16: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[6]
 17: UARL ADDR[0].x, TEMP[4].xxxx
 18: UARL ADDR[0].x, TEMP[4].xxxx
 19: MAD TEMP[4], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[5]
 20: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 21: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 22: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 23: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 24: UARL ADDR[0].x, TEMP[8].xxxx
 25: MUL TEMP[8], CONST[ADDR[0].x+15], IN[1].xxxx
 26: UARL ADDR[0].x, TEMP[7].xxxx
 27: MAD TEMP[7], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[8]
 28: UARL ADDR[0].x, TEMP[6].xxxx
 29: MAD TEMP[6], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[7]
 30: UARL ADDR[0].x, TEMP[5].xxxx
 31: UARL ADDR[0].x, TEMP[5].xxxx
 32: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[6]
 33: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 34: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 35: UMAD TEMP[8].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 36: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 37: UARL ADDR[0].x, TEMP[0].xxxx
 38: MUL TEMP[0], CONST[ADDR[0].x+15], IN[1].xxxx
 39: UARL ADDR[0].x, TEMP[8].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[0]
 41: UARL ADDR[0].x, TEMP[7].xxxx
 42: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[0]
 43: UARL ADDR[0].x, TEMP[6].xxxx
 44: UARL ADDR[0].x, TEMP[6].xxxx
 45: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[0]
 46: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 47: DP4 TEMP[7].x, TEMP[1], TEMP[5]
 48: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 49: DP4 TEMP[8].x, TEMP[2], TEMP[4]
 50: DP4 TEMP[9].x, TEMP[2], TEMP[5]
 51: MOV TEMP[8].y, TEMP[9].xxxx
 52: DP4 TEMP[2].x, TEMP[2], TEMP[0]
 53: MOV TEMP[8].z, TEMP[2].xxxx
 54: DP4 TEMP[2].x, TEMP[3], TEMP[4]
 55: DP4 TEMP[4].x, TEMP[3], TEMP[5]
 56: MOV TEMP[2].y, TEMP[4].xxxx
 57: DP4 TEMP[0].x, TEMP[3], TEMP[0]
 58: MOV TEMP[2].z, TEMP[0].xxxx
 59: MUL TEMP[0], CONST[4], TEMP[6].xxxx
 60: MAD TEMP[0], CONST[5], TEMP[7].xxxx, TEMP[0]
 61: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 62: ADD TEMP[0], TEMP[0], CONST[7]
 63: MOV TEMP[3].w, IMM[0].xxxx
 64: MOV TEMP[3].xyz, CONST[8].xyzx
 65: MUL TEMP[4].xyz, TEMP[8].xyzz, CONST[10].wwww
 66: MUL TEMP[5], CONST[0], TEMP[4].xxxx
 67: MAD TEMP[5], CONST[1], TEMP[4].yyyy, TEMP[5]
 68: MAD TEMP[4].xyz, CONST[2], TEMP[4].zzzz, TEMP[5]
 69: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
 70: RSQ TEMP[5].x, TEMP[5].xxxx
 71: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
 72: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[10].wwww
 73: MUL TEMP[5], CONST[0], TEMP[2].xxxx
 74: MAD TEMP[5], CONST[1], TEMP[2].yyyy, TEMP[5]
 75: MAD TEMP[2].xyz, CONST[2], TEMP[2].zzzz, TEMP[5]
 76: MUL TEMP[5], CONST[0], TEMP[6].xxxx
 77: MAD TEMP[5], CONST[1], TEMP[7].xxxx, TEMP[5]
 78: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[5]
 79: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 80: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[14].xyzz
 81: MAD TEMP[5].x, TEMP[0].zzzz, CONST[13].xxxx, CONST[13].yyyy
 82: MOV TEMP[1].w, TEMP[5].xxxx
 83: MAD TEMP[5].xy, IN[4].xyyy, CONST[9].xyyy, CONST[9].zwww
 84: MOV TEMP[5].zw, TEMP[4].yyxy
 85: MOV TEMP[6].x, TEMP[4].zzzz
 86: MUL TEMP[7].xyz, TEMP[2].zxyy, TEMP[4].yzxx
 87: MAD TEMP[4].xyz, TEMP[2].yzxx, TEMP[4].zxyy, -TEMP[7].xyzz
 88: MOV TEMP[6].yzw, TEMP[4].yxyz
 89: MOV TEMP[2].xyz, TEMP[2].xyzx
 90: MOV OUT[2], IN[5]
 91: MOV OUT[1], TEMP[3]
 92: MOV OUT[4], TEMP[5]
 93: MOV OUT[6], TEMP[2]
 94: MOV OUT[5], TEMP[6]
 95: MOV OUT[3], TEMP[1]
 96: MOV OUT[0], TEMP[0]
 97: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = extractelement <4 x float> %66, i32 3
  %71 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %5)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = extractelement <4 x float> %73, i32 3
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0
  %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %5)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %90 = load <16 x i8> addrspace(2)* %89, !tbaa !0
  %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %5)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = extractelement <4 x float> %91, i32 1
  %94 = extractelement <4 x float> %91, i32 2
  %95 = extractelement <4 x float> %91, i32 3
  %96 = getelementptr <16 x i8> addrspace(2)* %3, i32 6
  %97 = load <16 x i8> addrspace(2)* %96, !tbaa !0
  %98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %5)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = extractelement <4 x float> %98, i32 1
  %101 = extractelement <4 x float> %98, i32 2
  %102 = fptosi float %74 to i32
  %103 = fptosi float %75 to i32
  %104 = fptosi float %76 to i32
  %105 = fptosi float %77 to i32
  %106 = bitcast i32 %102 to float
  %107 = bitcast i32 %103 to float
  %108 = bitcast i32 %104 to float
  %109 = bitcast i32 %105 to float
  %110 = fmul float %61, %50
  %111 = fadd float %110, %47
  %112 = fmul float %62, %51
  %113 = fadd float %112, %48
  %114 = fmul float %63, %52
  %115 = fadd float %114, %49
  %116 = bitcast float %109 to i32
  %117 = mul i32 3, %116
  %118 = bitcast i32 %117 to float
  %119 = bitcast float %108 to i32
  %120 = mul i32 3, %119
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %107 to i32
  %123 = mul i32 3, %122
  %124 = bitcast i32 %123 to float
  %125 = bitcast float %106 to i32
  %126 = mul i32 3, %125
  %127 = bitcast i32 %126 to float
  %128 = bitcast float %127 to i32
  %129 = shl i32 %128, 4
  %130 = add i32 %129, 240
  %131 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %130)
  %132 = fmul float %131, %67
  %133 = shl i32 %128, 4
  %134 = add i32 %133, 244
  %135 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %134)
  %136 = fmul float %135, %67
  %137 = shl i32 %128, 4
  %138 = add i32 %137, 248
  %139 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %138)
  %140 = fmul float %139, %67
  %141 = shl i32 %128, 4
  %142 = add i32 %141, 252
  %143 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %142)
  %144 = fmul float %143, %67
  %145 = bitcast float %124 to i32
  %146 = shl i32 %145, 4
  %147 = add i32 %146, 240
  %148 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %147)
  %149 = fmul float %148, %68
  %150 = fadd float %149, %132
  %151 = shl i32 %145, 4
  %152 = add i32 %151, 244
  %153 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %152)
  %154 = fmul float %153, %68
  %155 = fadd float %154, %136
  %156 = shl i32 %145, 4
  %157 = add i32 %156, 248
  %158 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %157)
  %159 = fmul float %158, %68
  %160 = fadd float %159, %140
  %161 = shl i32 %145, 4
  %162 = add i32 %161, 252
  %163 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %162)
  %164 = fmul float %163, %68
  %165 = fadd float %164, %144
  %166 = bitcast float %121 to i32
  %167 = shl i32 %166, 4
  %168 = add i32 %167, 240
  %169 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %168)
  %170 = fmul float %169, %69
  %171 = fadd float %170, %150
  %172 = shl i32 %166, 4
  %173 = add i32 %172, 244
  %174 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %173)
  %175 = fmul float %174, %69
  %176 = fadd float %175, %155
  %177 = shl i32 %166, 4
  %178 = add i32 %177, 248
  %179 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %178)
  %180 = fmul float %179, %69
  %181 = fadd float %180, %160
  %182 = shl i32 %166, 4
  %183 = add i32 %182, 252
  %184 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %183)
  %185 = fmul float %184, %69
  %186 = fadd float %185, %165
  %187 = bitcast float %118 to i32
  %188 = shl i32 %187, 4
  %189 = add i32 %188, 240
  %190 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %189)
  %191 = fmul float %190, %70
  %192 = fadd float %191, %171
  %193 = shl i32 %187, 4
  %194 = add i32 %193, 244
  %195 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %194)
  %196 = fmul float %195, %70
  %197 = fadd float %196, %176
  %198 = shl i32 %187, 4
  %199 = add i32 %198, 248
  %200 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %199)
  %201 = fmul float %200, %70
  %202 = fadd float %201, %181
  %203 = shl i32 %187, 4
  %204 = add i32 %203, 252
  %205 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %204)
  %206 = fmul float %205, %70
  %207 = fadd float %206, %186
  %208 = bitcast float %109 to i32
  %209 = mul i32 3, %208
  %210 = add i32 %209, 1
  %211 = bitcast i32 %210 to float
  %212 = bitcast float %108 to i32
  %213 = mul i32 3, %212
  %214 = add i32 %213, 1
  %215 = bitcast i32 %214 to float
  %216 = bitcast float %107 to i32
  %217 = mul i32 3, %216
  %218 = add i32 %217, 1
  %219 = bitcast i32 %218 to float
  %220 = bitcast float %106 to i32
  %221 = mul i32 3, %220
  %222 = add i32 %221, 1
  %223 = bitcast i32 %222 to float
  %224 = bitcast float %223 to i32
  %225 = shl i32 %224, 4
  %226 = add i32 %225, 240
  %227 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %226)
  %228 = fmul float %227, %67
  %229 = shl i32 %224, 4
  %230 = add i32 %229, 244
  %231 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %230)
  %232 = fmul float %231, %67
  %233 = shl i32 %224, 4
  %234 = add i32 %233, 248
  %235 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %234)
  %236 = fmul float %235, %67
  %237 = shl i32 %224, 4
  %238 = add i32 %237, 252
  %239 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %238)
  %240 = fmul float %239, %67
  %241 = bitcast float %219 to i32
  %242 = shl i32 %241, 4
  %243 = add i32 %242, 240
  %244 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %243)
  %245 = fmul float %244, %68
  %246 = fadd float %245, %228
  %247 = shl i32 %241, 4
  %248 = add i32 %247, 244
  %249 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %248)
  %250 = fmul float %249, %68
  %251 = fadd float %250, %232
  %252 = shl i32 %241, 4
  %253 = add i32 %252, 248
  %254 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %253)
  %255 = fmul float %254, %68
  %256 = fadd float %255, %236
  %257 = shl i32 %241, 4
  %258 = add i32 %257, 252
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %68
  %261 = fadd float %260, %240
  %262 = bitcast float %215 to i32
  %263 = shl i32 %262, 4
  %264 = add i32 %263, 240
  %265 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %264)
  %266 = fmul float %265, %69
  %267 = fadd float %266, %246
  %268 = shl i32 %262, 4
  %269 = add i32 %268, 244
  %270 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %269)
  %271 = fmul float %270, %69
  %272 = fadd float %271, %251
  %273 = shl i32 %262, 4
  %274 = add i32 %273, 248
  %275 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %274)
  %276 = fmul float %275, %69
  %277 = fadd float %276, %256
  %278 = shl i32 %262, 4
  %279 = add i32 %278, 252
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = fmul float %280, %69
  %282 = fadd float %281, %261
  %283 = bitcast float %211 to i32
  %284 = shl i32 %283, 4
  %285 = add i32 %284, 240
  %286 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %285)
  %287 = fmul float %286, %70
  %288 = fadd float %287, %267
  %289 = shl i32 %283, 4
  %290 = add i32 %289, 244
  %291 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %290)
  %292 = fmul float %291, %70
  %293 = fadd float %292, %272
  %294 = shl i32 %283, 4
  %295 = add i32 %294, 248
  %296 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %295)
  %297 = fmul float %296, %70
  %298 = fadd float %297, %277
  %299 = shl i32 %283, 4
  %300 = add i32 %299, 252
  %301 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %300)
  %302 = fmul float %301, %70
  %303 = fadd float %302, %282
  %304 = bitcast float %109 to i32
  %305 = mul i32 3, %304
  %306 = add i32 %305, 2
  %307 = bitcast i32 %306 to float
  %308 = bitcast float %108 to i32
  %309 = mul i32 3, %308
  %310 = add i32 %309, 2
  %311 = bitcast i32 %310 to float
  %312 = bitcast float %107 to i32
  %313 = mul i32 3, %312
  %314 = add i32 %313, 2
  %315 = bitcast i32 %314 to float
  %316 = bitcast float %106 to i32
  %317 = mul i32 3, %316
  %318 = add i32 %317, 2
  %319 = bitcast i32 %318 to float
  %320 = bitcast float %319 to i32
  %321 = shl i32 %320, 4
  %322 = add i32 %321, 240
  %323 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %322)
  %324 = fmul float %323, %67
  %325 = shl i32 %320, 4
  %326 = add i32 %325, 244
  %327 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %326)
  %328 = fmul float %327, %67
  %329 = shl i32 %320, 4
  %330 = add i32 %329, 248
  %331 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %330)
  %332 = fmul float %331, %67
  %333 = shl i32 %320, 4
  %334 = add i32 %333, 252
  %335 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %334)
  %336 = fmul float %335, %67
  %337 = bitcast float %315 to i32
  %338 = shl i32 %337, 4
  %339 = add i32 %338, 240
  %340 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %339)
  %341 = fmul float %340, %68
  %342 = fadd float %341, %324
  %343 = shl i32 %337, 4
  %344 = add i32 %343, 244
  %345 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %344)
  %346 = fmul float %345, %68
  %347 = fadd float %346, %328
  %348 = shl i32 %337, 4
  %349 = add i32 %348, 248
  %350 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %349)
  %351 = fmul float %350, %68
  %352 = fadd float %351, %332
  %353 = shl i32 %337, 4
  %354 = add i32 %353, 252
  %355 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %354)
  %356 = fmul float %355, %68
  %357 = fadd float %356, %336
  %358 = bitcast float %311 to i32
  %359 = shl i32 %358, 4
  %360 = add i32 %359, 240
  %361 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %360)
  %362 = fmul float %361, %69
  %363 = fadd float %362, %342
  %364 = shl i32 %358, 4
  %365 = add i32 %364, 244
  %366 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %365)
  %367 = fmul float %366, %69
  %368 = fadd float %367, %347
  %369 = shl i32 %358, 4
  %370 = add i32 %369, 248
  %371 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %370)
  %372 = fmul float %371, %69
  %373 = fadd float %372, %352
  %374 = shl i32 %358, 4
  %375 = add i32 %374, 252
  %376 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %375)
  %377 = fmul float %376, %69
  %378 = fadd float %377, %357
  %379 = bitcast float %307 to i32
  %380 = shl i32 %379, 4
  %381 = add i32 %380, 240
  %382 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %381)
  %383 = fmul float %382, %70
  %384 = fadd float %383, %363
  %385 = shl i32 %379, 4
  %386 = add i32 %385, 244
  %387 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %386)
  %388 = fmul float %387, %70
  %389 = fadd float %388, %368
  %390 = shl i32 %379, 4
  %391 = add i32 %390, 248
  %392 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %391)
  %393 = fmul float %392, %70
  %394 = fadd float %393, %373
  %395 = shl i32 %379, 4
  %396 = add i32 %395, 252
  %397 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %396)
  %398 = fmul float %397, %70
  %399 = fadd float %398, %378
  %400 = fmul float %111, %192
  %401 = fmul float %113, %197
  %402 = fadd float %400, %401
  %403 = fmul float %115, %202
  %404 = fadd float %402, %403
  %405 = fmul float 1.000000e+00, %207
  %406 = fadd float %404, %405
  %407 = fmul float %111, %288
  %408 = fmul float %113, %293
  %409 = fadd float %407, %408
  %410 = fmul float %115, %298
  %411 = fadd float %409, %410
  %412 = fmul float 1.000000e+00, %303
  %413 = fadd float %411, %412
  %414 = fmul float %111, %384
  %415 = fmul float %113, %389
  %416 = fadd float %414, %415
  %417 = fmul float %115, %394
  %418 = fadd float %416, %417
  %419 = fmul float 1.000000e+00, %399
  %420 = fadd float %418, %419
  %421 = fmul float %81, %192
  %422 = fmul float %82, %197
  %423 = fadd float %421, %422
  %424 = fmul float %83, %202
  %425 = fadd float %423, %424
  %426 = fmul float 1.000000e+00, %207
  %427 = fadd float %425, %426
  %428 = fmul float %81, %288
  %429 = fmul float %82, %293
  %430 = fadd float %428, %429
  %431 = fmul float %83, %298
  %432 = fadd float %430, %431
  %433 = fmul float 1.000000e+00, %303
  %434 = fadd float %432, %433
  %435 = fmul float %81, %384
  %436 = fmul float %82, %389
  %437 = fadd float %435, %436
  %438 = fmul float %83, %394
  %439 = fadd float %437, %438
  %440 = fmul float 1.000000e+00, %399
  %441 = fadd float %439, %440
  %442 = fmul float %99, %192
  %443 = fmul float %100, %197
  %444 = fadd float %442, %443
  %445 = fmul float %101, %202
  %446 = fadd float %444, %445
  %447 = fmul float 1.000000e+00, %207
  %448 = fadd float %446, %447
  %449 = fmul float %99, %288
  %450 = fmul float %100, %293
  %451 = fadd float %449, %450
  %452 = fmul float %101, %298
  %453 = fadd float %451, %452
  %454 = fmul float 1.000000e+00, %303
  %455 = fadd float %453, %454
  %456 = fmul float %99, %384
  %457 = fmul float %100, %389
  %458 = fadd float %456, %457
  %459 = fmul float %101, %394
  %460 = fadd float %458, %459
  %461 = fmul float 1.000000e+00, %399
  %462 = fadd float %460, %461
  %463 = fmul float %23, %406
  %464 = fmul float %24, %406
  %465 = fmul float %25, %406
  %466 = fmul float %26, %406
  %467 = fmul float %27, %413
  %468 = fadd float %467, %463
  %469 = fmul float %28, %413
  %470 = fadd float %469, %464
  %471 = fmul float %29, %413
  %472 = fadd float %471, %465
  %473 = fmul float %30, %413
  %474 = fadd float %473, %466
  %475 = fmul float %31, %420
  %476 = fadd float %475, %468
  %477 = fmul float %32, %420
  %478 = fadd float %477, %470
  %479 = fmul float %33, %420
  %480 = fadd float %479, %472
  %481 = fmul float %34, %420
  %482 = fadd float %481, %474
  %483 = fadd float %476, %35
  %484 = fadd float %478, %36
  %485 = fadd float %480, %37
  %486 = fadd float %482, %38
  %487 = fmul float %427, %46
  %488 = fmul float %434, %46
  %489 = fmul float %441, %46
  %490 = fmul float %11, %487
  %491 = fmul float %12, %487
  %492 = fmul float %13, %487
  %493 = fmul float %14, %488
  %494 = fadd float %493, %490
  %495 = fmul float %15, %488
  %496 = fadd float %495, %491
  %497 = fmul float %16, %488
  %498 = fadd float %497, %492
  %499 = fmul float %17, %489
  %500 = fadd float %499, %494
  %501 = fmul float %18, %489
  %502 = fadd float %501, %496
  %503 = fmul float %19, %489
  %504 = fadd float %503, %498
  %505 = fmul float %500, %500
  %506 = fmul float %502, %502
  %507 = fadd float %506, %505
  %508 = fmul float %504, %504
  %509 = fadd float %507, %508
  %510 = call float @llvm.AMDGPU.rsq(float %509)
  %511 = fmul float %500, %510
  %512 = fmul float %502, %510
  %513 = fmul float %504, %510
  %514 = fmul float %448, %46
  %515 = fmul float %455, %46
  %516 = fmul float %462, %46
  %517 = fmul float %11, %514
  %518 = fmul float %12, %514
  %519 = fmul float %13, %514
  %520 = fmul float %14, %515
  %521 = fadd float %520, %517
  %522 = fmul float %15, %515
  %523 = fadd float %522, %518
  %524 = fmul float %16, %515
  %525 = fadd float %524, %519
  %526 = fmul float %17, %516
  %527 = fadd float %526, %521
  %528 = fmul float %18, %516
  %529 = fadd float %528, %523
  %530 = fmul float %19, %516
  %531 = fadd float %530, %525
  %532 = fmul float %11, %406
  %533 = fmul float %12, %406
  %534 = fmul float %13, %406
  %535 = fmul float %14, %413
  %536 = fadd float %535, %532
  %537 = fmul float %15, %413
  %538 = fadd float %537, %533
  %539 = fmul float %16, %413
  %540 = fadd float %539, %534
  %541 = fmul float %17, %420
  %542 = fadd float %541, %536
  %543 = fmul float %18, %420
  %544 = fadd float %543, %538
  %545 = fmul float %19, %420
  %546 = fadd float %545, %540
  %547 = fadd float %542, %20
  %548 = fadd float %544, %21
  %549 = fadd float %546, %22
  %550 = fsub float -0.000000e+00, %55
  %551 = fadd float %547, %550
  %552 = fsub float -0.000000e+00, %56
  %553 = fadd float %548, %552
  %554 = fsub float -0.000000e+00, %57
  %555 = fadd float %549, %554
  %556 = fmul float %485, %53
  %557 = fadd float %556, %54
  %558 = fmul float %87, %42
  %559 = fadd float %558, %44
  %560 = fmul float %88, %43
  %561 = fadd float %560, %45
  %562 = fmul float %531, %512
  %563 = fmul float %527, %513
  %564 = fmul float %529, %511
  %565 = fsub float -0.000000e+00, %562
  %566 = fmul float %529, %513
  %567 = fadd float %566, %565
  %568 = fsub float -0.000000e+00, %563
  %569 = fmul float %531, %511
  %570 = fadd float %569, %568
  %571 = fsub float -0.000000e+00, %564
  %572 = fmul float %527, %512
  %573 = fadd float %572, %571
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %92, float %93, float %94, float %95)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %551, float %553, float %555, float %557)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %559, float %561, float %511, float %512)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %513, float %567, float %570, float %573)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %527, float %529, float %531, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %483, float %484, float %485, float %486)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840714
bf8c000f
e00c2000
80020200
bf8c0770
f800021f
05040302
c0840708
bf8c000f
e00c2000
80020c00
bf8c0770
7e04110c
d2d60010
02010702
34062084
4a0406ff
000000f0
e0301000
80000202
c0840704
bf8c0070
e00c2000
80020800
bf8c0770
10041102
7e08110d
d2d60011
02010704
340c2284
4a080cff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1304
7e08110e
d2d60012
02010704
340e2484
4a080eff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1504
7e08110f
d2d60013
02010704
34182684
4a0818ff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1704
4a0806ff
000000f4
e0301000
80000404
bf8c0770
10081104
4a0a0cff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121305
4a0a0eff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121505
4a0a18ff
000000f4
e0301000
80000505
bf8c0770
d2820004
04121705
c0840700
bf8c007f
e00c2000
80021600
c2020131
c202812d
bf8c0070
7e0a0205
d2820014
04140917
100a0914
c2020130
c202812c
bf8c007f
7e1a0205
d2820015
04340916
d282000d
04160515
4a0a06ff
000000f8
e0301000
80000505
bf8c0770
100a1105
4a1c0cff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416130e
4a1c0eff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416150e
4a1c18ff
000000f8
e0301000
80000e0e
bf8c0770
d2820005
0416170e
c2020132
c202812e
bf8c007f
7e1c0205
d2820016
04380918
d282000d
04360b16
4a0606ff
000000fc
e0301000
80000303
bf8c0770
10061103
4a0c0cff
000000fc
e0301000
80000606
bf8c0770
d2820003
040e1306
4a0c0eff
000000fc
e0301000
80000606
bf8c0770
d2820003
040e1506
4a0c18ff
000000fc
e0301000
80000606
bf8c0770
d2820006
040e1706
06060d0d
c2020112
bf8c007f
102e0604
4a0e2081
340e0e84
4a180eff
000000f0
e0301000
80000c0c
bf8c0770
1018110c
4a1a2281
341e1a84
4a1a1eff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432130d
4a1a2481
34301a84
4a1a30ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432150d
4a1a2681
34321a84
4a1a32ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432170d
4a1a0eff
000000f4
e0301000
80000d0d
bf8c0770
101a110d
4a1c1eff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436130e
4a1c30ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436150e
4a1c32ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436170e
101c1b14
d282001a
043a1915
4a1c0eff
000000f8
e0301000
80000e0e
bf8c0770
101c110e
4a361eff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a131b
4a3630ff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a151b
4a3632ff
000000f8
e0301000
80001b1b
bf8c0770
d282000e
043a171b
d282001a
046a1d16
4a0e0eff
000000fc
e0301000
80000707
bf8c0770
100e1107
4a1e1eff
000000fc
e0301000
80000f0f
bf8c0770
d2820007
041e130f
4a1e30ff
000000fc
e0301000
80000f0f
bf8c0770
d2820007
041e150f
4a1e32ff
000000fc
e0301000
80000f0f
bf8c0770
d282000f
041e170f
060e1f1a
c2020116
bf8c007f
d2820017
045e0e04
4a202082
34302084
4a2030ff
000000f0
e0301000
80001010
bf8c0770
10201110
4a222282
34322284
4a2232ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421311
4a222482
34342284
4a2234ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421511
4a222682
34262284
4a2226ff
000000f0
e0301000
80001111
bf8c0770
d2820010
04421711
4a2230ff
000000f4
e0301000
80001111
bf8c0770
10221111
4a2432ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461312
4a2434ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461512
4a2426ff
000000f4
e0301000
80001212
bf8c0770
d2820011
04461712
10242314
d2820014
044a2115
4a2430ff
000000f8
e0301000
80001212
bf8c0770
10241112
4a2a32ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1315
4a2a34ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1515
4a2a26ff
000000f8
e0301000
80001515
bf8c0770
d2820012
044a1715
d2820014
04522516
4a2a30ff
000000fc
e0301000
80001515
bf8c0770
102a1115
4a2c32ff
000000fc
e0301000
80001616
bf8c0770
d2820015
04561316
4a2c34ff
000000fc
e0301000
80001616
bf8c0770
d2820015
04561516
4a2626ff
000000fc
e0301000
80001313
bf8c0770
d282000a
04561713
06101514
c202011a
bf8c007f
d2820009
045e1004
c202011e
bf8c007f
06121204
c2020134
c2028135
bf8c007f
7e160205
d282000b
042c0909
c2020102
bf8c007f
10260604
c2028106
bf8c007f
d2820013
044e0e05
c204010a
bf8c007f
d2820013
044e1008
c204810e
bf8c007f
06262609
c204813a
bf8c007f
0a262609
c2070101
bf8c007f
1028060e
c2050105
bf8c007f
d2820014
04520e0a
c2048109
bf8c007f
d2820014
04521009
c205810d
bf8c007f
0628280b
c2058139
bf8c007f
0a28280b
c2058100
bf8c007f
102a060b
c2060104
bf8c007f
d2820015
04560e0c
c2068108
bf8c007f
d2820015
0456100d
c207810c
bf8c007f
062a2a0f
c2078138
bf8c007f
0a2a2a0f
f800022f
0b131415
c088070c
bf8c000f
e00c2000
80041300
bf8c0770
10160914
d282000b
042e0513
d282000b
042e0b15
06160d0b
c207812b
bf8c007f
1016160f
1030160e
102e1b14
d2820017
045e1913
d2820017
045e1d15
062e1f17
102e2e0f
d2820018
04622e0a
10322314
d2820019
04662113
d2820013
04662515
06261513
1028260f
d2820013
04622809
102a160b
d2820015
04562e0c
d2820016
0456280d
102a2d16
d2820015
04562713
10161604
d282000b
042e2e05
d2820014
042e2808
d282000b
04562914
7e2a5b0b
10162b13
10262b16
c0880710
bf8c007f
e00c2000
80041600
c2080125
c2088127
bf8c0070
7e340211
d282001a
04682117
c2080124
c2088126
bf8c007f
7e360211
d2820016
046c2116
f800023f
0b131a16
c0880718
bf8c000f
e00c2000
80041600
bf8c0770
10000917
d2820000
04020516
d2820000
04020b18
06000d00
1000000f
1004000e
10081b17
d2820004
04121916
d2820004
04121d18
06081f04
1008080f
d2820002
040a080a
100a2317
d2820005
04162116
d2820005
04162518
060a1505
100c0a0f
d2820002
040a0c09
10142702
100a000b
d2820005
0416080c
d2820005
04160c0d
10181705
0814150c
10182b14
101a1905
10000004
d2820000
04020805
d2820000
04020c08
10082700
08081b04
100c1700
10161902
080c0d0b
f800024f
0a04060c
f800025f
01000205
c2020113
bf8c000f
10000604
c2020117
bf8c007f
d2820000
04020e04
c202011b
bf8c007f
d2820000
04021004
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10020604
c2020115
bf8c007f
d2820001
04060e04
c2020119
bf8c007f
d2820001
04061004
c202011d
bf8c007f
06020204
c2020110
bf8c007f
10040604
c2020114
bf8c007f
d2820002
040a0e04
c2020118
bf8c007f
d2820002
040a1004
c200011c
bf8c007f
06040400
f80008cf
00090102
bf810000
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], POSITION, LINEAR
DCL IN[1], FACE, CONSTANT
DCL IN[2], GENERIC[19], PERSPECTIVE
DCL IN[3], GENERIC[20], PERSPECTIVE
DCL IN[4], GENERIC[21], PERSPECTIVE
DCL IN[5], GENERIC[22], PERSPECTIVE
DCL IN[6], GENERIC[23], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL CONST[11..12]
DCL CONST[4..10]
DCL TEMP[0..1]
DCL TEMP[2..6], LOCAL
IMM[0] FLT32 {   -1.0000,     1.0000,     2.0000,     0.0010}
IMM[1] FLT32 {    0.2126,     0.7152,     0.0722,     4.0000}
  0: MOV TEMP[0], IN[0]
  1: MAD TEMP[0].y, IN[0], CONST[12].xxxx, CONST[12].yyyy
  2: MOV_SAT TEMP[1], IN[1]
  3: MOV TEMP[2].z, IN[6].xxxx
  4: MOV TEMP[2].xy, IN[5].zwzz
  5: UIF TEMP[1].xxxx :3
  6:   MOV TEMP[3].x, IMM[0].xxxx
  7: ELSE :3
  8:   MOV TEMP[3].x, IMM[0].yyyy
  9: ENDIF
 10: MOV TEMP[4].xy, IN[5].xyyy
 11: TEX TEMP[4], TEMP[4], SAMP[0], 2D
 12: MOV TEMP[5].w, TEMP[4].wwww
 13: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[2].xyzz
 14: RSQ TEMP[6].x, TEMP[6].xxxx
 15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xxxx
 16: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
 17: DP3 TEMP[3].x, TEMP[2].xyzz, IN[4].xyzz
 18: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
 19: MUL TEMP[2].xyz, IMM[0].zzzz, TEMP[2].xyzz
 20: ADD TEMP[2].xyz, IN[4].xyzz, -TEMP[2].xyzz
 21: MOV TEMP[2].xyz, TEMP[2].xyzz
 22: TEX TEMP[2], TEMP[2], SAMP[1], CUBE
 23: DP4 TEMP[3].x, TEMP[4], CONST[10]
 24: ADD_SAT TEMP[3].x, TEMP[3].xxxx, CONST[7].zzzz
 25: MUL TEMP[3].x, TEMP[2].wwww, TEMP[3].xxxx
 26: LRP TEMP[5].xyz, TEMP[3].xxxx, TEMP[2].xyzz, TEMP[4].xyzz
 27: DP4 TEMP[2].x, TEMP[4], CONST[9]
 28: ADD_SAT TEMP[2].x, TEMP[2].xxxx, CONST[7].yyyy
 29: LRP TEMP[2], TEMP[2].xxxx, IN[2], IMM[0].yyyy
 30: MUL TEMP[2], TEMP[5], TEMP[2]
 31: MUL TEMP[3].xy, TEMP[0].xyyy, CONST[4].xyyy
 32: MOV TEMP[3].xy, TEMP[3].xyyy
 33: TEX TEMP[3], TEMP[3], SAMP[3], 2D
 34: DP4 TEMP[4].x, TEMP[4], CONST[8]
 35: ADD_SAT TEMP[4].x, TEMP[4].xxxx, CONST[7].xxxx
 36: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].wwww
 37: DP3 TEMP[5].x, TEMP[3].xyzz, IMM[1].xyzz
 38: MAX TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
 39: RCP TEMP[5].x, TEMP[5].xxxx
 40: MUL TEMP[5].xyz, TEMP[3].xyzz, TEMP[5].xxxx
 41: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xyzz
 42: MAD TEMP[3].xyz, TEMP[4].xxxx, TEMP[5].xyzz, TEMP[3].xyzz
 43: MUL TEMP[2].xyz, TEMP[3].xyzz, IMM[1].wwww
 44: MOV TEMP[3].xy, IN[3].zwww
 45: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D
 46: MAD TEMP[2].xyz, CONST[11].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
 47: MAX TEMP[3].x, IN[4].wwww, CONST[5].wwww
 48: MOV_SAT TEMP[3].x, TEMP[3].xxxx
 49: LRP TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz, CONST[5].xyzz
 50: MOV OUT[0], TEMP[2]
 51: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0
  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 64)
  %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 68)
  %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 80)
  %25 = call float @llvm.SI.load.const(<16 x i8> %21, i32 84)
  %26 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
  %27 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
  %28 = call float @llvm.SI.load.const(<16 x i8> %21, i32 112)
  %29 = call float @llvm.SI.load.const(<16 x i8> %21, i32 116)
  %30 = call float @llvm.SI.load.const(<16 x i8> %21, i32 120)
  %31 = call float @llvm.SI.load.const(<16 x i8> %21, i32 128)
  %32 = call float @llvm.SI.load.const(<16 x i8> %21, i32 132)
  %33 = call float @llvm.SI.load.const(<16 x i8> %21, i32 136)
  %34 = call float @llvm.SI.load.const(<16 x i8> %21, i32 140)
  %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 144)
  %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 148)
  %37 = call float @llvm.SI.load.const(<16 x i8> %21, i32 152)
  %38 = call float @llvm.SI.load.const(<16 x i8> %21, i32 156)
  %39 = call float @llvm.SI.load.const(<16 x i8> %21, i32 160)
  %40 = call float @llvm.SI.load.const(<16 x i8> %21, i32 164)
  %41 = call float @llvm.SI.load.const(<16 x i8> %21, i32 168)
  %42 = call float @llvm.SI.load.const(<16 x i8> %21, i32 172)
  %43 = call float @llvm.SI.load.const(<16 x i8> %21, i32 176)
  %44 = call float @llvm.SI.load.const(<16 x i8> %21, i32 180)
  %45 = call float @llvm.SI.load.const(<16 x i8> %21, i32 184)
  %46 = call float @llvm.SI.load.const(<16 x i8> %21, i32 192)
  %47 = call float @llvm.SI.load.const(<16 x i8> %21, i32 196)
  %48 = getelementptr <32 x i8> addrspace(2)* %2, i32 0
  %49 = load <32 x i8> addrspace(2)* %48, !tbaa !0
  %50 = getelementptr <16 x i8> addrspace(2)* %1, i32 0
  %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0
  %52 = getelementptr <32 x i8> addrspace(2)* %2, i32 1
  %53 = load <32 x i8> addrspace(2)* %52, !tbaa !0
  %54 = getelementptr <16 x i8> addrspace(2)* %1, i32 1
  %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0
  %56 = getelementptr <32 x i8> addrspace(2)* %2, i32 2
  %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0
  %58 = getelementptr <16 x i8> addrspace(2)* %1, i32 2
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = getelementptr <32 x i8> addrspace(2)* %2, i32 3
  %61 = load <32 x i8> addrspace(2)* %60, !tbaa !0
  %62 = getelementptr <16 x i8> addrspace(2)* %1, i32 3
  %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0
  %64 = fcmp ugt float %16, 0.000000e+00
  %65 = select i1 %64, float 1.000000e+00, float 0.000000e+00
  %66 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
  %67 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
  %68 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %3, <2 x i32> %5)
  %69 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %3, <2 x i32> %5)
  %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %3, <2 x i32> %5)
  %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %3, <2 x i32> %5)
  %72 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %3, <2 x i32> %5)
  %73 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %3, <2 x i32> %5)
  %74 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %3, <2 x i32> %5)
  %75 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %3, <2 x i32> %5)
  %76 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %3, <2 x i32> %5)
  %77 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %3, <2 x i32> %5)
  %78 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %3, <2 x i32> %5)
  %79 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %3, <2 x i32> %5)
  %80 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %3, <2 x i32> %5)
  %81 = fmul float %13, %46
  %82 = fadd float %81, %47
  %83 = call float @llvm.AMDIL.clamp.(float %65, float 0.000000e+00, float 1.000000e+00)
  %84 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %85 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %86 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
  %87 = bitcast float %83 to i32
  %88 = icmp ne i32 %87, 0
  %. = select i1 %88, float -1.000000e+00, float 1.000000e+00
  %89 = bitcast float %76 to i32
  %90 = bitcast float %77 to i32
  %91 = insertelement <2 x i32> undef, i32 %89, i32 0
  %92 = insertelement <2 x i32> %91, i32 %90, i32 1
  %93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %49, <16 x i8> %51, i32 2)
  %94 = extractelement <4 x float> %93, i32 0
  %95 = extractelement <4 x float> %93, i32 1
  %96 = extractelement <4 x float> %93, i32 2
  %97 = extractelement <4 x float> %93, i32 3
  %98 = fmul float %78, %78
  %99 = fmul float %79, %79
  %100 = fadd float %99, %98
  %101 = fmul float %80, %80
  %102 = fadd float %100, %101
  %103 = call float @llvm.AMDGPU.rsq(float %102)
  %104 = fmul float %78, %103
  %105 = fmul float %79, %103
  %106 = fmul float %80, %103
  %107 = fmul float %104, %.
  %108 = fmul float %105, %.
  %109 = fmul float %106, %.
  %110 = fmul float %107, %72
  %111 = fmul float %108, %73
  %112 = fadd float %111, %110
  %113 = fmul float %109, %74
  %114 = fadd float %112, %113
  %115 = fmul float %114, %107
  %116 = fmul float %114, %108
  %117 = fmul float %114, %109
  %118 = fmul float 2.000000e+00, %115
  %119 = fmul float 2.000000e+00, %116
  %120 = fmul float 2.000000e+00, %117
  %121 = fsub float -0.000000e+00, %118
  %122 = fadd float %72, %121
  %123 = fsub float -0.000000e+00, %119
  %124 = fadd float %73, %123
  %125 = fsub float -0.000000e+00, %120
  %126 = fadd float %74, %125
  %127 = insertelement <4 x float> undef, float %122, i32 0
  %128 = insertelement <4 x float> %127, float %124, i32 1
  %129 = insertelement <4 x float> %128, float %126, i32 2
  %130 = insertelement <4 x float> %129, float 0.000000e+00, i32 3
  %131 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %130)
  %132 = extractelement <4 x float> %131, i32 0
  %133 = extractelement <4 x float> %131, i32 1
  %134 = extractelement <4 x float> %131, i32 2
  %135 = extractelement <4 x float> %131, i32 3
  %136 = call float @fabs(float %134)
  %137 = fdiv float 1.000000e+00, %136
  %138 = fmul float %132, %137
  %139 = fadd float %138, 1.500000e+00
  %140 = fmul float %133, %137
  %141 = fadd float %140, 1.500000e+00
  %142 = bitcast float %141 to i32
  %143 = bitcast float %139 to i32
  %144 = bitcast float %135 to i32
  %145 = insertelement <4 x i32> undef, i32 %142, i32 0
  %146 = insertelement <4 x i32> %145, i32 %143, i32 1
  %147 = insertelement <4 x i32> %146, i32 %144, i32 2
  %148 = insertelement <4 x i32> %147, i32 undef, i32 3
  %149 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %148, <32 x i8> %53, <16 x i8> %55, i32 4)
  %150 = extractelement <4 x float> %149, i32 0
  %151 = extractelement <4 x float> %149, i32 1
  %152 = extractelement <4 x float> %149, i32 2
  %153 = extractelement <4 x float> %149, i32 3
  %154 = fmul float %94, %39
  %155 = fmul float %95, %40
  %156 = fadd float %154, %155
  %157 = fmul float %96, %41
  %158 = fadd float %156, %157
  %159 = fmul float %97, %42
  %160 = fadd float %158, %159
  %161 = fadd float %160, %30
  %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %163 = fmul float %153, %162
  %164 = call float @llvm.AMDGPU.lrp(float %163, float %150, float %94)
  %165 = call float @llvm.AMDGPU.lrp(float %163, float %151, float %95)
  %166 = call float @llvm.AMDGPU.lrp(float %163, float %152, float %96)
  %167 = fmul float %94, %35
  %168 = fmul float %95, %36
  %169 = fadd float %167, %168
  %170 = fmul float %96, %37
  %171 = fadd float %169, %170
  %172 = fmul float %97, %38
  %173 = fadd float %171, %172
  %174 = fadd float %173, %29
  %175 = call float @llvm.AMDIL.clamp.(float %174, float 0.000000e+00, float 1.000000e+00)
  %176 = call float @llvm.AMDGPU.lrp(float %175, float %66, float 1.000000e+00)
  %177 = call float @llvm.AMDGPU.lrp(float %175, float %67, float 1.000000e+00)
  %178 = call float @llvm.AMDGPU.lrp(float %175, float %68, float 1.000000e+00)
  %179 = call float @llvm.AMDGPU.lrp(float %175, float %69, float 1.000000e+00)
  %180 = fmul float %164, %176
  %181 = fmul float %165, %177
  %182 = fmul float %166, %178
  %183 = fmul float %97, %179
  %184 = fmul float %12, %22
  %185 = fmul float %82, %23
  %186 = bitcast float %184 to i32
  %187 = bitcast float %185 to i32
  %188 = insertelement <2 x i32> undef, i32 %186, i32 0
  %189 = insertelement <2 x i32> %188, i32 %187, i32 1
  %190 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %189, <32 x i8> %61, <16 x i8> %63, i32 2)
  %191 = extractelement <4 x float> %190, i32 0
  %192 = extractelement <4 x float> %190, i32 1
  %193 = extractelement <4 x float> %190, i32 2
  %194 = extractelement <4 x float> %190, i32 3
  %195 = fmul float %94, %31
  %196 = fmul float %95, %32
  %197 = fadd float %195, %196
  %198 = fmul float %96, %33
  %199 = fadd float %197, %198
  %200 = fmul float %97, %34
  %201 = fadd float %199, %200
  %202 = fadd float %201, %28
  %203 = call float @llvm.AMDIL.clamp.(float %202, float 0.000000e+00, float 1.000000e+00)
  %204 = fmul float %203, %194
  %205 = fmul float %191, 0x3FCB367A00000000
  %206 = fmul float %192, 0x3FE6E2EB20000000
  %207 = fadd float %206, %205
  %208 = fmul float %193, 0x3FB27BB300000000
  %209 = fadd float %207, %208
  %210 = fcmp uge float %209, 0x3F50624DE0000000
  %211 = select i1 %210, float %209, float 0x3F50624DE0000000
  %212 = fdiv float 1.000000e+00, %211
  %213 = fmul float %191, %212
  %214 = fmul float %192, %212
  %215 = fmul float %193, %212
  %216 = fmul float %180, %191
  %217 = fmul float %181, %192
  %218 = fmul float %182, %193
  %219 = fmul float %204, %213
  %220 = fadd float %219, %216
  %221 = fmul float %204, %214
  %222 = fadd float %221, %217
  %223 = fmul float %204, %215
  %224 = fadd float %223, %218
  %225 = fmul float %220, 4.000000e+00
  %226 = fmul float %222, 4.000000e+00
  %227 = fmul float %224, 4.000000e+00
  %228 = bitcast float %70 to i32
  %229 = bitcast float %71 to i32
  %230 = insertelement <2 x i32> undef, i32 %228, i32 0
  %231 = insertelement <2 x i32> %230, i32 %229, i32 1
  %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %57, <16 x i8> %59, i32 2)
  %233 = extractelement <4 x float> %232, i32 0
  %234 = extractelement <4 x float> %232, i32 1
  %235 = extractelement <4 x float> %232, i32 2
  %236 = fmul float %43, %233
  %237 = fadd float %236, %225
  %238 = fmul float %44, %234
  %239 = fadd float %238, %226
  %240 = fmul float %45, %235
  %241 = fadd float %240, %227
  %242 = fcmp uge float %75, %27
  %243 = select i1 %242, float %75, float %27
  %244 = call float @llvm.AMDIL.clamp.(float %243, float 0.000000e+00, float 1.000000e+00)
  %245 = call float @llvm.AMDGPU.lrp(float %244, float %237, float %24)
  %246 = call float @llvm.AMDGPU.lrp(float %244, float %239, float %25)
  %247 = call float @llvm.AMDGPU.lrp(float %244, float %241, float %26)
  %248 = call i32 @llvm.SI.packf16(float %245, float %246)
  %249 = bitcast i32 %248 to float
  %250 = call i32 @llvm.SI.packf16(float %247, float %183)
  %251 = bitcast i32 %250 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %249, float %251, float %249, float %251)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2

; Function Attrs: readnone
declare float @fabs(float) #2

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
befe0a7e
befc0306
c8180f00
c8190f01
c81c0e00
c81d0e01
100a0f07
d2820008
04160d06
c8141000
c8151001
d2820008
04220b05
7e105b08
100c1106
d0080008
02010104
d2000004
0021e480
d2060804
02010104
d10a0008
02010104
d2000009
0021e6f2
100c1306
10081107
10081304
c81c0800
c81d0801
10160f04
c8280900
c8290901
d282000b
042e1506
100a1105
100a1305
c8200a00
c8210a01
d2820009
042e1105
10160d09
d2820006
042e0d09
081a0d0a
100c0909
d2820004
041a0909
08180907
10080b09
d2820004
04120b09
081c0908
7e1e0280
d28a0005
043a1b0c
d28c0004
043a1b0c
d28e0006
043a1b0c
d2880007
043a1b0c
d206010c
02010106
7e18550c
7e1a02ff
3fc00000
d2820006
04361904
d2820005
04361905
c0840304
c0c60508
bf8c007f
f0800f00
00430805
c8140d00
c8150d01
c8100c00
c8110c01
c0840300
c0c60500
bf8c0070
f0800f00
00430404
c0840100
bf8c0070
c2000929
bf8c007f
10180a00
c2000928
bf8c007f
d282000c
04300104
c200092a
bf8c007f
d282000c
04300106
c200092b
bf8c007f
d282000c
04300107
c200091e
bf8c007f
06181800
d206080c
0201010c
101c190b
081e1cf2
10180b0f
d2820010
0432130e
c2000925
bf8c007f
10180a00
c2000924
bf8c007f
d282000c
04300104
c2000926
bf8c007f
d282000c
04300106
c2000927
bf8c007f
d282000c
04300107
c200091d
bf8c007f
06181800
d206080c
0201010c
081a18f2
c8440100
c8450101
d2820011
0436230c
10282310
c2000930
c2008931
bf8c007f
7e200201
d2820003
04400103
c2000911
bf8c007f
10220600
c2000910
bf8c007f
10200400
c086030c
c0c80518
bf8c007f
f0800f00
00641010
bf8c0770
10282314
100420ff
3e59b3d0
7e0602ff
3f371759
d2820002
040a0711
7e0602ff
3d93dd98
d2820002
040a0712
7e0602ff
3a83126f
d00c0000
02020702
d2000002
00020503
7e045502
102a0511
c2000921
bf8c007f
10060a00
c2000920
bf8c007f
d2820003
040c0104
c2000922
bf8c007f
d2820003
040c0106
c2000923
bf8c007f
d2820003
040c0107
c200091c
bf8c007f
06060600
d2060803
02010103
10062703
d2820014
04522b03
102e28f6
c8540700
c8550701
c8500600
c8510601
c0800308
c0c60510
bf8c007f
f0800700
00031414
c200092d
bf8c0070
d2820019
045e2a00
c85c0b00
c85d0b01
c2000917
bf8c007f
d00c0002
02000117
7e300200
d2000017
000a2f18
d2060817
02010117
08302ef2
c2000915
bf8c007f
10343000
d2820019
046a3317
1034090f
d282001a
046a110e
c86c0000
c86d0001
d282001b
0436370c
1034371a
1034211a
10360510
d282001a
046a3703
103434f6
c200092c
bf8c007f
d282001a
046a2800
c2000914
bf8c007f
10363000
d282001a
046e3517
5e32331a
101e0d0f
d2820008
043e150e
c8240200
c8250201
d2820009
0436130c
10101308
10102508
10040512
d2820002
04220503
100404f6
c200092e
bf8c007f
d2820002
040a2c00
c2000916
bf8c007f
10063000
d2820002
040e0517
c80c0300
c80d0301
d2820000
0436070c
10000107
5e000102
f8001c0f
00190019
bf810000
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[19]
DCL OUT[2], GENERIC[20]
DCL OUT[3], GENERIC[21]
DCL OUT[4], GENERIC[22]
DCL OUT[5], GENERIC[23]
DCL CONST[0..206]
DCL TEMP[0..7], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {3, 1, 2, 0}
  0: F2I TEMP[0], IN[2]
  1: MOV TEMP[1].w, IMM[0].xxxx
  2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[12].xyzz, CONST[11].xyzz
  3: MOV TEMP[2].w, IMM[0].xxxx
  4: MOV TEMP[2].xyz, IN[3].xyzx
  5: UMUL TEMP[3].x, IMM[1].xxxx, TEMP[0].wwww
  6: UMUL TEMP[4].x, IMM[1].xxxx, TEMP[0].zzzz
  7: UMUL TEMP[5].x, IMM[1].xxxx, TEMP[0].yyyy
  8: UMUL TEMP[6].x, IMM[1].xxxx, TEMP[0].xxxx
  9: UARL ADDR[0].x, TEMP[6].xxxx
 10: MUL TEMP[6], CONST[ADDR[0].x+15], IN[1].xxxx
 11: UARL ADDR[0].x, TEMP[5].xxxx
 12: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[6]
 13: UARL ADDR[0].x, TEMP[4].xxxx
 14: MAD TEMP[4], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[5]
 15: UARL ADDR[0].x, TEMP[3].xxxx
 16: UARL ADDR[0].x, TEMP[3].xxxx
 17: MAD TEMP[3], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[4]
 18: UMAD TEMP[4].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].yyyy
 19: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].yyyy
 20: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].yyyy
 21: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].yyyy
 22: UARL ADDR[0].x, TEMP[7].xxxx
 23: MUL TEMP[7], CONST[ADDR[0].x+15], IN[1].xxxx
 24: UARL ADDR[0].x, TEMP[6].xxxx
 25: MAD TEMP[6], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[7]
 26: UARL ADDR[0].x, TEMP[5].xxxx
 27: MAD TEMP[5], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[6]
 28: UARL ADDR[0].x, TEMP[4].xxxx
 29: UARL ADDR[0].x, TEMP[4].xxxx
 30: MAD TEMP[4], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[5]
 31: UMAD TEMP[5].x, IMM[1].xxxx, TEMP[0].wwww, IMM[1].zzzz
 32: UMAD TEMP[6].x, IMM[1].xxxx, TEMP[0].zzzz, IMM[1].zzzz
 33: UMAD TEMP[7].x, IMM[1].xxxx, TEMP[0].yyyy, IMM[1].zzzz
 34: UMAD TEMP[0].x, IMM[1].xxxx, TEMP[0].xxxx, IMM[1].zzzz
 35: UARL ADDR[0].x, TEMP[0].xxxx
 36: MUL TEMP[0], CONST[ADDR[0].x+15], IN[1].xxxx
 37: UARL ADDR[0].x, TEMP[7].xxxx
 38: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].yyyy, TEMP[0]
 39: UARL ADDR[0].x, TEMP[6].xxxx
 40: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].zzzz, TEMP[0]
 41: UARL ADDR[0].x, TEMP[5].xxxx
 42: UARL ADDR[0].x, TEMP[5].xxxx
 43: MAD TEMP[0], CONST[ADDR[0].x+15], IN[1].wwww, TEMP[0]
 44: DP4 TEMP[5].x, TEMP[1], TEMP[3]
 45: DP4 TEMP[6].x, TEMP[1], TEMP[4]
 46: DP4 TEMP[1].x, TEMP[1], TEMP[0]
 47: DP4 TEMP[3].x, TEMP[2], TEMP[3]
 48: DP4 TEMP[4].x, TEMP[2], TEMP[4]
 49: MOV TEMP[3].y, TEMP[4].xxxx
 50: DP4 TEMP[0].x, TEMP[2], TEMP[0]
 51: MOV TEMP[3].z, TEMP[0].xxxx
 52: MUL TEMP[0], CONST[4], TEMP[5].xxxx
 53: MAD TEMP[0], CONST[5], TEMP[6].xxxx, TEMP[0]
 54: MAD TEMP[0], CONST[6], TEMP[1].xxxx, TEMP[0]
 55: ADD TEMP[0], TEMP[0], CONST[7]
 56: MOV TEMP[2].w, IMM[0].xxxx
 57: MOV TEMP[2].xyz, CONST[8].xyzx
 58: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[10].wwww
 59: MUL TEMP[4], CONST[0], TEMP[3].xxxx
 60: MAD TEMP[4], CONST[1], TEMP[3].yyyy, TEMP[4]
 61: MAD TEMP[3].xyz, CONST[2], TEMP[3].zzzz, TEMP[4]
 62: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
 63: RSQ TEMP[4].x, TEMP[4].xxxx
 64: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 65: MUL TEMP[4], CONST[0], TEMP[5].xxxx
 66: MAD TEMP[4], CONST[1], TEMP[6].xxxx, TEMP[4]
 67: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[4]
 68: ADD TEMP[1].xyz, TEMP[1], CONST[3]
 69: ADD TEMP[1].xyz, TEMP[1].xyzz, -CONST[14].xyzz
 70: MAD TEMP[4].x, TEMP[0].zzzz, CONST[13].xxxx, CONST[13].yyyy
 71: MOV TEMP[1].w, TEMP[4].xxxx
 72: MAD TEMP[4].xy, IN[4].xyyy, CONST[9].xyyy, CONST[9].zwww
 73: MOV TEMP[4].zw, TEMP[3].yyxy
 74: MOV TEMP[3].x, TEMP[3].zzzz
 75: MOV OUT[2], IN[5]
 76: MOV OUT[1], TEMP[2]
 77: MOV OUT[4], TEMP[4]
 78: MOV OUT[5], TEMP[3]
 79: MOV OUT[3], TEMP[1]
 80: MOV OUT[0], TEMP[0]
 81: END
; ModuleID = 'tgsi'

define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
  %9 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
  %10 = load <16 x i8> addrspace(2)* %9, !tbaa !0
  %11 = call float @llvm.SI.load.const(<16 x i8> %10, i32 0)
  %12 = call float @llvm.SI.load.const(<16 x i8> %10, i32 4)
  %13 = call float @llvm.SI.load.const(<16 x i8> %10, i32 8)
  %14 = call float @llvm.SI.load.const(<16 x i8> %10, i32 16)
  %15 = call float @llvm.SI.load.const(<16 x i8> %10, i32 20)
  %16 = call float @llvm.SI.load.const(<16 x i8> %10, i32 24)
  %17 = call float @llvm.SI.load.const(<16 x i8> %10, i32 32)
  %18 = call float @llvm.SI.load.const(<16 x i8> %10, i32 36)
  %19 = call float @llvm.SI.load.const(<16 x i8> %10, i32 40)
  %20 = call float @llvm.SI.load.const(<16 x i8> %10, i32 48)
  %21 = call float @llvm.SI.load.const(<16 x i8> %10, i32 52)
  %22 = call float @llvm.SI.load.const(<16 x i8> %10, i32 56)
  %23 = call float @llvm.SI.load.const(<16 x i8> %10, i32 64)
  %24 = call float @llvm.SI.load.const(<16 x i8> %10, i32 68)
  %25 = call float @llvm.SI.load.const(<16 x i8> %10, i32 72)
  %26 = call float @llvm.SI.load.const(<16 x i8> %10, i32 76)
  %27 = call float @llvm.SI.load.const(<16 x i8> %10, i32 80)
  %28 = call float @llvm.SI.load.const(<16 x i8> %10, i32 84)
  %29 = call float @llvm.SI.load.const(<16 x i8> %10, i32 88)
  %30 = call float @llvm.SI.load.const(<16 x i8> %10, i32 92)
  %31 = call float @llvm.SI.load.const(<16 x i8> %10, i32 96)
  %32 = call float @llvm.SI.load.const(<16 x i8> %10, i32 100)
  %33 = call float @llvm.SI.load.const(<16 x i8> %10, i32 104)
  %34 = call float @llvm.SI.load.const(<16 x i8> %10, i32 108)
  %35 = call float @llvm.SI.load.const(<16 x i8> %10, i32 112)
  %36 = call float @llvm.SI.load.const(<16 x i8> %10, i32 116)
  %37 = call float @llvm.SI.load.const(<16 x i8> %10, i32 120)
  %38 = call float @llvm.SI.load.const(<16 x i8> %10, i32 124)
  %39 = call float @llvm.SI.load.const(<16 x i8> %10, i32 128)
  %40 = call float @llvm.SI.load.const(<16 x i8> %10, i32 132)
  %41 = call float @llvm.SI.load.const(<16 x i8> %10, i32 136)
  %42 = call float @llvm.SI.load.const(<16 x i8> %10, i32 144)
  %43 = call float @llvm.SI.load.const(<16 x i8> %10, i32 148)
  %44 = call float @llvm.SI.load.const(<16 x i8> %10, i32 152)
  %45 = call float @llvm.SI.load.const(<16 x i8> %10, i32 156)
  %46 = call float @llvm.SI.load.const(<16 x i8> %10, i32 172)
  %47 = call float @llvm.SI.load.const(<16 x i8> %10, i32 176)
  %48 = call float @llvm.SI.load.const(<16 x i8> %10, i32 180)
  %49 = call float @llvm.SI.load.const(<16 x i8> %10, i32 184)
  %50 = call float @llvm.SI.load.const(<16 x i8> %10, i32 192)
  %51 = call float @llvm.SI.load.const(<16 x i8> %10, i32 196)
  %52 = call float @llvm.SI.load.const(<16 x i8> %10, i32 200)
  %53 = call float @llvm.SI.load.const(<16 x i8> %10, i32 208)
  %54 = call float @llvm.SI.load.const(<16 x i8> %10, i32 212)
  %55 = call float @llvm.SI.load.const(<16 x i8> %10, i32 224)
  %56 = call float @llvm.SI.load.const(<16 x i8> %10, i32 228)
  %57 = call float @llvm.SI.load.const(<16 x i8> %10, i32 232)
  %58 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
  %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0
  %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %5)
  %61 = extractelement <4 x float> %60, i32 0
  %62 = extractelement <4 x float> %60, i32 1
  %63 = extractelement <4 x float> %60, i32 2
  %64 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
  %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0
  %66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %5)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = extractelement <4 x float> %66, i32 1
  %69 = extractelement <4 x float> %66, i32 2
  %70 = extractelement <4 x float> %66, i32 3
  %71 = getelementptr <16 x i8> addrspace(2)* %3, i32 2
  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
  %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %5)
  %74 = extractelement <4 x float> %73, i32 0
  %75 = extractelement <4 x float> %73, i32 1
  %76 = extractelement <4 x float> %73, i32 2
  %77 = extractelement <4 x float> %73, i32 3
  %78 = getelementptr <16 x i8> addrspace(2)* %3, i32 3
  %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0
  %80 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %5)
  %81 = extractelement <4 x float> %80, i32 0
  %82 = extractelement <4 x float> %80, i32 1
  %83 = extractelement <4 x float> %80, i32 2
  %84 = getelementptr <16 x i8> addrspace(2)* %3, i32 4
  %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0
  %86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %5)
  %87 = extractelement <4 x float> %86, i32 0
  %88 = extractelement <4 x float> %86, i32 1
  %89 = getelementptr <16 x i8> addrspace(2)* %3, i32 5
  %90 = load <16 x i8> addrspace(2)* %89, !tbaa !0
  %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %5)
  %92 = extractelement <4 x float> %91, i32 0
  %93 = extractelement <4 x float> %91, i32 1
  %94 = extractelement <4 x float> %91, i32 2
  %95 = extractelement <4 x float> %91, i32 3
  %96 = fptosi float %74 to i32
  %97 = fptosi float %75 to i32
  %98 = fptosi float %76 to i32
  %99 = fptosi float %77 to i32
  %100 = bitcast i32 %96 to float
  %101 = bitcast i32 %97 to float
  %102 = bitcast i32 %98 to float
  %103 = bitcast i32 %99 to float
  %104 = fmul float %61, %50
  %105 = fadd float %104, %47
  %106 = fmul float %62, %51
  %107 = fadd float %106, %48
  %108 = fmul float %63, %52
  %109 = fadd float %108, %49
  %110 = bitcast float %103 to i32
  %111 = mul i32 3, %110
  %112 = bitcast i32 %111 to float
  %113 = bitcast float %102 to i32
  %114 = mul i32 3, %113
  %115 = bitcast i32 %114 to float
  %116 = bitcast float %101 to i32
  %117 = mul i32 3, %116
  %118 = bitcast i32 %117 to float
  %119 = bitcast float %100 to i32
  %120 = mul i32 3, %119
  %121 = bitcast i32 %120 to float
  %122 = bitcast float %121 to i32
  %123 = shl i32 %122, 4
  %124 = add i32 %123, 240
  %125 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %124)
  %126 = fmul float %125, %67
  %127 = shl i32 %122, 4
  %128 = add i32 %127, 244
  %129 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %128)
  %130 = fmul float %129, %67
  %131 = shl i32 %122, 4
  %132 = add i32 %131, 248
  %133 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %132)
  %134 = fmul float %133, %67
  %135 = shl i32 %122, 4
  %136 = add i32 %135, 252
  %137 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %136)
  %138 = fmul float %137, %67
  %139 = bitcast float %118 to i32
  %140 = shl i32 %139, 4
  %141 = add i32 %140, 240
  %142 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %141)
  %143 = fmul float %142, %68
  %144 = fadd float %143, %126
  %145 = shl i32 %139, 4
  %146 = add i32 %145, 244
  %147 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %146)
  %148 = fmul float %147, %68
  %149 = fadd float %148, %130
  %150 = shl i32 %139, 4
  %151 = add i32 %150, 248
  %152 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %151)
  %153 = fmul float %152, %68
  %154 = fadd float %153, %134
  %155 = shl i32 %139, 4
  %156 = add i32 %155, 252
  %157 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %156)
  %158 = fmul float %157, %68
  %159 = fadd float %158, %138
  %160 = bitcast float %115 to i32
  %161 = shl i32 %160, 4
  %162 = add i32 %161, 240
  %163 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %162)
  %164 = fmul float %163, %69
  %165 = fadd float %164, %144
  %166 = shl i32 %160, 4
  %167 = add i32 %166, 244
  %168 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %167)
  %169 = fmul float %168, %69
  %170 = fadd float %169, %149
  %171 = shl i32 %160, 4
  %172 = add i32 %171, 248
  %173 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %172)
  %174 = fmul float %173, %69
  %175 = fadd float %174, %154
  %176 = shl i32 %160, 4
  %177 = add i32 %176, 252
  %178 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %177)
  %179 = fmul float %178, %69
  %180 = fadd float %179, %159
  %181 = bitcast float %112 to i32
  %182 = shl i32 %181, 4
  %183 = add i32 %182, 240
  %184 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %183)
  %185 = fmul float %184, %70
  %186 = fadd float %185, %165
  %187 = shl i32 %181, 4
  %188 = add i32 %187, 244
  %189 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %188)
  %190 = fmul float %189, %70
  %191 = fadd float %190, %170
  %192 = shl i32 %181, 4
  %193 = add i32 %192, 248
  %194 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %193)
  %195 = fmul float %194, %70
  %196 = fadd float %195, %175
  %197 = shl i32 %181, 4
  %198 = add i32 %197, 252
  %199 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %198)
  %200 = fmul float %199, %70
  %201 = fadd float %200, %180
  %202 = bitcast float %103 to i32
  %203 = mul i32 3, %202
  %204 = add i32 %203, 1
  %205 = bitcast i32 %204 to float
  %206 = bitcast float %102 to i32
  %207 = mul i32 3, %206
  %208 = add i32 %207, 1
  %209 = bitcast i32 %208 to float
  %210 = bitcast float %101 to i32
  %211 = mul i32 3, %210
  %212 = add i32 %211, 1
  %213 = bitcast i32 %212 to float
  %214 = bitcast float %100 to i32
  %215 = mul i32 3, %214
  %216 = add i32 %215, 1
  %217 = bitcast i32 %216 to float
  %218 = bitcast float %217 to i32
  %219 = shl i32 %218, 4
  %220 = add i32 %219, 240
  %221 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %220)
  %222 = fmul float %221, %67
  %223 = shl i32 %218, 4
  %224 = add i32 %223, 244
  %225 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %224)
  %226 = fmul float %225, %67
  %227 = shl i32 %218, 4
  %228 = add i32 %227, 248
  %229 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %228)
  %230 = fmul float %229, %67
  %231 = shl i32 %218, 4
  %232 = add i32 %231, 252
  %233 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %232)
  %234 = fmul float %233, %67
  %235 = bitcast float %213 to i32
  %236 = shl i32 %235, 4
  %237 = add i32 %236, 240
  %238 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %237)
  %239 = fmul float %238, %68
  %240 = fadd float %239, %222
  %241 = shl i32 %235, 4
  %242 = add i32 %241, 244
  %243 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %242)
  %244 = fmul float %243, %68
  %245 = fadd float %244, %226
  %246 = shl i32 %235, 4
  %247 = add i32 %246, 248
  %248 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %247)
  %249 = fmul float %248, %68
  %250 = fadd float %249, %230
  %251 = shl i32 %235, 4
  %252 = add i32 %251, 252
  %253 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %252)
  %254 = fmul float %253, %68
  %255 = fadd float %254, %234
  %256 = bitcast float %209 to i32
  %257 = shl i32 %256, 4
  %258 = add i32 %257, 240
  %259 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %258)
  %260 = fmul float %259, %69
  %261 = fadd float %260, %240
  %262 = shl i32 %256, 4
  %263 = add i32 %262, 244
  %264 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %263)
  %265 = fmul float %264, %69
  %266 = fadd float %265, %245
  %267 = shl i32 %256, 4
  %268 = add i32 %267, 248
  %269 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %268)
  %270 = fmul float %269, %69
  %271 = fadd float %270, %250
  %272 = shl i32 %256, 4
  %273 = add i32 %272, 252
  %274 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %273)
  %275 = fmul float %274, %69
  %276 = fadd float %275, %255
  %277 = bitcast float %205 to i32
  %278 = shl i32 %277, 4
  %279 = add i32 %278, 240
  %280 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %279)
  %281 = fmul float %280, %70
  %282 = fadd float %281, %261
  %283 = shl i32 %277, 4
  %284 = add i32 %283, 244
  %285 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %284)
  %286 = fmul float %285, %70
  %287 = fadd float %286, %266
  %288 = shl i32 %277, 4
  %289 = add i32 %288, 248
  %290 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %289)
  %291 = fmul float %290, %70
  %292 = fadd float %291, %271
  %293 = shl i32 %277, 4
  %294 = add i32 %293, 252
  %295 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %294)
  %296 = fmul float %295, %70
  %297 = fadd float %296, %276
  %298 = bitcast float %103 to i32
  %299 = mul i32 3, %298
  %300 = add i32 %299, 2
  %301 = bitcast i32 %300 to float
  %302 = bitcast float %102 to i32
  %303 = mul i32 3, %302
  %304 = add i32 %303, 2
  %305 = bitcast i32 %304 to float
  %306 = bitcast float %101 to i32
  %307 = mul i32 3, %306
  %308 = add i32 %307, 2
  %309 = bitcast i32 %308 to float
  %310 = bitcast float %100 to i32
  %311 = mul i32 3, %310
  %312 = add i32 %311, 2
  %313 = bitcast i32 %312 to float
  %314 = bitcast float %313 to i32
  %315 = shl i32 %314, 4
  %316 = add i32 %315, 240
  %317 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %316)
  %318 = fmul float %317, %67
  %319 = shl i32 %314, 4
  %320 = add i32 %319, 244
  %321 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %320)
  %322 = fmul float %321, %67
  %323 = shl i32 %314, 4
  %324 = add i32 %323, 248
  %325 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %324)
  %326 = fmul float %325, %67
  %327 = shl i32 %314, 4
  %328 = add i32 %327, 252
  %329 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %328)
  %330 = fmul float %329, %67
  %331 = bitcast float %309 to i32
  %332 = shl i32 %331, 4
  %333 = add i32 %332, 240
  %334 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %333)
  %335 = fmul float %334, %68
  %336 = fadd float %335, %318
  %337 = shl i32 %331, 4
  %338 = add i32 %337, 244
  %339 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %338)
  %340 = fmul float %339, %68
  %341 = fadd float %340, %322
  %342 = shl i32 %331, 4
  %343 = add i32 %342, 248
  %344 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %343)
  %345 = fmul float %344, %68
  %346 = fadd float %345, %326
  %347 = shl i32 %331, 4
  %348 = add i32 %347, 252
  %349 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %348)
  %350 = fmul float %349, %68
  %351 = fadd float %350, %330
  %352 = bitcast float %305 to i32
  %353 = shl i32 %352, 4
  %354 = add i32 %353, 240
  %355 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %354)
  %356 = fmul float %355, %69
  %357 = fadd float %356, %336
  %358 = shl i32 %352, 4
  %359 = add i32 %358, 244
  %360 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %359)
  %361 = fmul float %360, %69
  %362 = fadd float %361, %341
  %363 = shl i32 %352, 4
  %364 = add i32 %363, 248
  %365 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %364)
  %366 = fmul float %365, %69
  %367 = fadd float %366, %346
  %368 = shl i32 %352, 4
  %369 = add i32 %368, 252
  %370 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %369)
  %371 = fmul float %370, %69
  %372 = fadd float %371, %351
  %373 = bitcast float %301 to i32
  %374 = shl i32 %373, 4
  %375 = add i32 %374, 240
  %376 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %375)
  %377 = fmul float %376, %70
  %378 = fadd float %377, %357
  %379 = shl i32 %373, 4
  %380 = add i32 %379, 244
  %381 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %380)
  %382 = fmul float %381, %70
  %383 = fadd float %382, %362
  %384 = shl i32 %373, 4
  %385 = add i32 %384, 248
  %386 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %385)
  %387 = fmul float %386, %70
  %388 = fadd float %387, %367
  %389 = shl i32 %373, 4
  %390 = add i32 %389, 252
  %391 = call float @llvm.SI.load.const(<16 x i8> %10, i32 %390)
  %392 = fmul float %391, %70
  %393 = fadd float %392, %372
  %394 = fmul float %105, %186
  %395 = fmul float %107, %191
  %396 = fadd float %394, %395
  %397 = fmul float %109, %196
  %398 = fadd float %396, %397
  %399 = fmul float 1.000000e+00, %201
  %400 = fadd float %398, %399
  %401 = fmul float %105, %282
  %402 = fmul float %107, %287
  %403 = fadd float %401, %402
  %404 = fmul float %109, %292
  %405 = fadd float %403, %404
  %406 = fmul float 1.000000e+00, %297
  %407 = fadd float %405, %406
  %408 = fmul float %105, %378
  %409 = fmul float %107, %383
  %410 = fadd float %408, %409
  %411 = fmul float %109, %388
  %412 = fadd float %410, %411
  %413 = fmul float 1.000000e+00, %393
  %414 = fadd float %412, %413
  %415 = fmul float %81, %186
  %416 = fmul float %82, %191
  %417 = fadd float %415, %416
  %418 = fmul float %83, %196
  %419 = fadd float %417, %418
  %420 = fmul float 1.000000e+00, %201
  %421 = fadd float %419, %420
  %422 = fmul float %81, %282
  %423 = fmul float %82, %287
  %424 = fadd float %422, %423
  %425 = fmul float %83, %292
  %426 = fadd float %424, %425
  %427 = fmul float 1.000000e+00, %297
  %428 = fadd float %426, %427
  %429 = fmul float %81, %378
  %430 = fmul float %82, %383
  %431 = fadd float %429, %430
  %432 = fmul float %83, %388
  %433 = fadd float %431, %432
  %434 = fmul float 1.000000e+00, %393
  %435 = fadd float %433, %434
  %436 = fmul float %23, %400
  %437 = fmul float %24, %400
  %438 = fmul float %25, %400
  %439 = fmul float %26, %400
  %440 = fmul float %27, %407
  %441 = fadd float %440, %436
  %442 = fmul float %28, %407
  %443 = fadd float %442, %437
  %444 = fmul float %29, %407
  %445 = fadd float %444, %438
  %446 = fmul float %30, %407
  %447 = fadd float %446, %439
  %448 = fmul float %31, %414
  %449 = fadd float %448, %441
  %450 = fmul float %32, %414
  %451 = fadd float %450, %443
  %452 = fmul float %33, %414
  %453 = fadd float %452, %445
  %454 = fmul float %34, %414
  %455 = fadd float %454, %447
  %456 = fadd float %449, %35
  %457 = fadd float %451, %36
  %458 = fadd float %453, %37
  %459 = fadd float %455, %38
  %460 = fmul float %421, %46
  %461 = fmul float %428, %46
  %462 = fmul float %435, %46
  %463 = fmul float %11, %460
  %464 = fmul float %12, %460
  %465 = fmul float %13, %460
  %466 = fmul float %14, %461
  %467 = fadd float %466, %463
  %468 = fmul float %15, %461
  %469 = fadd float %468, %464
  %470 = fmul float %16, %461
  %471 = fadd float %470, %465
  %472 = fmul float %17, %462
  %473 = fadd float %472, %467
  %474 = fmul float %18, %462
  %475 = fadd float %474, %469
  %476 = fmul float %19, %462
  %477 = fadd float %476, %471
  %478 = fmul float %473, %473
  %479 = fmul float %475, %475
  %480 = fadd float %479, %478
  %481 = fmul float %477, %477
  %482 = fadd float %480, %481
  %483 = call float @llvm.AMDGPU.rsq(float %482)
  %484 = fmul float %473, %483
  %485 = fmul float %475, %483
  %486 = fmul float %477, %483
  %487 = fmul float %11, %400
  %488 = fmul float %12, %400
  %489 = fmul float %13, %400
  %490 = fmul float %14, %407
  %491 = fadd float %490, %487
  %492 = fmul float %15, %407
  %493 = fadd float %492, %488
  %494 = fmul float %16, %407
  %495 = fadd float %494, %489
  %496 = fmul float %17, %414
  %497 = fadd float %496, %491
  %498 = fmul float %18, %414
  %499 = fadd float %498, %493
  %500 = fmul float %19, %414
  %501 = fadd float %500, %495
  %502 = fadd float %497, %20
  %503 = fadd float %499, %21
  %504 = fadd float %501, %22
  %505 = fsub float -0.000000e+00, %55
  %506 = fadd float %502, %505
  %507 = fsub float -0.000000e+00, %56
  %508 = fadd float %503, %507
  %509 = fsub float -0.000000e+00, %57
  %510 = fadd float %504, %509
  %511 = fmul float %458, %53
  %512 = fadd float %511, %54
  %513 = fmul float %87, %42
  %514 = fadd float %513, %44
  %515 = fmul float %88, %43
  %516 = fadd float %515, %45
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %39, float %40, float %41, float 1.000000e+00)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %92, float %93, float %94, float %95)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %506, float %508, float %510, float %512)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %514, float %516, float %484, float %485)
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %486, float %485, float %486, float %201)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %456, float %457, float %458, float %459)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #2

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }

!0 = metadata !{metadata !"const", null, i32 1}
SI CODE:
c0800100
bf8c007f
c2020122
c2028121
c2040120
7e0202f2
bf8c007f
7e040208
7e060205
7e080204
f800020f
01040302
c0840714
bf8c000f
e00c2000
80020100
bf8c0770
f800021f
04030201
c0840708
bf8c000f
e00c2000
80020f00
bf8c0770
7e02110f
d2d6000c
02010701
34021884
4a0402ff
000000f0
e0301000
80000202
c0840704
bf8c0070
e00c2000
80020500
bf8c0770
10040b02
7e061110
d2d6000d
02010703
34061a84
4a0806ff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a0d04
7e081111
d2d6000e
02010704
34141c84
4a0814ff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a0f04
7e081112
d2d6000f
02010704
34161e84
4a0816ff
000000f0
e0301000
80000404
bf8c0770
d2820002
040a1104
4a0802ff
000000f4
e0301000
80000404
bf8c0770
10080b04
4a1206ff
000000f4
e0301000
80000909
bf8c0770
d2820004
04120d09
4a1214ff
000000f4
e0301000
80000909
bf8c0770
d2820004
04120f09
4a1216ff
000000f4
e0301000
80000909
bf8c0770
d2820004
04121109
c0840700
bf8c007f
e00c2000
80021300
c2020131
c202812d
bf8c0070
7e120205
d2820011
04240914
10120911
c2020130
c202812c
bf8c007f
7e200205
d2820012
04400913
d2820010
04260512
4a1202ff
000000f8
e0301000
80000909
bf8c0770
10120b09
4a2e06ff
000000f8
e0301000
80001717
bf8c0770
d2820009
04260d17
4a2e14ff
000000f8
e0301000
80001717
bf8c0770
d2820009
04260f17
4a2e16ff
000000f8
e0301000
80001717
bf8c0770
d2820009
04261117
c2020132
c202812e
bf8c007f
7e2e0205
d2820014
045c0915
d2820010
04421314
4a0202ff
000000fc
e0301000
80000101
bf8c0770
10020b01
4a0606ff
000000fc
e0301000
80000303
bf8c0770
d2820001
04060d03
4a0614ff
000000fc
e0301000
80000303
bf8c0770
d2820001
04060f03
4a0616ff
000000fc
e0301000
80000303
bf8c0770
d2820003
04061103
06020710
c2020112
bf8c007f
102c0204
4a141881
34141484
4a1614ff
000000f0
e0301000
80000b0b
bf8c0770
10160b0b
4a201a81
342a2084
4a202aff
000000f0
e0301000
80001010
bf8c0770
d282000b
042e0d10
4a201c81
342e2084
4a202eff
000000f0
e0301000
80001010
bf8c0770
d282000b
042e0f10
4a201e81
34302084
4a2030ff
000000f0
e0301000
80001010
bf8c0770
d282000b
042e1110
4a2014ff
000000f4
e0301000
80001010
bf8c0770
10200b10
4a262aff
000000f4
e0301000
80001313
bf8c0770
d2820010
04420d13
4a262eff
000000f4
e0301000
80001313
bf8c0770
d2820010
04420f13
4a2630ff
000000f4
e0301000
80001313
bf8c0770
d2820010
04421113
10262111
d2820019
044e1712
4a2614ff
000000f8
e0301000
80001313
bf8c0770
10260b13
4a342aff
000000f8
e0301000
80001a1a
bf8c0770
d2820013
044e0d1a
4a342eff
000000f8
e0301000
80001a1a
bf8c0770
d2820013
044e0f1a
4a3430ff
000000f8
e0301000
80001a1a
bf8c0770
d2820013
044e111a
d2820019
04662714
4a1414ff
000000fc
e0301000
80000a0a
bf8c0770
10140b0a
4a2a2aff
000000fc
e0301000
80001515
bf8c0770
d282000a
042a0d15
4a2a2eff
000000fc
e0301000
80001515
bf8c0770
d282000a
042a0f15
4a2a30ff
000000fc
e0301000
80001515
bf8c0770
d2820015
042a1115
06142b19
c2020116
bf8c007f
d2820016
045a1404
4a181882
342e1884
4a182eff
000000f0
e0301000
80000c0c
bf8c0770
10180b0c
4a1a1a82
34301a84
4a1a30ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
04320d0d
4a1a1c82
34321a84
4a1a32ff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
04320f0d
4a1a1e82
341e1a84
4a1a1eff
000000f0
e0301000
80000d0d
bf8c0770
d282000c
0432110d
4a1a2eff
000000f4
e0301000
80000d0d
bf8c0770
101a0b0d
4a1c30ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
04360d0e
4a1c32ff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
04360f0e
4a1c1eff
000000f4
e0301000
80000e0e
bf8c0770
d282000d
0436110e
101c1b11
d2820011
043a1912
4a1c2eff
000000f8
e0301000
80000e0e
bf8c0770
101c0b0e
4a2430ff
000000f8
e0301000
80001212
bf8c0770
d282000e
043a0d12
4a2432ff
000000f8
e0301000
80001212
bf8c0770
d282000e
043a0f12
4a241eff
000000f8
e0301000
80001212
bf8c0770
d282000e
043a1112
d2820011
04461d14
4a242eff
000000fc
e0301000
80001212
bf8c0770
10240b12
4a2830ff
000000fc
e0301000
80001414
bf8c0770
d2820012
044a0d14
4a2832ff
000000fc
e0301000
80001414
bf8c0770
d2820012
044a0f14
4a1e1eff
000000fc
e0301000
80000f0f
bf8c0770
d2820007
044a110f
060a0f11
c202011a
bf8c007f
d2820006
045a0a04
c202011e
bf8c007f
060c0c04
c2020134
c2028135
bf8c007f
7e100205
d2820008
04200906
c2020102
bf8c007f
101e0204
c2028106
bf8c007f
d282000f
043e1405
c204010a
bf8c007f
d282000f
043e0a08
c204810e
bf8c007f
061e1e09
c204813a
bf8c007f
0a1e1e09
c2070101
bf8c007f
1022020e
c2050105
bf8c007f
d2820011
0446140a
c2048109
bf8c007f
d2820011
04460a09
c205810d
bf8c007f
0622220b
c2058139
bf8c007f
0a22220b
c2058100
bf8c007f
1024020b
c2060104
bf8c007f
d2820012
044a140c
c2068108
bf8c007f
d2820012
044a0a0d
c207810c
bf8c007f
0624240f
c2078138
bf8c007f
0a24240f
f800022f
080f1112
c088070c
bf8c000f
e00c2000
80041600
bf8c0770
10080917
d2820002
04120516
d2820002
040a1318
06040702
c207812b
bf8c007f
1004040f
1010040e
10082117
d2820004
04121716
d2820004
04122718
06082b04
1008080f
d2820009
0422080a
10101b17
d2820008
04221916
d2820008
04221d18
060e0f08
10100e0f
d2820007
04261009
1012040b
d2820009
0426080c
d2820009
0426100d
10161309
d282000b
042e0f07
10040404
d2820002
040a0805
d2820002
040a1008
d2820004
042e0502
7e085b04
100e0907
10100909
c0820710
bf8c007f
e00c2000
80010b00
c2020125
c2028127
bf8c0070
7e000205
d2820000
0400090c
c2020124
c2028126
bf8c007f
7e120205
d2820009
0424090b
f800023f
07080009
bf8c070f
10000902
f800024f
03000700
c2020113
bf8c000f
10000204
c2020117
bf8c007f
d2820000
04021404
c202011b
bf8c007f
d2820000
04020a04
c202011f
bf8c007f
06000004
c2020111
bf8c007f
10040204
c2020115
bf8c007f
d2820002
040a1404
c2020119
bf8c007f
d2820002
040a0a04
c202011d
bf8c007f
06040404
c2020110
bf8c007f
10020204
c2020114
bf8c007f
d2820001
04061404
c2020118
bf8c007f
d2820001
04060a04
c200011c
bf8c007f
06020200
f80008cf
00060201
bf810000
Game removed: AppID 91200 "Anomaly Warzone Earth", ProcID 4947 
Generating new string page texture 134: 384x256, total string texture memory is 909,31 KB
unlinked 2 orphaned pipes
CAsyncIOManager: 0 threads terminating.  0 reads, 0 writes, 0 deferrals.
CAsyncIOManager: 12957 single object sleeps, 0 multi object sleeps
CAsyncIOManager: 0 single object alertable sleeps, 2 multi object alertable sleeps
Shutting down. . .
[2013-07-20 12:20:09] Shutdown