72615 – [llvmpipe] piglit copyteximage 1D regression

Bug 72615 - [llvmpipe] piglit copyteximage 1D regression

Summary: [llvmpipe] piglit copyteximage 1D regression

Status:	RESOLVED FIXED

Alias:	None

Product:	Mesa
Classification:	Unclassified
Component:	Other (show other bugs)
Version:	git
Hardware:	x86-64 (AMD64) Linux (All)

Importance:	medium normal
Assignee:	mesa-dev
QA Contact:

URL:
Whiteboard:
Keywords:	regression

Depends on:
Blocks:

Reported:	2013-12-11 22:10 UTC by Vinson Lee
Modified:	2014-01-01 02:27 UTC (History)
CC List:	3 users (show)

See Also:
i915 platform:
i915 features:

Attachments

Description Vinson Lee 2013-12-11 22:10:28 UTC

mesa: e84a1ab3c400f819408a7ebe01c2325cd59d94d3 (master)

$ ./bin/copyteximage 1D -auto
Testing GL_TEXTURE_1D
Texture target = GL_TEXTURE_1D, Internal format = GL_RED
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RG
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB8
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB16
Illegal sampler view creation without bind flag
Texture target = GL_TEXTURE_1D, Internal format = GL_RGB16F
Illegal sampler view creation without bind flag
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.stmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.stmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.ldmxcsr
Intrinsic has incorrect argument type!
void (i32*)* @llvm.x86.sse.ldmxcsr
Broken module found, verification continues.

define void @fs5_variant4_partial({ [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* noalias %context, i32 %x, i32 %y, i32, float* noalias %a0, float* noalias %dadx, float* noalias %dady, <16 x i8>** noalias %color_ptr_ptr, i8* noalias %depth, i32 %mask_input, { i64, i32 }* noalias %thread_data, i32* noalias %stride_ptr, i32 %depth_stride) {
entry:
  %mxcsr_ptr10 = alloca i32
  %mxcsr_ptr = alloca i32
  %packed_var = alloca <16 x i8>
  %output8 = alloca <4 x float>
  %output7 = alloca <4 x float>
  %output6 = alloca <4 x float>
  %output = alloca <4 x float>
  %looplimiter = alloca i32
  %execution_mask = alloca <4 x i32>
  %color5 = alloca <4 x float>, i32 2
  %color4 = alloca <4 x float>, i32 2
  %color3 = alloca <4 x float>, i32 2
  %color = alloca <4 x float>, i32 2
  %loop_counter = alloca i32
  %1 = alloca <4 x float>
  %2 = alloca <4 x float>
  %3 = alloca <4 x float>
  %4 = alloca <4 x float>
  %5 = alloca <4 x float>
  %6 = alloca <4 x float>
  %mask_store = alloca <4 x i32>, i32 2
  %7 = sitofp i32 %x to float
  %8 = sitofp i32 %y to float
  %9 = getelementptr float* %dadx, i32 0
  %10 = bitcast float* %9 to <4 x float>*
  %pos.x.dadxaos = load <4 x float>* %10
  %11 = getelementptr float* %dady, i32 0
  %12 = bitcast float* %11 to <4 x float>*
  %pos.x.dadyaos = load <4 x float>* %12
  %13 = getelementptr float* %a0, i32 0
  %14 = bitcast float* %13 to <4 x float>*
  %pos.x.a0aos = load <4 x float>* %14
  %15 = insertelement <4 x float> undef, float %7, i32 0
  %16 = shufflevector <4 x float> %15, <4 x float> undef, <4 x i32> zeroinitializer
  %17 = fmul <4 x float> %16, %pos.x.dadxaos
  %18 = insertelement <4 x float> undef, float %8, i32 0
  %19 = shufflevector <4 x float> %18, <4 x float> undef, <4 x i32> zeroinitializer
  %20 = fmul <4 x float> %19, %pos.x.dadyaos
  %21 = fadd <4 x float> %pos.x.a0aos, %20
  %22 = fadd <4 x float> %21, %17
  %23 = fadd float %7, 5.000000e-01
  %24 = insertelement <4 x float> undef, float %23, i32 0
  %25 = shufflevector <4 x float> %24, <4 x float> undef, <4 x i32> zeroinitializer
  %pos.x.a = fadd <4 x float> %25, <float 0.000000e+00, float 2.000000e+00, float 0.000000e+00, float 2.000000e+00>
  store <4 x float> zeroinitializer, <4 x float>* %6
  store <4 x float> %pos.x.a, <4 x float>* %6
  %26 = fadd float %8, 5.000000e-01
  %27 = insertelement <4 x float> undef, float %26, i32 0
  %28 = shufflevector <4 x float> %27, <4 x float> undef, <4 x i32> zeroinitializer
  %pos.y.a = fadd <4 x float> %28, <float 0.000000e+00, float 0.000000e+00, float 2.000000e+00, float 2.000000e+00>
  store <4 x float> zeroinitializer, <4 x float>* %5
  store <4 x float> %pos.y.a, <4 x float>* %5
  %29 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  %30 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  %31 = shufflevector <4 x float> %22, <4 x float> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  %32 = fmul <4 x float> %29, <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>
  %33 = fmul <4 x float> %30, <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %pos.z.dadq = fadd <4 x float> %32, %33
  %34 = fadd <4 x float> %pos.z.dadq, %pos.z.dadq
  %pos.z.a = fadd <4 x float> %31, %34
  store <4 x float> zeroinitializer, <4 x float>* %4
  store <4 x float> %pos.z.a, <4 x float>* %4
  %35 = shufflevector <4 x float> %pos.x.dadxaos, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %36 = shufflevector <4 x float> %pos.x.dadyaos, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %37 = shufflevector <4 x float> %22, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %38 = fmul <4 x float> %35, <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>
  %39 = fmul <4 x float> %36, <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %pos.w.dadq = fadd <4 x float> %38, %39
  %40 = fadd <4 x float> %pos.w.dadq, %pos.w.dadq
  %pos.w.a = fadd <4 x float> %37, %40
  store <4 x float> zeroinitializer, <4 x float>* %3
  store <4 x float> %pos.w.a, <4 x float>* %3
  %41 = getelementptr float* %dadx, i32 4
  %42 = bitcast float* %41 to <4 x float>*
  %input0.x.dadxaos = load <4 x float>* %42
  %43 = getelementptr float* %dady, i32 4
  %44 = bitcast float* %43 to <4 x float>*
  %input0.x.dadyaos = load <4 x float>* %44
  %45 = getelementptr float* %a0, i32 4
  %46 = bitcast float* %45 to <4 x float>*
  %input0.x.a0aos = load <4 x float>* %46
  %47 = insertelement <4 x float> undef, float %7, i32 0
  %48 = shufflevector <4 x float> %47, <4 x float> undef, <4 x i32> zeroinitializer
  %49 = fmul <4 x float> %48, %input0.x.dadxaos
  %50 = insertelement <4 x float> undef, float %8, i32 0
  %51 = shufflevector <4 x float> %50, <4 x float> undef, <4 x i32> zeroinitializer
  %52 = fmul <4 x float> %51, %input0.x.dadyaos
  %53 = fadd <4 x float> %input0.x.a0aos, %52
  %54 = fadd <4 x float> %53, %49
  %55 = shufflevector <4 x float> %input0.x.dadxaos, <4 x float> undef, <4 x i32> zeroinitializer
  %56 = shufflevector <4 x float> %input0.x.dadyaos, <4 x float> undef, <4 x i32> zeroinitializer
  %57 = shufflevector <4 x float> %54, <4 x float> undef, <4 x i32> zeroinitializer
  %58 = fmul <4 x float> %55, <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>
  %59 = fmul <4 x float> %56, <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %input0.x.dadq = fadd <4 x float> %58, %59
  %60 = fadd <4 x float> %input0.x.dadq, %input0.x.dadq
  %input0.x.a = fadd <4 x float> %57, %60
  store <4 x float> zeroinitializer, <4 x float>* %2
  store <4 x float> %input0.x.a, <4 x float>* %2
  %61 = shufflevector <4 x float> %input0.x.dadxaos, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %62 = shufflevector <4 x float> %input0.x.dadyaos, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %63 = shufflevector <4 x float> %54, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %64 = fmul <4 x float> %61, <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>
  %65 = fmul <4 x float> %62, <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %input0.y.dadq = fadd <4 x float> %64, %65
  %66 = fadd <4 x float> %input0.y.dadq, %input0.y.dadq
  %input0.y.a = fadd <4 x float> %63, %66
  store <4 x float> zeroinitializer, <4 x float>* %1
  store <4 x float> %input0.y.a, <4 x float>* %1
  %mask_ptr = getelementptr <4 x i32>* %mask_store, i32 0
  %67 = lshr i32 %mask_input, 0
  %68 = insertelement <4 x i32> undef, i32 %67, i32 0
  %69 = shufflevector <4 x i32> %68, <4 x i32> undef, <4 x i32> zeroinitializer
  %70 = and <4 x i32> %69, <i32 1, i32 2, i32 16, i32 32>
  %71 = icmp ne <4 x i32> %70, zeroinitializer
  %72 = sext <4 x i1> %71 to <4 x i32>
  store <4 x i32> %72, <4 x i32>* %mask_ptr
  %mask_ptr1 = getelementptr <4 x i32>* %mask_store, i32 1
  %73 = lshr i32 %mask_input, 2
  %74 = insertelement <4 x i32> undef, i32 %73, i32 0
  %75 = shufflevector <4 x i32> %74, <4 x i32> undef, <4 x i32> zeroinitializer
  %76 = and <4 x i32> %75, <i32 1, i32 2, i32 16, i32 32>
  %77 = icmp ne <4 x i32> %76, zeroinitializer
  %78 = sext <4 x i1> %77 to <4 x i32>
  store <4 x i32> %78, <4 x i32>* %mask_ptr1
  %context.stencil_ref_front_ptr = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 2
  %context.stencil_ref_front = load i32* %context.stencil_ref_front_ptr
  %context.stencil_ref_back_ptr = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 3
  %context.stencil_ref_back = load i32* %context.stencil_ref_back_ptr
  %context.constants_ptr = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 0
  store i32 0, i32* %loop_counter
  store i32 0, i32* %loop_counter
  br label %loop_begin

loop_begin:                                       ; preds = %skip, %entry
  %79 = load i32* %loop_counter
  %80 = icmp ult i32 %79, 2
  br i1 %80, label %loop_body, label %loop_exit

loop_body:                                        ; preds = %loop_begin
  %mask_ptr2 = getelementptr <4 x i32>* %mask_store, i32 %79
  %81 = load <4 x i32>* %mask_ptr2
  store <4 x i32> zeroinitializer, <4 x i32>* %execution_mask
  store <4 x i32> %81, <4 x i32>* %execution_mask
  %82 = load <4 x i32>* %execution_mask
  %83 = bitcast <4 x i32> %82 to i128
  %84 = icmp eq i128 %83, 0
  br i1 %84, label %skip, label %85

; <label>:85                                      ; preds = %loop_body
  %86 = bitcast <4 x float>* %6 to float*
  %87 = getelementptr float* %86, i32 %79
  %88 = load float* %87
  %89 = insertelement <4 x float> undef, float %88, i32 0
  %90 = shufflevector <4 x float> %89, <4 x float> undef, <4 x i32> zeroinitializer
  %pos.x = fadd <4 x float> %90, <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>
  %91 = bitcast <4 x float>* %5 to float*
  %92 = getelementptr float* %91, i32 %79
  %93 = load float* %92
  %94 = insertelement <4 x float> undef, float %93, i32 0
  %95 = shufflevector <4 x float> %94, <4 x float> undef, <4 x i32> zeroinitializer
  %pos.y = fadd <4 x float> %95, <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %96 = bitcast <4 x float>* %4 to float*
  %97 = getelementptr float* %96, i32 %79
  %98 = load float* %97
  %99 = insertelement <4 x float> undef, float %98, i32 0
  %100 = shufflevector <4 x float> %99, <4 x float> undef, <4 x i32> zeroinitializer
  %101 = fadd <4 x float> %100, %pos.z.dadq
  %pos.z = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %101, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>)
  %102 = bitcast <4 x float>* %3 to float*
  %103 = getelementptr float* %102, i32 %79
  %104 = load float* %103
  %105 = insertelement <4 x float> undef, float %104, i32 0
  %106 = shufflevector <4 x float> %105, <4 x float> undef, <4 x i32> zeroinitializer
  %pos.w = fadd <4 x float> %106, %pos.w.dadq
  %107 = bitcast <4 x float>* %2 to float*
  %108 = getelementptr float* %107, i32 %79
  %109 = load float* %108
  %110 = insertelement <4 x float> undef, float %109, i32 0
  %111 = shufflevector <4 x float> %110, <4 x float> undef, <4 x i32> zeroinitializer
  %input0.x = fadd <4 x float> %111, %input0.x.dadq
  %112 = bitcast <4 x float>* %1 to float*
  %113 = getelementptr float* %112, i32 %79
  %114 = load float* %113
  %115 = insertelement <4 x float> undef, float %114, i32 0
  %116 = shufflevector <4 x float> %115, <4 x float> undef, <4 x i32> zeroinitializer
  %input0.y = fadd <4 x float> %116, %input0.y.dadq
  store i32 0, i32* %looplimiter
  store i32 65535, i32* %looplimiter
  store <4 x float> zeroinitializer, <4 x float>* %output
  store <4 x float> zeroinitializer, <4 x float>* %output6
  store <4 x float> zeroinitializer, <4 x float>* %output7
  store <4 x float> zeroinitializer, <4 x float>* %output8
  %117 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 0
  %context.texture0.width = load i32* %117
  %context.texture0.row_stride = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 6
  %context.texture0.img_stride = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 7
  %118 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 5
  %context.texture0.base_ptr = load i8** %118
  %context.texture0.mip_offsets = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 8
  %119 = insertelement <4 x i32> undef, i32 %context.texture0.width, i32 0
  %120 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 1
  %context.texture0.height = load i32* %120
  %121 = insertelement <4 x i32> %119, i32 %context.texture0.height, i32 1
  %122 = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 7, i32 0, i32 3
  %context.texture0.first_level = load i32* %122
  store <16 x i8> zeroinitializer, <16 x i8>* %packed_var
  %123 = insertelement <4 x i32> undef, i32 %context.texture0.first_level, i32 0
  %124 = shufflevector <4 x i32> %123, <4 x i32> undef, <4 x i32> zeroinitializer
  %minify = lshr <4 x i32> %121, %124
  %125 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %minify, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
  %126 = getelementptr [14 x i32]* %context.texture0.row_stride, i32 0, i32 %context.texture0.first_level
  %127 = load i32* %126
  %128 = insertelement <4 x i32> undef, i32 %127, i32 0
  %129 = shufflevector <4 x i32> %128, <4 x i32> undef, <4 x i32> zeroinitializer
  %130 = getelementptr [14 x i32]* %context.texture0.mip_offsets, i32 0, i32 %context.texture0.first_level
  %131 = load i32* %130
  %132 = getelementptr i8* %context.texture0.base_ptr, i32 %131
  %133 = shufflevector <4 x i32> %125, <4 x i32> undef, <4 x i32> zeroinitializer
  %134 = shufflevector <4 x i32> %125, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %135 = shl <4 x i32> %125, <i32 8, i32 8, i32 8, i32 8>
  %136 = sitofp <4 x i32> %135 to <4 x float>
  %137 = shufflevector <4 x float> %136, <4 x float> undef, <4 x i32> zeroinitializer
  %138 = shufflevector <4 x float> %136, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %139 = fmul <4 x float> %input0.x, %137
  %140 = fmul <4 x float> %input0.y, %138
  %141 = fptosi <4 x float> %139 to <4 x i32>
  %142 = fptosi <4 x float> %140 to <4 x i32>
  %143 = ashr <4 x i32> %141, <i32 8, i32 8, i32 8, i32 8>
  %144 = ashr <4 x i32> %142, <i32 8, i32 8, i32 8, i32 8>
  %145 = sub <4 x i32> %133, <i32 1, i32 1, i32 1, i32 1>
  %146 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %143, <4 x i32> zeroinitializer)
  %147 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %146, <4 x i32> %145)
  %148 = mul <4 x i32> %147, <i32 4, i32 4, i32 4, i32 4>
  %149 = sub <4 x i32> %134, <i32 1, i32 1, i32 1, i32 1>
  %150 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %144, <4 x i32> zeroinitializer)
  %151 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %150, <4 x i32> %149)
  %152 = mul <4 x i32> %151, %129
  %153 = add <4 x i32> %148, %152
  %154 = extractelement <4 x i32> %153, i32 0
  %155 = getelementptr i8* %132, i32 %154
  %156 = bitcast i8* %155 to i32*
  %157 = load i32* %156
  %158 = insertelement <4 x i32> undef, i32 %157, i32 0
  %159 = extractelement <4 x i32> %153, i32 1
  %160 = getelementptr i8* %132, i32 %159
  %161 = bitcast i8* %160 to i32*
  %162 = load i32* %161
  %163 = insertelement <4 x i32> %158, i32 %162, i32 1
  %164 = extractelement <4 x i32> %153, i32 2
  %165 = getelementptr i8* %132, i32 %164
  %166 = bitcast i8* %165 to i32*
  %167 = load i32* %166
  %168 = insertelement <4 x i32> %163, i32 %167, i32 2
  %169 = extractelement <4 x i32> %153, i32 3
  %170 = getelementptr i8* %132, i32 %169
  %171 = bitcast i8* %170 to i32*
  %172 = load i32* %171
  %173 = insertelement <4 x i32> %168, i32 %172, i32 3
  %174 = bitcast <4 x i32> %173 to <16 x i8>
  store <16 x i8> %174, <16 x i8>* %packed_var
  %175 = load <16 x i8>* %packed_var
  %176 = bitcast <16 x i8> %175 to <4 x i32>
  %177 = and <4 x i32> %176, <i32 255, i32 255, i32 255, i32 255>
  %178 = sitofp <4 x i32> %177 to <4 x float>
  %179 = fmul <4 x float> %178, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
  %180 = lshr <4 x i32> %176, <i32 8, i32 8, i32 8, i32 8>
  %181 = and <4 x i32> %180, <i32 255, i32 255, i32 255, i32 255>
  %182 = sitofp <4 x i32> %181 to <4 x float>
  %183 = fmul <4 x float> %182, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
  %184 = lshr <4 x i32> %176, <i32 16, i32 16, i32 16, i32 16>
  %185 = and <4 x i32> %184, <i32 255, i32 255, i32 255, i32 255>
  %186 = sitofp <4 x i32> %185 to <4 x float>
  %187 = fmul <4 x float> %186, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
  %188 = lshr <4 x i32> %176, <i32 24, i32 24, i32 24, i32 24>
  %189 = sitofp <4 x i32> %188 to <4 x float>
  %190 = fmul <4 x float> %189, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
  store <4 x float> %187, <4 x float>* %output
  store <4 x float> %183, <4 x float>* %output6
  store <4 x float> %179, <4 x float>* %output7
  store <4 x float> %190, <4 x float>* %output8
  %color0.r = load <4 x float>* %output
  %191 = getelementptr <4 x float>* %color, i32 %79
  store <4 x float> %color0.r, <4 x float>* %191
  %color0.g = load <4 x float>* %output6
  %192 = getelementptr <4 x float>* %color3, i32 %79
  store <4 x float> %color0.g, <4 x float>* %192
  %color0.b = load <4 x float>* %output7
  %193 = getelementptr <4 x float>* %color4, i32 %79
  store <4 x float> %color0.b, <4 x float>* %193
  %color0.a = load <4 x float>* %output8
  %194 = getelementptr <4 x float>* %color5, i32 %79
  store <4 x float> %color0.a, <4 x float>* %194
  br label %skip

skip:                                             ; preds = %85, %loop_body
  %195 = load <4 x i32>* %execution_mask
  store <4 x i32> %195, <4 x i32>* %mask_ptr2
  %196 = add i32 %79, 1
  store i32 %196, i32* %loop_counter
  br label %loop_begin

loop_exit:                                        ; preds = %loop_begin
  %197 = getelementptr <4 x i32>* %mask_store, i32 0
  %mask = load <4 x i32>* %197
  %198 = getelementptr <4 x float>* %color, i32 0
  %199 = getelementptr <4 x float>* %color3, i32 0
  %200 = getelementptr <4 x float>* %color4, i32 0
  %201 = getelementptr <4 x float>* %color5, i32 0
  %202 = getelementptr <4 x i32>* %mask_store, i32 1
  %mask9 = load <4 x i32>* %202
  %203 = getelementptr <4 x float>* %color, i32 1
  %204 = getelementptr <4 x float>* %color3, i32 1
  %205 = getelementptr <4 x float>* %color4, i32 1
  %206 = getelementptr <4 x float>* %color5, i32 1
  %207 = getelementptr <16 x i8>** %color_ptr_ptr, i32 0
  %color_ptr0 = load <16 x i8>** %207
  %208 = getelementptr i32* %stride_ptr, i32 0
  %209 = load i32* %208
  store i32 0, i32* %mxcsr_ptr
  call void @llvm.x86.sse.stmxcsr(i32* %mxcsr_ptr)
  store i32 0, i32* %mxcsr_ptr10
  call void @llvm.x86.sse.stmxcsr(i32* %mxcsr_ptr10)
  %mxcsr = load i32* %mxcsr_ptr10
  %210 = and i32 %mxcsr, -32833
  store i32 %210, i32* %mxcsr_ptr10
  call void @llvm.x86.sse.ldmxcsr(i32* %mxcsr_ptr10)
  %211 = load <4 x float>* %201
  %212 = load <4 x float>* %198
  %213 = load <4 x float>* %199
  %214 = load <4 x float>* %200
  %215 = load <4 x float>* %206
  %216 = load <4 x float>* %203
  %217 = load <4 x float>* %204
  %218 = load <4 x float>* %205
  %219 = shufflevector <4 x float> %212, <4 x float> %213, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %220 = shufflevector <4 x float> %214, <4 x float> %211, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %221 = shufflevector <4 x float> %212, <4 x float> %213, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %222 = shufflevector <4 x float> %214, <4 x float> %211, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %t0 = bitcast <4 x float> %219 to <2 x double>
  %t1 = bitcast <4 x float> %220 to <2 x double>
  %t2 = bitcast <4 x float> %221 to <2 x double>
  %t3 = bitcast <4 x float> %222 to <2 x double>
  %223 = shufflevector <2 x double> %t0, <2 x double> %t1, <2 x i32> <i32 0, i32 2>
  %224 = shufflevector <2 x double> %t0, <2 x double> %t1, <2 x i32> <i32 1, i32 3>
  %225 = shufflevector <2 x double> %t2, <2 x double> %t3, <2 x i32> <i32 0, i32 2>
  %226 = shufflevector <2 x double> %t2, <2 x double> %t3, <2 x i32> <i32 1, i32 3>
  %dst0 = bitcast <2 x double> %223 to <4 x float>
  %dst1 = bitcast <2 x double> %224 to <4 x float>
  %dst2 = bitcast <2 x double> %225 to <4 x float>
  %dst3 = bitcast <2 x double> %226 to <4 x float>
  %227 = shufflevector <4 x float> %216, <4 x float> %217, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %228 = shufflevector <4 x float> %218, <4 x float> %215, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %229 = shufflevector <4 x float> %216, <4 x float> %217, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %230 = shufflevector <4 x float> %218, <4 x float> %215, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %t011 = bitcast <4 x float> %227 to <2 x double>
  %t112 = bitcast <4 x float> %228 to <2 x double>
  %t213 = bitcast <4 x float> %229 to <2 x double>
  %t314 = bitcast <4 x float> %230 to <2 x double>
  %231 = shufflevector <2 x double> %t011, <2 x double> %t112, <2 x i32> <i32 0, i32 2>
  %232 = shufflevector <2 x double> %t011, <2 x double> %t112, <2 x i32> <i32 1, i32 3>
  %233 = shufflevector <2 x double> %t213, <2 x double> %t314, <2 x i32> <i32 0, i32 2>
  %234 = shufflevector <2 x double> %t213, <2 x double> %t314, <2 x i32> <i32 1, i32 3>
  %dst015 = bitcast <2 x double> %231 to <4 x float>
  %dst116 = bitcast <2 x double> %232 to <4 x float>
  %dst217 = bitcast <2 x double> %233 to <4 x float>
  %dst318 = bitcast <2 x double> %234 to <4 x float>
  %context.f_blend_color_ptr = getelementptr { [16 x float*], float, i32, i32, i8*, float*, { float, float }*, [32 x { i32, i32, i32, i32, i32, i8*, [14 x i32], [14 x i32], [14 x i32] }], [16 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 5
  %context.f_blend_color = load float** %context.f_blend_color_ptr
  %235 = bitcast float* %context.f_blend_color to <4 x float>*
  %236 = getelementptr <4 x float>* %235, i32 0
  %237 = load <4 x float>* %236
  %238 = shufflevector <4 x float> %237, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %239 = shufflevector <4 x float> %237, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
  %240 = bitcast <4 x i32> %mask to <2 x i64>
  %241 = bitcast <4 x i32> %mask9 to <2 x i64>
  %242 = shufflevector <2 x i64> %240, <2 x i64> %241, <2 x i32> <i32 0, i32 2>
  %243 = shufflevector <2 x i64> %240, <2 x i64> %241, <2 x i32> <i32 1, i32 3>
  %244 = bitcast <2 x i64> %242 to <4 x i32>
  %245 = bitcast <2 x i64> %243 to <4 x i32>
  %246 = extractelement <4 x i32> %245, i32 3
  %247 = extractelement <4 x i32> %245, i32 2
  %248 = extractelement <4 x i32> %245, i32 1
  %249 = extractelement <4 x i32> %245, i32 0
  %250 = extractelement <4 x i32> %244, i32 3
  %251 = extractelement <4 x i32> %244, i32 2
  %252 = extractelement <4 x i32> %244, i32 1
  %253 = extractelement <4 x i32> %244, i32 0
  %254 = sext i32 %253 to i96
  %255 = bitcast i96 %254 to <3 x i32>
  %256 = shufflevector <3 x i32> %255, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %257 = sext i32 %252 to i96
  %258 = bitcast i96 %257 to <3 x i32>
  %259 = shufflevector <3 x i32> %258, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %260 = sext i32 %251 to i96
  %261 = bitcast i96 %260 to <3 x i32>
  %262 = shufflevector <3 x i32> %261, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %263 = sext i32 %250 to i96
  %264 = bitcast i96 %263 to <3 x i32>
  %265 = shufflevector <3 x i32> %264, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %266 = sext i32 %249 to i96
  %267 = bitcast i96 %266 to <3 x i32>
  %268 = shufflevector <3 x i32> %267, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %269 = sext i32 %248 to i96
  %270 = bitcast i96 %269 to <3 x i32>
  %271 = shufflevector <3 x i32> %270, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %272 = sext i32 %247 to i96
  %273 = bitcast i96 %272 to <3 x i32>
  %274 = shufflevector <3 x i32> %273, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %275 = sext i32 %246 to i96
  %276 = bitcast i96 %275 to <3 x i32>
  %277 = shufflevector <3 x i32> %276, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %278 = bitcast <4 x float> %211 to <2 x i64>
  %279 = bitcast <4 x float> %215 to <2 x i64>
  %280 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 0, i32 2>
  %281 = shufflevector <2 x i64> %278, <2 x i64> %279, <2 x i32> <i32 1, i32 3>
  %282 = bitcast <2 x i64> %280 to <4 x float>
  %283 = bitcast <2 x i64> %281 to <4 x float>
  %284 = extractelement <4 x float> %283, i32 3
  %285 = extractelement <4 x float> %283, i32 2
  %286 = extractelement <4 x float> %283, i32 1
  %287 = extractelement <4 x float> %283, i32 0
  %288 = extractelement <4 x float> %282, i32 3
  %289 = extractelement <4 x float> %282, i32 2
  %290 = extractelement <4 x float> %282, i32 1
  %291 = extractelement <4 x float> %282, i32 0
  %292 = insertelement <4 x float> undef, float %291, i32 0
  %293 = shufflevector <4 x float> %292, <4 x float> undef, <4 x i32> zeroinitializer
  %294 = insertelement <4 x float> undef, float %290, i32 0
  %295 = shufflevector <4 x float> %294, <4 x float> undef, <4 x i32> zeroinitializer
  %296 = insertelement <4 x float> undef, float %289, i32 0
  %297 = shufflevector <4 x float> %296, <4 x float> undef, <4 x i32> zeroinitializer
  %298 = insertelement <4 x float> undef, float %288, i32 0
  %299 = shufflevector <4 x float> %298, <4 x float> undef, <4 x i32> zeroinitializer
  %300 = insertelement <4 x float> undef, float %287, i32 0
  %301 = shufflevector <4 x float> %300, <4 x float> undef, <4 x i32> zeroinitializer
  %302 = insertelement <4 x float> undef, float %286, i32 0
  %303 = shufflevector <4 x float> %302, <4 x float> undef, <4 x i32> zeroinitializer
  %304 = insertelement <4 x float> undef, float %285, i32 0
  %305 = shufflevector <4 x float> %304, <4 x float> undef, <4 x i32> zeroinitializer
  %306 = insertelement <4 x float> undef, float %284, i32 0
  %307 = shufflevector <4 x float> %306, <4 x float> undef, <4 x i32> zeroinitializer
  %308 = mul i32 0, %209
  %309 = add i32 0, %308
  %310 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %309
  %311 = bitcast i8* %310 to <3 x i16>*
  %312 = load <3 x i16>* %311, align 2
  %313 = mul i32 0, %209
  %314 = add i32 6, %313
  %315 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %314
  %316 = bitcast i8* %315 to <3 x i16>*
  %317 = load <3 x i16>* %316, align 2
  %318 = mul i32 0, %209
  %319 = add i32 12, %318
  %320 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %319
  %321 = bitcast i8* %320 to <3 x i16>*
  %322 = load <3 x i16>* %321, align 2
  %323 = mul i32 0, %209
  %324 = add i32 18, %323
  %325 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %324
  %326 = bitcast i8* %325 to <3 x i16>*
  %327 = load <3 x i16>* %326, align 2
  %328 = shufflevector <3 x i16> %312, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %329 = shufflevector <3 x i16> %317, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %330 = shufflevector <3 x i16> %322, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %331 = shufflevector <3 x i16> %327, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %332 = zext <4 x i16> %328 to <4 x i32>
  %333 = shl <4 x i32> %332, <i32 13, i32 13, i32 13, i32 13>
  %334 = and <4 x i32> %333, <i32 268427264, i32 268427264, i32 268427264, i32 268427264>
  %335 = bitcast <4 x i32> %334 to <4 x float>
  %336 = fmul <4 x float> %335, <float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
  %337 = fcmp uge <4 x float> %335, <float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
  %338 = sext <4 x i1> %337 to <4 x i32>
  %339 = bitcast <4 x float> %336 to <4 x i32>
  %340 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, %338
  %341 = or <4 x i32> %340, %339
  %342 = shl <4 x i32> %333, <i32 3, i32 3, i32 3, i32 3>
  %343 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %342
  %344 = or <4 x i32> %341, %343
  %345 = bitcast <4 x i32> %344 to <4 x float>
  %346 = zext <4 x i16> %329 to <4 x i32>
  %347 = shl <4 x i32> %346, <i32 13, i32 13, i32 13, i32 13>
  %348 = and <4 x i32> %347, <i32 268427264, i32 268427264, i32 268427264, i32 268427264>
  %349 = bitcast <4 x i32> %348 to <4 x float>
  %350 = fmul <4 x float> %349, <float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
  %351 = fcmp uge <4 x float> %349, <float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
  %352 = sext <4 x i1> %351 to <4 x i32>
  %353 = bitcast <4 x float> %350 to <4 x i32>
  %354 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, %352
  %355 = or <4 x i32> %354, %353
  %356 = shl <4 x i32> %347, <i32 3, i32 3, i32 3, i32 3>
  %357 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %356
  %358 = or <4 x i32> %355, %357
  %359 = bitcast <4 x i32> %358 to <4 x float>
  %360 = zext <4 x i16> %330 to <4 x i32>
  %361 = shl <4 x i32> %360, <i32 13, i32 13, i32 13, i32 13>
  %362 = and <4 x i32> %361, <i32 268427264, i32 268427264, i32 268427264, i32 268427264>
  %363 = bitcast <4 x i32> %362 to <4 x float>
  %364 = fmul <4 x float> %363, <float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
  %365 = fcmp uge <4 x float> %363, <float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
  %366 = sext <4 x i1> %365 to <4 x i32>
  %367 = bitcast <4 x float> %364 to <4 x i32>
  %368 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, %366
  %369 = or <4 x i32> %368, %367
  %370 = shl <4 x i32> %361, <i32 3, i32 3, i32 3, i32 3>
  %371 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %370
  %372 = or <4 x i32> %369, %371
  %373 = bitcast <4 x i32> %372 to <4 x float>
  %374 = zext <4 x i16> %331 to <4 x i32>
  %375 = shl <4 x i32> %374, <i32 13, i32 13, i32 13, i32 13>
  %376 = and <4 x i32> %375, <i32 268427264, i32 268427264, i32 268427264, i32 268427264>
  %377 = bitcast <4 x i32> %376 to <4 x float>
  %378 = fmul <4 x float> %377, <float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000, float 0x46F0000000000000>
  %379 = fcmp uge <4 x float> %377, <float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000, float 0x39F0000000000000>
  %380 = sext <4 x i1> %379 to <4 x i32>
  %381 = bitcast <4 x float> %378 to <4 x i32>
  %382 = and <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, %380
  %383 = or <4 x i32> %382, %381
  %384 = shl <4 x i32> %375, <i32 3, i32 3, i32 3, i32 3>
  %385 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %384
  %386 = or <4 x i32> %383, %385
  %387 = bitcast <4 x i32> %386 to <4 x float>
  %388 = bitcast <4 x i32> %256 to <4 x float>
  %389 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %345, <4 x float> %dst0, <4 x float> %388)
  %390 = bitcast <4 x i32> %259 to <4 x float>
  %391 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %359, <4 x float> %dst1, <4 x float> %390)
  %392 = bitcast <4 x i32> %262 to <4 x float>
  %393 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %373, <4 x float> %dst015, <4 x float> %392)
  %394 = bitcast <4 x i32> %265 to <4 x float>
  %395 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %387, <4 x float> %dst116, <4 x float> %394)
  %396 = bitcast <4 x float> %dst2 to <4 x i32>
  %397 = and <4 x i32> %396, %268
  %398 = xor <4 x i32> %268, <i32 -1, i32 -1, i32 -1, i32 -1>
  %399 = and <4 x i32> zeroinitializer, %398
  %400 = or <4 x i32> %397, %399
  %401 = bitcast <4 x i32> %400 to <4 x float>
  %402 = bitcast <4 x float> %dst3 to <4 x i32>
  %403 = and <4 x i32> %402, %271
  %404 = xor <4 x i32> %271, <i32 -1, i32 -1, i32 -1, i32 -1>
  %405 = and <4 x i32> zeroinitializer, %404
  %406 = or <4 x i32> %403, %405
  %407 = bitcast <4 x i32> %406 to <4 x float>
  %408 = bitcast <4 x float> %dst217 to <4 x i32>
  %409 = and <4 x i32> %408, %274
  %410 = xor <4 x i32> %274, <i32 -1, i32 -1, i32 -1, i32 -1>
  %411 = and <4 x i32> zeroinitializer, %410
  %412 = or <4 x i32> %409, %411
  %413 = bitcast <4 x i32> %412 to <4 x float>
  %414 = bitcast <4 x float> %dst318 to <4 x i32>
  %415 = and <4 x i32> %414, %277
  %416 = xor <4 x i32> %277, <i32 -1, i32 -1, i32 -1, i32 -1>
  %417 = and <4 x i32> zeroinitializer, %416
  %418 = or <4 x i32> %415, %417
  %419 = bitcast <4 x i32> %418 to <4 x float>
  %420 = bitcast <4 x float> %389 to <4 x i32>
  %421 = bitcast <4 x float> %389 to <4 x i32>
  %422 = and <4 x i32> %421, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %423 = bitcast <4 x i32> %422 to <4 x float>
  %424 = fmul <4 x float> %423, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %425 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %424, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %426 = bitcast <4 x float> %425 to <4 x i32>
  %427 = bitcast <4 x float> %389 to <4 x i32>
  %428 = and <4 x i32> %427, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %429 = bitcast <4 x i32> %428 to <4 x float>
  %430 = bitcast <4 x float> %429 to <4 x i32>
  %431 = icmp sgt <4 x i32> %430, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %432 = sext <4 x i1> %431 to <4 x i32>
  %433 = icmp eq <4 x i32> %430, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %434 = sext <4 x i1> %433 to <4 x i32>
  %435 = or <4 x i32> %432, %434
  %436 = and <4 x i32> %432, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %437 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %436
  %438 = bitcast <4 x i32> %435 to <16 x i8>
  %439 = bitcast <4 x i32> %437 to <16 x i8>
  %440 = bitcast <4 x i32> %426 to <16 x i8>
  %441 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %440, <16 x i8> %439, <16 x i8> %438)
  %442 = bitcast <16 x i8> %441 to <4 x i32>
  %443 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %420
  %444 = lshr <4 x i32> %443, <i32 3, i32 3, i32 3, i32 3>
  %445 = or <4 x i32> %444, %442
  %446 = ashr <4 x i32> %445, <i32 13, i32 13, i32 13, i32 13>
  %447 = trunc <4 x i32> %446 to <4 x i16>
  %448 = bitcast <4 x float> %391 to <4 x i32>
  %449 = bitcast <4 x float> %391 to <4 x i32>
  %450 = and <4 x i32> %449, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %451 = bitcast <4 x i32> %450 to <4 x float>
  %452 = fmul <4 x float> %451, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %453 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %452, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %454 = bitcast <4 x float> %453 to <4 x i32>
  %455 = bitcast <4 x float> %391 to <4 x i32>
  %456 = and <4 x i32> %455, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %457 = bitcast <4 x i32> %456 to <4 x float>
  %458 = bitcast <4 x float> %457 to <4 x i32>
  %459 = icmp sgt <4 x i32> %458, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %460 = sext <4 x i1> %459 to <4 x i32>
  %461 = icmp eq <4 x i32> %458, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %462 = sext <4 x i1> %461 to <4 x i32>
  %463 = or <4 x i32> %460, %462
  %464 = and <4 x i32> %460, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %465 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %464
  %466 = bitcast <4 x i32> %463 to <16 x i8>
  %467 = bitcast <4 x i32> %465 to <16 x i8>
  %468 = bitcast <4 x i32> %454 to <16 x i8>
  %469 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %468, <16 x i8> %467, <16 x i8> %466)
  %470 = bitcast <16 x i8> %469 to <4 x i32>
  %471 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %448
  %472 = lshr <4 x i32> %471, <i32 3, i32 3, i32 3, i32 3>
  %473 = or <4 x i32> %472, %470
  %474 = ashr <4 x i32> %473, <i32 13, i32 13, i32 13, i32 13>
  %475 = trunc <4 x i32> %474 to <4 x i16>
  %476 = bitcast <4 x float> %393 to <4 x i32>
  %477 = bitcast <4 x float> %393 to <4 x i32>
  %478 = and <4 x i32> %477, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %479 = bitcast <4 x i32> %478 to <4 x float>
  %480 = fmul <4 x float> %479, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %481 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %480, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %482 = bitcast <4 x float> %481 to <4 x i32>
  %483 = bitcast <4 x float> %393 to <4 x i32>
  %484 = and <4 x i32> %483, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %485 = bitcast <4 x i32> %484 to <4 x float>
  %486 = bitcast <4 x float> %485 to <4 x i32>
  %487 = icmp sgt <4 x i32> %486, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %488 = sext <4 x i1> %487 to <4 x i32>
  %489 = icmp eq <4 x i32> %486, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %490 = sext <4 x i1> %489 to <4 x i32>
  %491 = or <4 x i32> %488, %490
  %492 = and <4 x i32> %488, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %493 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %492
  %494 = bitcast <4 x i32> %491 to <16 x i8>
  %495 = bitcast <4 x i32> %493 to <16 x i8>
  %496 = bitcast <4 x i32> %482 to <16 x i8>
  %497 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %496, <16 x i8> %495, <16 x i8> %494)
  %498 = bitcast <16 x i8> %497 to <4 x i32>
  %499 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %476
  %500 = lshr <4 x i32> %499, <i32 3, i32 3, i32 3, i32 3>
  %501 = or <4 x i32> %500, %498
  %502 = ashr <4 x i32> %501, <i32 13, i32 13, i32 13, i32 13>
  %503 = trunc <4 x i32> %502 to <4 x i16>
  %504 = bitcast <4 x float> %395 to <4 x i32>
  %505 = bitcast <4 x float> %395 to <4 x i32>
  %506 = and <4 x i32> %505, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %507 = bitcast <4 x i32> %506 to <4 x float>
  %508 = fmul <4 x float> %507, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %509 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %508, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %510 = bitcast <4 x float> %509 to <4 x i32>
  %511 = bitcast <4 x float> %395 to <4 x i32>
  %512 = and <4 x i32> %511, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %513 = bitcast <4 x i32> %512 to <4 x float>
  %514 = bitcast <4 x float> %513 to <4 x i32>
  %515 = icmp sgt <4 x i32> %514, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %516 = sext <4 x i1> %515 to <4 x i32>
  %517 = icmp eq <4 x i32> %514, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %518 = sext <4 x i1> %517 to <4 x i32>
  %519 = or <4 x i32> %516, %518
  %520 = and <4 x i32> %516, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %521 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %520
  %522 = bitcast <4 x i32> %519 to <16 x i8>
  %523 = bitcast <4 x i32> %521 to <16 x i8>
  %524 = bitcast <4 x i32> %510 to <16 x i8>
  %525 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %524, <16 x i8> %523, <16 x i8> %522)
  %526 = bitcast <16 x i8> %525 to <4 x i32>
  %527 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %504
  %528 = lshr <4 x i32> %527, <i32 3, i32 3, i32 3, i32 3>
  %529 = or <4 x i32> %528, %526
  %530 = ashr <4 x i32> %529, <i32 13, i32 13, i32 13, i32 13>
  %531 = trunc <4 x i32> %530 to <4 x i16>
  %532 = bitcast <4 x float> %401 to <4 x i32>
  %533 = bitcast <4 x float> %401 to <4 x i32>
  %534 = and <4 x i32> %533, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %535 = bitcast <4 x i32> %534 to <4 x float>
  %536 = fmul <4 x float> %535, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %537 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %536, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %538 = bitcast <4 x float> %537 to <4 x i32>
  %539 = bitcast <4 x float> %401 to <4 x i32>
  %540 = and <4 x i32> %539, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %541 = bitcast <4 x i32> %540 to <4 x float>
  %542 = bitcast <4 x float> %541 to <4 x i32>
  %543 = icmp sgt <4 x i32> %542, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %544 = sext <4 x i1> %543 to <4 x i32>
  %545 = icmp eq <4 x i32> %542, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %546 = sext <4 x i1> %545 to <4 x i32>
  %547 = or <4 x i32> %544, %546
  %548 = and <4 x i32> %544, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %549 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %548
  %550 = bitcast <4 x i32> %547 to <16 x i8>
  %551 = bitcast <4 x i32> %549 to <16 x i8>
  %552 = bitcast <4 x i32> %538 to <16 x i8>
  %553 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %552, <16 x i8> %551, <16 x i8> %550)
  %554 = bitcast <16 x i8> %553 to <4 x i32>
  %555 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %532
  %556 = lshr <4 x i32> %555, <i32 3, i32 3, i32 3, i32 3>
  %557 = or <4 x i32> %556, %554
  %558 = ashr <4 x i32> %557, <i32 13, i32 13, i32 13, i32 13>
  %559 = trunc <4 x i32> %558 to <4 x i16>
  %560 = bitcast <4 x float> %407 to <4 x i32>
  %561 = bitcast <4 x float> %407 to <4 x i32>
  %562 = and <4 x i32> %561, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %563 = bitcast <4 x i32> %562 to <4 x float>
  %564 = fmul <4 x float> %563, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %565 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %564, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %566 = bitcast <4 x float> %565 to <4 x i32>
  %567 = bitcast <4 x float> %407 to <4 x i32>
  %568 = and <4 x i32> %567, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %569 = bitcast <4 x i32> %568 to <4 x float>
  %570 = bitcast <4 x float> %569 to <4 x i32>
  %571 = icmp sgt <4 x i32> %570, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %572 = sext <4 x i1> %571 to <4 x i32>
  %573 = icmp eq <4 x i32> %570, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %574 = sext <4 x i1> %573 to <4 x i32>
  %575 = or <4 x i32> %572, %574
  %576 = and <4 x i32> %572, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %577 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %576
  %578 = bitcast <4 x i32> %575 to <16 x i8>
  %579 = bitcast <4 x i32> %577 to <16 x i8>
  %580 = bitcast <4 x i32> %566 to <16 x i8>
  %581 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %580, <16 x i8> %579, <16 x i8> %578)
  %582 = bitcast <16 x i8> %581 to <4 x i32>
  %583 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %560
  %584 = lshr <4 x i32> %583, <i32 3, i32 3, i32 3, i32 3>
  %585 = or <4 x i32> %584, %582
  %586 = ashr <4 x i32> %585, <i32 13, i32 13, i32 13, i32 13>
  %587 = trunc <4 x i32> %586 to <4 x i16>
  %588 = bitcast <4 x float> %413 to <4 x i32>
  %589 = bitcast <4 x float> %413 to <4 x i32>
  %590 = and <4 x i32> %589, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %591 = bitcast <4 x i32> %590 to <4 x float>
  %592 = fmul <4 x float> %591, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %593 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %592, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %594 = bitcast <4 x float> %593 to <4 x i32>
  %595 = bitcast <4 x float> %413 to <4 x i32>
  %596 = and <4 x i32> %595, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %597 = bitcast <4 x i32> %596 to <4 x float>
  %598 = bitcast <4 x float> %597 to <4 x i32>
  %599 = icmp sgt <4 x i32> %598, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %600 = sext <4 x i1> %599 to <4 x i32>
  %601 = icmp eq <4 x i32> %598, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %602 = sext <4 x i1> %601 to <4 x i32>
  %603 = or <4 x i32> %600, %602
  %604 = and <4 x i32> %600, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %605 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %604
  %606 = bitcast <4 x i32> %603 to <16 x i8>
  %607 = bitcast <4 x i32> %605 to <16 x i8>
  %608 = bitcast <4 x i32> %594 to <16 x i8>
  %609 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %608, <16 x i8> %607, <16 x i8> %606)
  %610 = bitcast <16 x i8> %609 to <4 x i32>
  %611 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %588
  %612 = lshr <4 x i32> %611, <i32 3, i32 3, i32 3, i32 3>
  %613 = or <4 x i32> %612, %610
  %614 = ashr <4 x i32> %613, <i32 13, i32 13, i32 13, i32 13>
  %615 = trunc <4 x i32> %614 to <4 x i16>
  %616 = bitcast <4 x float> %419 to <4 x i32>
  %617 = bitcast <4 x float> %419 to <4 x i32>
  %618 = and <4 x i32> %617, <i32 2147475456, i32 2147475456, i32 2147475456, i32 2147475456>
  %619 = bitcast <4 x i32> %618 to <4 x float>
  %620 = fmul <4 x float> %619, <float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000, float 0x38F0000000000000>
  %621 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %620, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %622 = bitcast <4 x float> %621 to <4 x i32>
  %623 = bitcast <4 x float> %419 to <4 x i32>
  %624 = and <4 x i32> %623, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
  %625 = bitcast <4 x i32> %624 to <4 x float>
  %626 = bitcast <4 x float> %625 to <4 x i32>
  %627 = icmp sgt <4 x i32> %626, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %628 = sext <4 x i1> %627 to <4 x i32>
  %629 = icmp eq <4 x i32> %626, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
  %630 = sext <4 x i1> %629 to <4 x i32>
  %631 = or <4 x i32> %628, %630
  %632 = and <4 x i32> %628, <i32 4194304, i32 4194304, i32 4194304, i32 4194304>
  %633 = or <4 x i32> <i32 260046848, i32 260046848, i32 260046848, i32 260046848>, %632
  %634 = bitcast <4 x i32> %631 to <16 x i8>
  %635 = bitcast <4 x i32> %633 to <16 x i8>
  %636 = bitcast <4 x i32> %622 to <16 x i8>
  %637 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %636, <16 x i8> %635, <16 x i8> %634)
  %638 = bitcast <16 x i8> %637 to <4 x i32>
  %639 = and <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, %616
  %640 = lshr <4 x i32> %639, <i32 3, i32 3, i32 3, i32 3>
  %641 = or <4 x i32> %640, %638
  %642 = ashr <4 x i32> %641, <i32 13, i32 13, i32 13, i32 13>
  %643 = trunc <4 x i32> %642 to <4 x i16>
  %644 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %645 = bitcast <4 x float> %644 to <4 x i32>
  %646 = and <4 x i32> %645, <i32 -1, i32 -1, i32 -1, i32 -1>
  %647 = or <4 x i32> zeroinitializer, %646
  %648 = or <4 x i32> zeroinitializer, %647
  %649 = ashr <4 x i32> %648, <i32 13, i32 13, i32 13, i32 13>
  %650 = trunc <4 x i32> %649 to <4 x i16>
  %651 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %652 = bitcast <4 x float> %651 to <4 x i32>
  %653 = and <4 x i32> %652, <i32 -1, i32 -1, i32 -1, i32 -1>
  %654 = or <4 x i32> zeroinitializer, %653
  %655 = or <4 x i32> zeroinitializer, %654
  %656 = ashr <4 x i32> %655, <i32 13, i32 13, i32 13, i32 13>
  %657 = trunc <4 x i32> %656 to <4 x i16>
  %658 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %659 = bitcast <4 x float> %658 to <4 x i32>
  %660 = and <4 x i32> %659, <i32 -1, i32 -1, i32 -1, i32 -1>
  %661 = or <4 x i32> zeroinitializer, %660
  %662 = or <4 x i32> zeroinitializer, %661
  %663 = ashr <4 x i32> %662, <i32 13, i32 13, i32 13, i32 13>
  %664 = trunc <4 x i32> %663 to <4 x i16>
  %665 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %666 = bitcast <4 x float> %665 to <4 x i32>
  %667 = and <4 x i32> %666, <i32 -1, i32 -1, i32 -1, i32 -1>
  %668 = or <4 x i32> zeroinitializer, %667
  %669 = or <4 x i32> zeroinitializer, %668
  %670 = ashr <4 x i32> %669, <i32 13, i32 13, i32 13, i32 13>
  %671 = trunc <4 x i32> %670 to <4 x i16>
  %672 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %673 = bitcast <4 x float> %672 to <4 x i32>
  %674 = and <4 x i32> %673, <i32 -1, i32 -1, i32 -1, i32 -1>
  %675 = or <4 x i32> zeroinitializer, %674
  %676 = or <4 x i32> zeroinitializer, %675
  %677 = ashr <4 x i32> %676, <i32 13, i32 13, i32 13, i32 13>
  %678 = trunc <4 x i32> %677 to <4 x i16>
  %679 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %680 = bitcast <4 x float> %679 to <4 x i32>
  %681 = and <4 x i32> %680, <i32 -1, i32 -1, i32 -1, i32 -1>
  %682 = or <4 x i32> zeroinitializer, %681
  %683 = or <4 x i32> zeroinitializer, %682
  %684 = ashr <4 x i32> %683, <i32 13, i32 13, i32 13, i32 13>
  %685 = trunc <4 x i32> %684 to <4 x i16>
  %686 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %687 = bitcast <4 x float> %686 to <4 x i32>
  %688 = and <4 x i32> %687, <i32 -1, i32 -1, i32 -1, i32 -1>
  %689 = or <4 x i32> zeroinitializer, %688
  %690 = or <4 x i32> zeroinitializer, %689
  %691 = ashr <4 x i32> %690, <i32 13, i32 13, i32 13, i32 13>
  %692 = trunc <4 x i32> %691 to <4 x i16>
  %693 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> zeroinitializer, <4 x float> <float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000, float 0x39EFFC0000000000>)
  %694 = bitcast <4 x float> %693 to <4 x i32>
  %695 = and <4 x i32> %694, <i32 -1, i32 -1, i32 -1, i32 -1>
  %696 = or <4 x i32> zeroinitializer, %695
  %697 = or <4 x i32> zeroinitializer, %696
  %698 = ashr <4 x i32> %697, <i32 13, i32 13, i32 13, i32 13>
  %699 = trunc <4 x i32> %698 to <4 x i16>
  %700 = shufflevector <4 x i16> %447, <4 x i16> %447, <3 x i32> <i32 0, i32 1, i32 2>
  %701 = shufflevector <4 x i16> %475, <4 x i16> %475, <3 x i32> <i32 0, i32 1, i32 2>
  %702 = shufflevector <4 x i16> %503, <4 x i16> %503, <3 x i32> <i32 0, i32 1, i32 2>
  %703 = shufflevector <4 x i16> %531, <4 x i16> %531, <3 x i32> <i32 0, i32 1, i32 2>
  %704 = shufflevector <4 x i16> %559, <4 x i16> %559, <3 x i32> <i32 0, i32 1, i32 2>
  %705 = shufflevector <4 x i16> %587, <4 x i16> %587, <3 x i32> <i32 0, i32 1, i32 2>
  %706 = shufflevector <4 x i16> %615, <4 x i16> %615, <3 x i32> <i32 0, i32 1, i32 2>
  %707 = shufflevector <4 x i16> %643, <4 x i16> %643, <3 x i32> <i32 0, i32 1, i32 2>
  %708 = shufflevector <4 x i16> %650, <4 x i16> %650, <3 x i32> <i32 0, i32 1, i32 2>
  %709 = shufflevector <4 x i16> %657, <4 x i16> %657, <3 x i32> <i32 0, i32 1, i32 2>
  %710 = shufflevector <4 x i16> %664, <4 x i16> %664, <3 x i32> <i32 0, i32 1, i32 2>
  %711 = shufflevector <4 x i16> %671, <4 x i16> %671, <3 x i32> <i32 0, i32 1, i32 2>
  %712 = shufflevector <4 x i16> %678, <4 x i16> %678, <3 x i32> <i32 0, i32 1, i32 2>
  %713 = shufflevector <4 x i16> %685, <4 x i16> %685, <3 x i32> <i32 0, i32 1, i32 2>
  %714 = shufflevector <4 x i16> %692, <4 x i16> %692, <3 x i32> <i32 0, i32 1, i32 2>
  %715 = shufflevector <4 x i16> %699, <4 x i16> %699, <3 x i32> <i32 0, i32 1, i32 2>
  %716 = mul i32 0, %209
  %717 = add i32 0, %716
  %718 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %717
  %719 = bitcast i8* %718 to <3 x i16>*
  store <3 x i16> %700, <3 x i16>* %719, align 2
  %720 = mul i32 0, %209
  %721 = add i32 6, %720
  %722 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %721
  %723 = bitcast i8* %722 to <3 x i16>*
  store <3 x i16> %701, <3 x i16>* %723, align 2
  %724 = mul i32 0, %209
  %725 = add i32 12, %724
  %726 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %725
  %727 = bitcast i8* %726 to <3 x i16>*
  store <3 x i16> %702, <3 x i16>* %727, align 2
  %728 = mul i32 0, %209
  %729 = add i32 18, %728
  %730 = getelementptr <16 x i8>* %color_ptr0, i32 0, i32 %729
  %731 = bitcast i8* %730 to <3 x i16>*
  store <3 x i16> %703, <3 x i16>* %731, align 2
  call void @llvm.x86.sse.ldmxcsr(i32* %mxcsr_ptr)
  ret void
}

src/gallium/auxiliary/gallivm/lp_bld_init.c:605:gallivm_verify_function: Assertion `0' failed.

(gdb) bt
#0  0x00007f1c5f7df609 in _debug_assert_fail (expr=0x7f1c602c8088 "0", file=0x7f1c602c7fd0 "src/gallium/auxiliary/gallivm/lp_bld_init.c", line=605, 
    function=0x7f1c602c8130 <__func__.8132> "gallivm_verify_function") at src/gallium/auxiliary/util/u_debug.c:278
#1  0x00007f1c5f86719a in gallivm_verify_function (gallivm=0x15a7560, func=0x1511a80) at src/gallium/auxiliary/gallivm/lp_bld_init.c:605
#2  0x00007f1c5f44000c in generate_fragment (lp=0x9c08b0, shader=0xa0a2a0, variant=0x161ff80, partial_mask=1) at src/gallium/drivers/llvmpipe/lp_state_fs.c:2373
#3  0x00007f1c5f440b15 in generate_variant (lp=0x9c08b0, shader=0xa0a2a0, key=0x7fff901955d0) at src/gallium/drivers/llvmpipe/lp_state_fs.c:2542
#4  0x00007f1c5f4420f9 in llvmpipe_update_fs (lp=0x9c08b0) at src/gallium/drivers/llvmpipe/lp_state_fs.c:3089
#5  0x00007f1c5f438549 in llvmpipe_update_derived (llvmpipe=0x9c08b0) at src/gallium/drivers/llvmpipe/lp_state_derived.c:186
#6  0x00007f1c5f412dfc in llvmpipe_draw_vbo (pipe=0x9c08b0, info=0x7fff90195870) at src/gallium/drivers/llvmpipe/lp_draw_arrays.c:64
#7  0x00007f1c5f7e267b in util_draw_arrays_instanced (pipe=0x9c08b0, mode=6, start=0, count=4, start_instance=0, instance_count=1)
    at src/gallium/auxiliary/util/u_draw.h:99
#8  0x00007f1c5f7e5066 in blitter_draw (ctx=0x9fa570, x1=0, y1=0, x2=16, y2=1, depth=0, num_instances=1) at src/gallium/auxiliary/util/u_blitter.c:941
#9  0x00007f1c5f7e510f in util_blitter_draw_rectangle (blitter=0x9fa570, x1=0, y1=0, x2=16, y2=1, depth=0, type=UTIL_BLITTER_ATTRIB_TEXCOORD, 
    attrib=0x7fff90195a40) at src/gallium/auxiliary/util/u_blitter.c:965
#10 0x00007f1c5f7e6631 in util_blitter_blit_generic (blitter=0x9fa570, dst=0x168df10, dstbox=0x7fff90195c0c, src=0x1535a80, srcbox=0x7fff90195c34, 
    src_width0=384, src_height0=16, mask=15, filter=0, scissor=0x0, copy_all_samples=1 '\001') at src/gallium/auxiliary/util/u_blitter.c:1396
#11 0x00007f1c5f7e6c1e in util_blitter_blit (blitter=0x9fa570, info=0x7fff90195c00) at src/gallium/auxiliary/util/u_blitter.c:1489
#12 0x00007f1c5f44997b in lp_blit (pipe=0x9c08b0, blit_info=0x7fff90195d30) at src/gallium/drivers/llvmpipe/lp_surface.c:232
#13 0x00007f1c5f5ab93a in st_CopyTexSubImage (ctx=0x7f1c558e8010, dims=1, texImage=0xc7da40, destX=0, destY=0, slice=0, rb=0xa48510, srcX=0, srcY=0, width=16, 
    height=1) at src/mesa/state_tracker/st_cb_texture.c:1374
#14 0x00007f1c5f543ae3 in copytexsubimage_by_slice (ctx=0x7f1c558e8010, texImage=0xc7da40, dims=1, xoffset=0, yoffset=0, zoffset=0, rb=0xa48510, x=0, y=0, 
    width=16, height=1) at src/mesa/main/teximage.c:3423
#15 0x00007f1c5f543f7c in copyteximage (ctx=0x7f1c558e8010, dims=1, target=3552, level=0, internalFormat=34843, x=0, y=0, width=16, height=1, border=0)
    at src/mesa/main/teximage.c:3516
#16 0x00007f1c5f544065 in _mesa_CopyTexImage1D (target=3552, level=0, internalFormat=34843, x=0, y=0, width=16, border=0) at src/mesa/main/teximage.c:3541
#17 0x000000000040215a in test_target_and_format (x=80, y=0, target=3552, format=34843, expected=0x4030e0 <fcolor+32>)
    at piglit/tests/texturing/copyteximage.c:393
#18 0x0000000000402e25 in piglit_display () at piglit/tests/texturing/copyteximage.c:581
#19 0x00007f1c62afb6c8 in display () at piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:60
#20 0x00007f1c62292244 in fghRedrawWindow (window=0x9bce00) at freeglut_main.c:231
#21 fghcbDisplayWindow (window=0x9bce00, enumerator=0x7fff901960a0) at freeglut_main.c:248
#22 0x00007f1c62295aa9 in fgEnumWindows (enumCallback=enumCallback@entry=0x7f1c622921d0 <fghcbDisplayWindow>, enumerator=enumerator@entry=0x7fff901960a0)
    at freeglut_structure.c:396
#23 0x00007f1c622927fd in fghDisplayAll () at freeglut_main.c:271
#24 glutMainLoopEvent () at freeglut_main.c:1523
#25 0x00007f1c62292ffd in glutMainLoop () at freeglut_main.c:1571
#26 0x00007f1c62afb8f7 in run_test (gl_fw=0x7f1c62de9340 <glut_fw>, argc=2, argv=0x7fff90196478)
    at piglit/tests/util/piglit-framework-gl/piglit_glut_framework.c:142
#27 0x00007f1c62af96c9 in piglit_gl_test_run (argc=2, argv=0x7fff90196478, config=0x7fff90196340)
    at piglit/tests/util/piglit-framework-gl.c:191
#28 0x000000000040175e in main (argc=2, argv=0x7fff90196478) at piglit/tests/texturing/copyteximage.c:121
(gdb) frame 1
#1  0x00007f1c5f86719a in gallivm_verify_function (gallivm=0x15a7560, func=0x1511a80) at src/gallium/auxiliary/gallivm/lp_bld_init.c:605
605	      assert(0);
(gdb) l
600	{
601	   /* Verify the LLVM IR.  If invalid, dump and abort */
602	#ifdef DEBUG
603	   if (LLVMVerifyFunction(func, LLVMPrintMessageAction)) {
604	      lp_debug_dump_value(func);
605	      assert(0);
606	      return;
607	   }
608	#endif
609	

155139059ba588da1161eaa692515cacdead9f4e is the first bad commit
commit 155139059ba588da1161eaa692515cacdead9f4e
Author: Zack Rusin <zackr@vmware.com>
Date:   Fri Dec 6 01:28:25 2013 -0500

    llvmpipe: fix blending with half-float formats
    
    The fact that we flush denorms to zero breaks our half-float
    conversion and blending. This patch enables denorms for
    blending. It's a little tricky due to the llvm bug that makes
    it incorrectly reorder the mxcsr intrinsics:
    http://llvm.org/bugs/show_bug.cgi?id=6393
    
    Signed-off-by: Zack Rusin <zackr@vmware.com>
    Reviewed-by: José Fonseca <jfonseca@vmware.com>
    Reviewed-by: Roland Scheidegger <sroland@vmware.com>
    Signed-off-by: Zack Rusin <zackr@vmware.com>

:040000 040000 5564b14ae9b7870ae8f4845f2069878c0752d5bc 84eb2a43a71956a57817e373117c2e2952a058d2 M	src
bisect run success

Comment 1 Vinson Lee 2014-01-01 02:27:06 UTC

commit 27d47bd42f417db96842c9453092acf68944a4c8
Author: Roland Scheidegger <sroland@vmware.com>
Date:   Fri Dec 13 21:20:05 2013 +0100

    gallivm: fix pointer type for stmxcsr/ldmxcsr
    
    The argument is an i8 pointer, not an i32 pointer (even though the value
    actually stored/loaded IS i32). Older LLVM versions didn't care, but 3.2
    and newer do, leading to crashes.
    
    Reviewed-by: Zack Rusin <zackr@vmware.com>

Use of freedesktop.org services, including Bugzilla, is subject to our Code of Conduct. How we collect and use information is described in our Privacy Policy.