r300: DRM version: 2.50.0, Name: ATI RS690, ID: 0x791f, GB: 1, Z: 1 r300: GART size: 509 MB, VRAM size: 288 MB r300: AA compression RAM: YES, Z compression RAM: NO, HiZ RAM: NO r300: DRM version: 2.50.0, Name: ATI RS690, ID: 0x791f, GB: 1, Z: 1 r300: GART size: 509 MB, VRAM size: 288 MB r300: AA compression RAM: YES, Z compression RAM: NO, HiZ RAM: NO VERT PROPERTY NEXT_SHADER FRAG DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL TEMP[0] 0: MOV TEMP[0], IN[0] 1: MOV OUT[1], IN[1] 2: MOV OUT[0], TEMP[0] 3: MOV OUT[2], TEMP[0] 4: END clamp_vertex_color = 0 clip_xy = 1 clip_z = 1 clip_user = 0 bypass_viewport = 0 clip_halfz = 0 need_edgeflags = 0 has_gs = 0 ucp_enable = 0 vertex_element[0].src_offset = 0 vertex_element[0].instance_divisor = 0 vertex_element[0].vertex_buffer_index = 0 vertex_element[0].src_format = PIPE_FORMAT_R32G32B32_FLOAT vertex_element[1].src_offset = 12 vertex_element[1].instance_divisor = 0 vertex_element[1].vertex_buffer_index = 0 vertex_element[1].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT ; Function Attrs: nounwind readnone declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #0 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 define i8 @draw_llvm_vs_variant0({ [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }* noalias %context, { i32, [4 x float], [3 x [4 x float]] }* noalias %io, { i8*, i32 }* noalias %vbuffers, i32 %count, i32 %start_or_maxelt, i32 %stride, { i16, i8, i32, i8* }* noalias %vb, i32 %instance_id, i32 %vertex_id_offset, i32 %start_instance, i32* noalias %fetch_elts) { entry: %temp37 = alloca <4 x float> %temp36 = alloca <4 x float> %temp35 = alloca <4 x float> %temp = alloca <4 x float> %output34 = alloca <4 x float> %output33 = alloca <4 x float> %output32 = alloca <4 x float> %output31 = alloca <4 x float> %output30 = alloca <4 x float> %output29 = alloca <4 x float> %output28 = alloca <4 x float> %output27 = alloca <4 x float> %output26 = alloca <4 x float> %output25 = alloca <4 x float> %output24 = alloca <4 x float> %output = alloca <4 x float> %looplimiter = alloca i32 %index_store = alloca <4 x i32> %loop_counter = alloca i32 %0 = alloca i8* %1 = alloca i8* %2 = alloca <4 x i64> %3 = alloca <4 x i32> store <4 x i32> zeroinitializer, <4 x i32>* %3 %4 = bitcast <4 x i64>* %2 to i8* %5 = getelementptr i8, i8* %4, i32 0 %6 = icmp ne i32* null, %fetch_elts %fetch_max = sub i32 %count, 1 %7 = insertelement <4 x i32> undef, i32 %fetch_max, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = insertelement <4 x i32> undef, i32 %start_or_maxelt, i32 0 %10 = shufflevector <4 x i32> %9, <4 x i32> undef, <4 x i32> zeroinitializer %11 = getelementptr { i8*, i32 }, { i8*, i32 }* %vbuffers, i32 0 %12 = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %vb, i32 0 %.stride_ptr = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %12, i32 0, i32 0 %.stride = load i16, i16* %.stride_ptr %13 = zext i16 %.stride to i32 %.buffer_offset_ptr = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %12, i32 0, i32 2 %.buffer_offset = load i32, i32* %.buffer_offset_ptr %.map_ptr = getelementptr { i8*, i32 }, { i8*, i32 }* %11, i32 0, i32 0 %.map = load i8*, i8** %.map_ptr %.size_ptr = getelementptr { i8*, i32 }, { i8*, i32 }* %11, i32 0, i32 1 %.size = load i32, i32* %.size_ptr %14 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %.size, i32 11) #1 %15 = extractvalue { i32, i1 } %14, 1 %16 = extractvalue { i32, i1 } %14, 0 %17 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %16, i32 %.buffer_offset) #1 %18 = extractvalue { i32, i1 } %17, 1 %19 = or i1 %15, %18 %20 = extractvalue { i32, i1 } %17, 0 %21 = select i1 %19, i32 0, i32 %20 br i1 %19, label %if-true-block, label %if-false-block if-true-block: ; preds = %entry store i8* %5, i8** %1 br label %endif-block if-false-block: ; preds = %entry %22 = getelementptr i8, i8* %.map, i32 %.buffer_offset store i8* %22, i8** %1 br label %endif-block endif-block: ; preds = %if-false-block, %if-true-block %map_ptr = load i8*, i8** %1 %23 = getelementptr { i8*, i32 }, { i8*, i32 }* %vbuffers, i32 0 %24 = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %vb, i32 0 %.stride_ptr1 = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %24, i32 0, i32 0 %.stride2 = load i16, i16* %.stride_ptr1 %25 = zext i16 %.stride2 to i32 %.buffer_offset_ptr3 = getelementptr { i16, i8, i32, i8* }, { i16, i8, i32, i8* }* %24, i32 0, i32 2 %.buffer_offset4 = load i32, i32* %.buffer_offset_ptr3 %.map_ptr5 = getelementptr { i8*, i32 }, { i8*, i32 }* %23, i32 0, i32 0 %.map6 = load i8*, i8** %.map_ptr5 %.size_ptr7 = getelementptr { i8*, i32 }, { i8*, i32 }* %23, i32 0, i32 1 %.size8 = load i32, i32* %.size_ptr7 %26 = add i32 %.buffer_offset4, 12 %27 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %.size8, i32 15) #1 %28 = extractvalue { i32, i1 } %27, 1 %29 = extractvalue { i32, i1 } %27, 0 %30 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %29, i32 %26) #1 %31 = extractvalue { i32, i1 } %30, 1 %32 = or i1 %28, %31 %33 = extractvalue { i32, i1 } %30, 0 %34 = select i1 %32, i32 0, i32 %33 br i1 %32, label %if-true-block10, label %if-false-block11 if-true-block10: ; preds = %endif-block store i8* %5, i8** %0 br label %endif-block9 if-false-block11: ; preds = %endif-block %35 = getelementptr i8, i8* %.map6, i32 %26 store i8* %35, i8** %0 br label %endif-block9 endif-block9: ; preds = %if-false-block11, %if-true-block10 %map_ptr12 = load i8*, i8** %0 store i32 0, i32* %loop_counter store i32 0, i32* %loop_counter br label %loop_begin loop_begin: ; preds = %endif-block13, %endif-block9 %36 = load i32, i32* %loop_counter %37 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %io, i32 %36 %38 = insertelement <4 x i32> undef, i32 %36, i32 0 %39 = shufflevector <4 x i32> %38, <4 x i32> undef, <4 x i32> zeroinitializer %40 = add <4 x i32> %39, %41 = icmp ult <4 x i32> %40, %8 %42 = sext <4 x i1> %41 to <4 x i32> %43 = trunc <4 x i32> %42 to <4 x i1> %44 = select <4 x i1> %43, <4 x i32> %40, <4 x i32> %8 br i1 %6, label %if-true-block14, label %if-false-block15 if-true-block14: ; preds = %loop_begin %45 = shl <4 x i32> %44, %46 = bitcast i32* %fetch_elts to i8* %47 = extractelement <4 x i32> %45, i32 0 %48 = getelementptr i8, i8* %46, i32 %47 %49 = bitcast i8* %48 to i32* %50 = load i32, i32* %49 %51 = insertelement <4 x i32> undef, i32 %50, i32 0 %52 = extractelement <4 x i32> %45, i32 1 %53 = getelementptr i8, i8* %46, i32 %52 %54 = bitcast i8* %53 to i32* %55 = load i32, i32* %54 %56 = insertelement <4 x i32> %51, i32 %55, i32 1 %57 = extractelement <4 x i32> %45, i32 2 %58 = getelementptr i8, i8* %46, i32 %57 %59 = bitcast i8* %58 to i32* %60 = load i32, i32* %59 %61 = insertelement <4 x i32> %56, i32 %60, i32 2 %62 = extractelement <4 x i32> %45, i32 3 %63 = getelementptr i8, i8* %46, i32 %62 %64 = bitcast i8* %63 to i32* %65 = load i32, i32* %64 %66 = insertelement <4 x i32> %61, i32 %65, i32 3 store <4 x i32> %66, <4 x i32>* %index_store br label %endif-block13 if-false-block15: ; preds = %loop_begin %67 = add <4 x i32> %44, %10 store <4 x i32> %67, <4 x i32>* %index_store br label %endif-block13 endif-block13: ; preds = %if-false-block15, %if-true-block14 %68 = load <4 x i32>, <4 x i32>* %index_store %69 = insertelement <4 x i32> undef, i32 %13, i32 0 %70 = shufflevector <4 x i32> %69, <4 x i32> undef, <4 x i32> zeroinitializer %71 = insertelement <4 x i32> undef, i32 %21, i32 0 %72 = shufflevector <4 x i32> %71, <4 x i32> undef, <4 x i32> zeroinitializer %73 = mul <4 x i32> %70, %68 %74 = icmp ult <4 x i32> %73, %72 %75 = sext <4 x i1> %74 to <4 x i32> %76 = and <4 x i32> %73, %75 %77 = shufflevector <4 x i32> %76, <4 x i32> %76, <1 x i32> zeroinitializer %78 = extractelement <1 x i32> %77, i32 0 %79 = getelementptr i8, i8* %map_ptr, i32 %78 %80 = bitcast i8* %79 to <3 x i32>* %81 = load <3 x i32>, <3 x i32>* %80, align 1 %82 = shufflevector <3 x i32> %81, <3 x i32> undef, <4 x i32> %83 = shufflevector <4 x i32> %76, <4 x i32> %76, <1 x i32> %84 = extractelement <1 x i32> %83, i32 0 %85 = getelementptr i8, i8* %map_ptr, i32 %84 %86 = bitcast i8* %85 to <3 x i32>* %87 = load <3 x i32>, <3 x i32>* %86, align 1 %88 = shufflevector <3 x i32> %87, <3 x i32> undef, <4 x i32> %89 = shufflevector <4 x i32> %76, <4 x i32> %76, <1 x i32> %90 = extractelement <1 x i32> %89, i32 0 %91 = getelementptr i8, i8* %map_ptr, i32 %90 %92 = bitcast i8* %91 to <3 x i32>* %93 = load <3 x i32>, <3 x i32>* %92, align 1 %94 = shufflevector <3 x i32> %93, <3 x i32> undef, <4 x i32> %95 = shufflevector <4 x i32> %76, <4 x i32> %76, <1 x i32> %96 = extractelement <1 x i32> %95, i32 0 %97 = getelementptr i8, i8* %map_ptr, i32 %96 %98 = bitcast i8* %97 to <3 x i32>* %99 = load <3 x i32>, <3 x i32>* %98, align 1 %100 = shufflevector <3 x i32> %99, <3 x i32> undef, <4 x i32> %101 = shufflevector <4 x i32> %82, <4 x i32> %88, <4 x i32> %102 = shufflevector <4 x i32> %94, <4 x i32> %100, <4 x i32> %103 = shufflevector <4 x i32> %82, <4 x i32> %88, <4 x i32> %104 = shufflevector <4 x i32> %94, <4 x i32> %100, <4 x i32> %t0 = bitcast <4 x i32> %101 to <2 x i64> %t1 = bitcast <4 x i32> %102 to <2 x i64> %t2 = bitcast <4 x i32> %103 to <2 x i64> %t3 = bitcast <4 x i32> %104 to <2 x i64> %105 = shufflevector <2 x i64> %t0, <2 x i64> %t1, <2 x i32> %106 = shufflevector <2 x i64> %t0, <2 x i64> %t1, <2 x i32> %107 = shufflevector <2 x i64> %t2, <2 x i64> %t3, <2 x i32> %108 = shufflevector <2 x i64> %t2, <2 x i64> %t3, <2 x i32> %dst0 = bitcast <2 x i64> %105 to <4 x i32> %dst1 = bitcast <2 x i64> %106 to <4 x i32> %dst2 = bitcast <2 x i64> %107 to <4 x i32> %dst3 = bitcast <2 x i64> %108 to <4 x i32> %109 = bitcast <4 x i32> %dst0 to <4 x float> %110 = bitcast <4 x i32> %dst1 to <4 x float> %111 = bitcast <4 x i32> %dst2 to <4 x float> %112 = bitcast <4 x float> %109 to <4 x i32> %113 = and <4 x i32> %112, %75 %114 = bitcast <4 x i32> %113 to <4 x float> %115 = bitcast <4 x float> %110 to <4 x i32> %116 = and <4 x i32> %115, %75 %117 = bitcast <4 x i32> %116 to <4 x float> %118 = bitcast <4 x float> %111 to <4 x i32> %119 = and <4 x i32> %118, %75 %120 = bitcast <4 x i32> %119 to <4 x float> %121 = and <4 x i32> , %75 %122 = bitcast <4 x i32> %121 to <4 x float> %123 = insertelement <4 x i32> undef, i32 %25, i32 0 %124 = shufflevector <4 x i32> %123, <4 x i32> undef, <4 x i32> zeroinitializer %125 = insertelement <4 x i32> undef, i32 %34, i32 0 %126 = shufflevector <4 x i32> %125, <4 x i32> undef, <4 x i32> zeroinitializer %127 = mul <4 x i32> %124, %68 %128 = icmp ult <4 x i32> %127, %126 %129 = sext <4 x i1> %128 to <4 x i32> %130 = and <4 x i32> %127, %129 %131 = shufflevector <4 x i32> %130, <4 x i32> %130, <1 x i32> zeroinitializer %132 = extractelement <1 x i32> %131, i32 0 %133 = getelementptr i8, i8* %map_ptr12, i32 %132 %134 = bitcast i8* %133 to <4 x i32>* %135 = load <4 x i32>, <4 x i32>* %134, align 1 %136 = shufflevector <4 x i32> %130, <4 x i32> %130, <1 x i32> %137 = extractelement <1 x i32> %136, i32 0 %138 = getelementptr i8, i8* %map_ptr12, i32 %137 %139 = bitcast i8* %138 to <4 x i32>* %140 = load <4 x i32>, <4 x i32>* %139, align 1 %141 = shufflevector <4 x i32> %130, <4 x i32> %130, <1 x i32> %142 = extractelement <1 x i32> %141, i32 0 %143 = getelementptr i8, i8* %map_ptr12, i32 %142 %144 = bitcast i8* %143 to <4 x i32>* %145 = load <4 x i32>, <4 x i32>* %144, align 1 %146 = shufflevector <4 x i32> %130, <4 x i32> %130, <1 x i32> %147 = extractelement <1 x i32> %146, i32 0 %148 = getelementptr i8, i8* %map_ptr12, i32 %147 %149 = bitcast i8* %148 to <4 x i32>* %150 = load <4 x i32>, <4 x i32>* %149, align 1 %151 = shufflevector <4 x i32> %135, <4 x i32> %140, <4 x i32> %152 = shufflevector <4 x i32> %145, <4 x i32> %150, <4 x i32> %153 = shufflevector <4 x i32> %135, <4 x i32> %140, <4 x i32> %154 = shufflevector <4 x i32> %145, <4 x i32> %150, <4 x i32> %t016 = bitcast <4 x i32> %151 to <2 x i64> %t117 = bitcast <4 x i32> %152 to <2 x i64> %t218 = bitcast <4 x i32> %153 to <2 x i64> %t319 = bitcast <4 x i32> %154 to <2 x i64> %155 = shufflevector <2 x i64> %t016, <2 x i64> %t117, <2 x i32> %156 = shufflevector <2 x i64> %t016, <2 x i64> %t117, <2 x i32> %157 = shufflevector <2 x i64> %t218, <2 x i64> %t319, <2 x i32> %158 = shufflevector <2 x i64> %t218, <2 x i64> %t319, <2 x i32> %dst020 = bitcast <2 x i64> %155 to <4 x i32> %dst121 = bitcast <2 x i64> %156 to <4 x i32> %dst222 = bitcast <2 x i64> %157 to <4 x i32> %dst323 = bitcast <2 x i64> %158 to <4 x i32> %159 = bitcast <4 x i32> %dst020 to <4 x float> %160 = bitcast <4 x i32> %dst121 to <4 x float> %161 = bitcast <4 x i32> %dst222 to <4 x float> %162 = bitcast <4 x i32> %dst323 to <4 x float> %163 = bitcast <4 x float> %159 to <4 x i32> %164 = and <4 x i32> %163, %129 %165 = bitcast <4 x i32> %164 to <4 x float> %166 = bitcast <4 x float> %160 to <4 x i32> %167 = and <4 x i32> %166, %129 %168 = bitcast <4 x i32> %167 to <4 x float> %169 = bitcast <4 x float> %161 to <4 x i32> %170 = and <4 x i32> %169, %129 %171 = bitcast <4 x i32> %170 to <4 x float> %172 = bitcast <4 x float> %162 to <4 x i32> %173 = and <4 x i32> %172, %129 %174 = bitcast <4 x i32> %173 to <4 x float> %175 = insertelement <4 x i32> undef, i32 %vertex_id_offset, i32 0 %176 = shufflevector <4 x i32> %175, <4 x i32> undef, <4 x i32> zeroinitializer %177 = sub <4 x i32> %68, %176 %context.vs_constants_ptr = getelementptr { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }, { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 0 %context.num_vs_constants_ptr = getelementptr { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }, { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 1 store i32 0, i32* %looplimiter store i32 65535, i32* %looplimiter store <4 x float> zeroinitializer, <4 x float>* %output store <4 x float> zeroinitializer, <4 x float>* %output24 store <4 x float> zeroinitializer, <4 x float>* %output25 store <4 x float> zeroinitializer, <4 x float>* %output26 store <4 x float> zeroinitializer, <4 x float>* %output27 store <4 x float> zeroinitializer, <4 x float>* %output28 store <4 x float> zeroinitializer, <4 x float>* %output29 store <4 x float> zeroinitializer, <4 x float>* %output30 store <4 x float> zeroinitializer, <4 x float>* %output31 store <4 x float> zeroinitializer, <4 x float>* %output32 store <4 x float> zeroinitializer, <4 x float>* %output33 store <4 x float> zeroinitializer, <4 x float>* %output34 store <4 x float> zeroinitializer, <4 x float>* %temp store <4 x float> zeroinitializer, <4 x float>* %temp35 store <4 x float> zeroinitializer, <4 x float>* %temp36 store <4 x float> zeroinitializer, <4 x float>* %temp37 store <4 x float> %114, <4 x float>* %temp store <4 x float> %117, <4 x float>* %temp35 store <4 x float> %120, <4 x float>* %temp36 store <4 x float> %122, <4 x float>* %temp37 store <4 x float> %165, <4 x float>* %output27 store <4 x float> %168, <4 x float>* %output28 store <4 x float> %171, <4 x float>* %output29 store <4 x float> %174, <4 x float>* %output30 %178 = load <4 x float>, <4 x float>* %temp %179 = load <4 x float>, <4 x float>* %temp35 %180 = load <4 x float>, <4 x float>* %temp36 %181 = load <4 x float>, <4 x float>* %temp37 store <4 x float> %178, <4 x float>* %output store <4 x float> %179, <4 x float>* %output24 store <4 x float> %180, <4 x float>* %output25 store <4 x float> %181, <4 x float>* %output26 %182 = load <4 x float>, <4 x float>* %temp %183 = load <4 x float>, <4 x float>* %temp35 %184 = load <4 x float>, <4 x float>* %temp36 %185 = load <4 x float>, <4 x float>* %temp37 store <4 x float> %182, <4 x float>* %output31 store <4 x float> %183, <4 x float>* %output32 store <4 x float> %184, <4 x float>* %output33 store <4 x float> %185, <4 x float>* %output34 %186 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 0 %187 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 1 %188 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 2 %189 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 3 %190 = load <4 x float>, <4 x float>* %output %191 = load <4 x float>, <4 x float>* %output24 %192 = load <4 x float>, <4 x float>* %output25 %193 = load <4 x float>, <4 x float>* %output26 %.clip_pos_ptr = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %186, i32 0, i32 1 %.clip_pos_ptr38 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %187, i32 0, i32 1 %.clip_pos_ptr39 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %188, i32 0, i32 1 %.clip_pos_ptr40 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %189, i32 0, i32 1 %194 = shufflevector <4 x float> %190, <4 x float> %191, <4 x i32> %195 = shufflevector <4 x float> %192, <4 x float> %193, <4 x i32> %196 = shufflevector <4 x float> %190, <4 x float> %191, <4 x i32> %197 = shufflevector <4 x float> %192, <4 x float> %193, <4 x i32> %t041 = bitcast <4 x float> %194 to <2 x double> %t142 = bitcast <4 x float> %195 to <2 x double> %t243 = bitcast <4 x float> %196 to <2 x double> %t344 = bitcast <4 x float> %197 to <2 x double> %198 = shufflevector <2 x double> %t041, <2 x double> %t142, <2 x i32> %199 = shufflevector <2 x double> %t041, <2 x double> %t142, <2 x i32> %200 = shufflevector <2 x double> %t243, <2 x double> %t344, <2 x i32> %201 = shufflevector <2 x double> %t243, <2 x double> %t344, <2 x i32> %dst045 = bitcast <2 x double> %198 to <4 x float> %dst146 = bitcast <2 x double> %199 to <4 x float> %dst247 = bitcast <2 x double> %200 to <4 x float> %dst348 = bitcast <2 x double> %201 to <4 x float> %202 = shufflevector <4 x float> %dst045, <4 x float> %dst045, <4 x i32> %203 = shufflevector <4 x float> %dst146, <4 x float> %dst146, <4 x i32> %204 = shufflevector <4 x float> %dst247, <4 x float> %dst247, <4 x i32> %205 = shufflevector <4 x float> %dst348, <4 x float> %dst348, <4 x i32> %clipo = getelementptr [4 x float], [4 x float]* %.clip_pos_ptr, i32 0, i32 0 %206 = bitcast float* %clipo to <4 x float>* store <4 x float> %202, <4 x float>* %206, align 4 %clipo49 = getelementptr [4 x float], [4 x float]* %.clip_pos_ptr38, i32 0, i32 0 %207 = bitcast float* %clipo49 to <4 x float>* store <4 x float> %203, <4 x float>* %207, align 4 %clipo50 = getelementptr [4 x float], [4 x float]* %.clip_pos_ptr39, i32 0, i32 0 %208 = bitcast float* %clipo50 to <4 x float>* store <4 x float> %204, <4 x float>* %208, align 4 %clipo51 = getelementptr [4 x float], [4 x float]* %.clip_pos_ptr40, i32 0, i32 0 %209 = bitcast float* %clipo51 to <4 x float>* store <4 x float> %205, <4 x float>* %209, align 4 %210 = load <4 x i32>, <4 x i32>* %3 %211 = load <4 x float>, <4 x float>* %output %212 = load <4 x float>, <4 x float>* %output24 %213 = load <4 x float>, <4 x float>* %output25 %214 = load <4 x float>, <4 x float>* %output26 %215 = fcmp ugt <4 x float> %211, %214 %216 = sext <4 x i1> %215 to <4 x i32> %217 = and <4 x i32> %216, %218 = fadd <4 x float> %211, %214 %219 = fcmp ugt <4 x float> zeroinitializer, %218 %220 = sext <4 x i1> %219 to <4 x i32> %221 = and <4 x i32> %220, %222 = or <4 x i32> %217, %221 %223 = fcmp ugt <4 x float> %212, %214 %224 = sext <4 x i1> %223 to <4 x i32> %225 = and <4 x i32> %224, %226 = or <4 x i32> %222, %225 %227 = fadd <4 x float> %212, %214 %228 = fcmp ugt <4 x float> zeroinitializer, %227 %229 = sext <4 x i1> %228 to <4 x i32> %230 = and <4 x i32> %229, %231 = or <4 x i32> %226, %230 %232 = fadd <4 x float> %213, %214 %233 = fcmp ugt <4 x float> zeroinitializer, %232 %234 = sext <4 x i1> %233 to <4 x i32> %235 = and <4 x i32> %234, %236 = or <4 x i32> %231, %235 %237 = fcmp ugt <4 x float> %213, %214 %238 = sext <4 x i1> %237 to <4 x i32> %239 = and <4 x i32> %238, %240 = or <4 x i32> %236, %239 %241 = or <4 x i32> %240, %210 store <4 x i32> %241, <4 x i32>* %3 %242 = load <4 x float>, <4 x float>* %output26 %context.viewports_ptr = getelementptr { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }, { [16 x float*], [16 x i32], [14 x [4 x float]]*, float*, [128 x { i32, i32, i32, i32, i32, i8*, [16 x i32], [16 x i32], [16 x i32] }], [32 x { float, float, float, [4 x float] }] }* %context, i32 0, i32 3 %context.viewports = load float*, float** %context.viewports_ptr %243 = fdiv <4 x float> , %242 store <4 x float> %243, <4 x float>* %output26 %244 = load <4 x float>, <4 x float>* %output %245 = getelementptr float, float* %context.viewports, i32 0 %246 = getelementptr float, float* %context.viewports, i32 3 %scale = load float, float* %245 %247 = insertelement <4 x float> undef, float %scale, i32 0 %248 = shufflevector <4 x float> %247, <4 x float> undef, <4 x i32> zeroinitializer %trans = load float, float* %246 %249 = insertelement <4 x float> undef, float %trans, i32 0 %250 = shufflevector <4 x float> %249, <4 x float> undef, <4 x i32> zeroinitializer %251 = fmul <4 x float> %244, %243 %252 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %251, <4 x float> %248, <4 x float> %250) #1 store <4 x float> %252, <4 x float>* %output %253 = load <4 x float>, <4 x float>* %output24 %254 = getelementptr float, float* %context.viewports, i32 1 %255 = getelementptr float, float* %context.viewports, i32 4 %scale52 = load float, float* %254 %256 = insertelement <4 x float> undef, float %scale52, i32 0 %257 = shufflevector <4 x float> %256, <4 x float> undef, <4 x i32> zeroinitializer %trans53 = load float, float* %255 %258 = insertelement <4 x float> undef, float %trans53, i32 0 %259 = shufflevector <4 x float> %258, <4 x float> undef, <4 x i32> zeroinitializer %260 = fmul <4 x float> %253, %243 %261 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %260, <4 x float> %257, <4 x float> %259) #1 store <4 x float> %261, <4 x float>* %output24 %262 = load <4 x float>, <4 x float>* %output25 %263 = getelementptr float, float* %context.viewports, i32 2 %264 = getelementptr float, float* %context.viewports, i32 5 %scale54 = load float, float* %263 %265 = insertelement <4 x float> undef, float %scale54, i32 0 %266 = shufflevector <4 x float> %265, <4 x float> undef, <4 x i32> zeroinitializer %trans55 = load float, float* %264 %267 = insertelement <4 x float> undef, float %trans55, i32 0 %268 = shufflevector <4 x float> %267, <4 x float> undef, <4 x i32> zeroinitializer %269 = fmul <4 x float> %262, %243 %270 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %269, <4 x float> %266, <4 x float> %268) #1 store <4 x float> %270, <4 x float>* %output25 %output0.x = load <4 x float>, <4 x float>* %output %output0.y = load <4 x float>, <4 x float>* %output24 %output0.z = load <4 x float>, <4 x float>* %output25 %output0.w = load <4 x float>, <4 x float>* %output26 %271 = shufflevector <4 x float> %output0.x, <4 x float> %output0.y, <4 x i32> %272 = shufflevector <4 x float> %output0.z, <4 x float> %output0.w, <4 x i32> %273 = shufflevector <4 x float> %output0.x, <4 x float> %output0.y, <4 x i32> %274 = shufflevector <4 x float> %output0.z, <4 x float> %output0.w, <4 x i32> %t056 = bitcast <4 x float> %271 to <2 x double> %t157 = bitcast <4 x float> %272 to <2 x double> %t258 = bitcast <4 x float> %273 to <2 x double> %t359 = bitcast <4 x float> %274 to <2 x double> %275 = shufflevector <2 x double> %t056, <2 x double> %t157, <2 x i32> %276 = shufflevector <2 x double> %t056, <2 x double> %t157, <2 x i32> %277 = shufflevector <2 x double> %t258, <2 x double> %t359, <2 x i32> %278 = shufflevector <2 x double> %t258, <2 x double> %t359, <2 x i32> %dst060 = bitcast <2 x double> %275 to <4 x float> %dst161 = bitcast <2 x double> %276 to <4 x float> %dst262 = bitcast <2 x double> %277 to <4 x float> %dst363 = bitcast <2 x double> %278 to <4 x float> %279 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 0 %280 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 1 %281 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 2 %282 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 3 %283 = or <4 x i32> , %240 %.id_ptr = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %279, i32 0, i32 0 %284 = extractelement <4 x i32> %283, i32 0 store i32 %284, i32* %.id_ptr %.id_ptr64 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %280, i32 0, i32 0 %285 = extractelement <4 x i32> %283, i32 1 store i32 %285, i32* %.id_ptr64 %.id_ptr65 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %281, i32 0, i32 0 %286 = extractelement <4 x i32> %283, i32 2 store i32 %286, i32* %.id_ptr65 %.id_ptr66 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %282, i32 0, i32 0 %287 = extractelement <4 x i32> %283, i32 3 store i32 %287, i32* %.id_ptr66 %.data_ptr = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %279, i32 0, i32 2 %288 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr, i32 0, i32 0, i32 0 %289 = bitcast float* %288 to <4 x float>* store <4 x float> %dst060, <4 x float>* %289, align 4 %.data_ptr67 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %280, i32 0, i32 2 %290 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr67, i32 0, i32 0, i32 0 %291 = bitcast float* %290 to <4 x float>* store <4 x float> %dst161, <4 x float>* %291, align 4 %.data_ptr68 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %281, i32 0, i32 2 %292 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr68, i32 0, i32 0, i32 0 %293 = bitcast float* %292 to <4 x float>* store <4 x float> %dst262, <4 x float>* %293, align 4 %.data_ptr69 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %282, i32 0, i32 2 %294 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr69, i32 0, i32 0, i32 0 %295 = bitcast float* %294 to <4 x float>* store <4 x float> %dst363, <4 x float>* %295, align 4 %output1.x = load <4 x float>, <4 x float>* %output27 %output1.y = load <4 x float>, <4 x float>* %output28 %output1.z = load <4 x float>, <4 x float>* %output29 %output1.w = load <4 x float>, <4 x float>* %output30 %296 = shufflevector <4 x float> %output1.x, <4 x float> %output1.y, <4 x i32> %297 = shufflevector <4 x float> %output1.z, <4 x float> %output1.w, <4 x i32> %298 = shufflevector <4 x float> %output1.x, <4 x float> %output1.y, <4 x i32> %299 = shufflevector <4 x float> %output1.z, <4 x float> %output1.w, <4 x i32> %t070 = bitcast <4 x float> %296 to <2 x double> %t171 = bitcast <4 x float> %297 to <2 x double> %t272 = bitcast <4 x float> %298 to <2 x double> %t373 = bitcast <4 x float> %299 to <2 x double> %300 = shufflevector <2 x double> %t070, <2 x double> %t171, <2 x i32> %301 = shufflevector <2 x double> %t070, <2 x double> %t171, <2 x i32> %302 = shufflevector <2 x double> %t272, <2 x double> %t373, <2 x i32> %303 = shufflevector <2 x double> %t272, <2 x double> %t373, <2 x i32> %dst074 = bitcast <2 x double> %300 to <4 x float> %dst175 = bitcast <2 x double> %301 to <4 x float> %dst276 = bitcast <2 x double> %302 to <4 x float> %dst377 = bitcast <2 x double> %303 to <4 x float> %304 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 0 %305 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 1 %306 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 2 %307 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 3 %.data_ptr78 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %304, i32 0, i32 2 %308 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr78, i32 0, i32 1, i32 0 %309 = bitcast float* %308 to <4 x float>* store <4 x float> %dst074, <4 x float>* %309, align 4 %.data_ptr79 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %305, i32 0, i32 2 %310 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr79, i32 0, i32 1, i32 0 %311 = bitcast float* %310 to <4 x float>* store <4 x float> %dst175, <4 x float>* %311, align 4 %.data_ptr80 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %306, i32 0, i32 2 %312 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr80, i32 0, i32 1, i32 0 %313 = bitcast float* %312 to <4 x float>* store <4 x float> %dst276, <4 x float>* %313, align 4 %.data_ptr81 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %307, i32 0, i32 2 %314 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr81, i32 0, i32 1, i32 0 %315 = bitcast float* %314 to <4 x float>* store <4 x float> %dst377, <4 x float>* %315, align 4 %output2.x = load <4 x float>, <4 x float>* %output31 %output2.y = load <4 x float>, <4 x float>* %output32 %output2.z = load <4 x float>, <4 x float>* %output33 %output2.w = load <4 x float>, <4 x float>* %output34 %316 = shufflevector <4 x float> %output2.x, <4 x float> %output2.y, <4 x i32> %317 = shufflevector <4 x float> %output2.z, <4 x float> %output2.w, <4 x i32> %318 = shufflevector <4 x float> %output2.x, <4 x float> %output2.y, <4 x i32> %319 = shufflevector <4 x float> %output2.z, <4 x float> %output2.w, <4 x i32> %t082 = bitcast <4 x float> %316 to <2 x double> %t183 = bitcast <4 x float> %317 to <2 x double> %t284 = bitcast <4 x float> %318 to <2 x double> %t385 = bitcast <4 x float> %319 to <2 x double> %320 = shufflevector <2 x double> %t082, <2 x double> %t183, <2 x i32> %321 = shufflevector <2 x double> %t082, <2 x double> %t183, <2 x i32> %322 = shufflevector <2 x double> %t284, <2 x double> %t385, <2 x i32> %323 = shufflevector <2 x double> %t284, <2 x double> %t385, <2 x i32> %dst086 = bitcast <2 x double> %320 to <4 x float> %dst187 = bitcast <2 x double> %321 to <4 x float> %dst288 = bitcast <2 x double> %322 to <4 x float> %dst389 = bitcast <2 x double> %323 to <4 x float> %324 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 0 %325 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 1 %326 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 2 %327 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %37, i32 3 %.data_ptr90 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %324, i32 0, i32 2 %328 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr90, i32 0, i32 2, i32 0 %329 = bitcast float* %328 to <4 x float>* store <4 x float> %dst086, <4 x float>* %329, align 4 %.data_ptr91 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %325, i32 0, i32 2 %330 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr91, i32 0, i32 2, i32 0 %331 = bitcast float* %330 to <4 x float>* store <4 x float> %dst187, <4 x float>* %331, align 4 %.data_ptr92 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %326, i32 0, i32 2 %332 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr92, i32 0, i32 2, i32 0 %333 = bitcast float* %332 to <4 x float>* store <4 x float> %dst288, <4 x float>* %333, align 4 %.data_ptr93 = getelementptr { i32, [4 x float], [3 x [4 x float]] }, { i32, [4 x float], [3 x [4 x float]] }* %327, i32 0, i32 2 %334 = getelementptr [3 x [4 x float]], [3 x [4 x float]]* %.data_ptr93, i32 0, i32 2, i32 0 %335 = bitcast float* %334 to <4 x float>* store <4 x float> %dst389, <4 x float>* %335, align 4 %336 = add i32 %36, 4 store i32 %336, i32* %loop_counter %337 = icmp uge i32 %336, %count br i1 %337, label %loop_end, label %loop_begin loop_end: ; preds = %endif-block13 %338 = load i32, i32* %loop_counter %339 = load <4 x i32>, <4 x i32>* %3 %340 = bitcast <4 x i32> %339 to i128 %341 = icmp ne i128 %340, 0 %342 = zext i1 %341 to i8 ret i8 %342 } llc -mattr option(s): +sse2,+cx16,-tbm,-avx512ifma,-avx512dq,-fma4,+prfchw,-bmi2,-xsavec,-fsgsbase,-popcnt,-aes,-pcommit,-xsaves,-avx512er,-clwb,-avx512f,-pku,-smap,+mmx,-xop,-rdseed,-hle,-sse4a,-avx512bw,-clflushopt,-xsave,-avx512vl,-invpcid,-avx512cd,-avx,-rtm,-fma,-bmi,-mwaitx,-rdrnd,-sse4.1,-sse4.2,-avx2,+sse,-lzcnt,-pclmul,-prefetchwt1,-f16c,-ssse3,-sgx,+cmov,-avx512vbmi,-movbe,-xsaveopt,-sha,-adx,-avx512pf,+sse3 llc -mcpu option: k8-sse3 draw_llvm_vs_variant0: 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r15 6: pushq %r14 8: pushq %r13 10: pushq %r12 12: pushq %rbx 13: andq $-32, %rsp 17: subq $384, %rsp 24: movq %rdi, %r15 27: movq 48(%rbp), %r9 31: movq 16(%rbp), %rax 35: leal -1(%rcx), %ebx 38: movd %ebx, %xmm0 42: pshufd $0, %xmm0, %xmm1 47: movd %r8d, %xmm0 52: pshufd $0, %xmm0, %xmm0 57: movdqa %xmm0, 48(%rsp) 63: movzwl (%rax), %edi 66: movl %edi, (%rsp) 69: movslq 4(%rax), %r8 73: movq (%rdx), %r14 76: movl 8(%rdx), %eax 79: movl %eax, %ebx 81: subl $11, %ebx 84: setb %dil 88: subl %r8d, %ebx 91: setb %r12b 95: leal 12(%r8), %r13d 99: addq %r14, %r8 102: xorl %r10d, %r10d 105: orb %dil, %r12b 108: cmovnel %r10d, %ebx 112: leaq 320(%rsp), %r11 120: cmovneq %r11, %r8 124: subl $15, %eax 127: setb %r12b 131: subl %r13d, %eax 134: setb %dil 138: movslq %r13d, %r13 141: addq %r14, %r13 144: orb %r12b, %dil 147: cmovnel %r10d, %eax 151: cmovneq %r11, %r13 155: xorps %xmm5, %xmm5 158: movabsq $140126383685632, %rdi 168: movaps (%rdi), %xmm0 171: movaps %xmm0, 256(%rsp) 179: movabsq $140126383685648, %rdi 189: movdqa (%rdi), %xmm4 193: movdqa %xmm1, 272(%rsp) 202: pxor %xmm4, %xmm1 206: movdqa %xmm1, 224(%rsp) 215: movd (%rsp), %xmm0 220: pshufd $0, %xmm0, %xmm0 225: movdqa %xmm0, 208(%rsp) 234: movd %ebx, %xmm0 238: pshufd $0, %xmm0, %xmm0 243: pxor %xmm4, %xmm0 247: movdqa %xmm0, 192(%rsp) 256: movabsq $140126383685664, %rdi 266: movaps (%rdi), %xmm0 269: movaps %xmm0, 16(%rsp) 274: movd %eax, %xmm0 278: pshufd $0, %xmm0, %xmm0 283: pxor %xmm4, %xmm0 287: movdqa %xmm0, 176(%rsp) 296: movabsq $140126383685680, %rax 306: movaps (%rax), %xmm0 309: movaps %xmm0, 160(%rsp) 317: movabsq $140126383685696, %rax 327: movaps (%rax), %xmm0 330: movaps %xmm0, 144(%rsp) 338: movabsq $140126383685712, %rax 348: movaps (%rax), %xmm0 351: movaps %xmm0, 128(%rsp) 359: movabsq $140126383685728, %rax 369: movaps (%rax), %xmm0 372: movaps %xmm0, 112(%rsp) 377: movabsq $140126383685744, %rax 387: movaps (%rax), %xmm0 390: movaps %xmm0, 96(%rsp) 395: movabsq $140126383685760, %rax 405: movaps (%rax), %xmm0 408: movaps %xmm0, 80(%rsp) 413: movabsq $140126383685776, %rax 423: movaps (%rax), %xmm0 426: movaps %xmm0, 64(%rsp) 431: movdqa %xmm4, 240(%rsp) 440: nopl (%rax,%rax) 448: movslq %r10d, %rdi 451: imulq $68, %rdi, %r11 455: leaq (%rsi,%r11), %rax 459: testq %r9, %r9 462: movd %edi, %xmm0 466: pshufd $0, %xmm0, %xmm1 471: paddd 256(%rsp), %xmm1 480: movdqa %xmm1, %xmm2 484: pxor %xmm4, %xmm2 488: movdqa 224(%rsp), %xmm0 497: pcmpgtd %xmm2, %xmm0 501: pand %xmm0, %xmm1 505: pandn 272(%rsp), %xmm0 514: por %xmm1, %xmm0 518: movaps %xmm5, 304(%rsp) 526: je 80 528: pslld $2, %xmm0 533: pshufd $78, %xmm0, %xmm1 538: movd %xmm1, %rdi 543: movd %xmm0, %rbx 548: movslq %ebx, %r14 551: sarq $32, %rbx 555: movslq %edi, %rdx 558: sarq $32, %rdi 562: movd (%r9,%rbx), %xmm1 568: movd (%r9,%rdi), %xmm0 574: punpckldq %xmm0, %xmm1 578: movd (%r9,%r14), %xmm0 584: movd (%r9,%rdx), %xmm2 590: punpckldq %xmm2, %xmm0 594: punpckldq %xmm1, %xmm0 598: jmp 14 600: nopl (%rax,%rax) 608: paddd 48(%rsp), %xmm0 614: pshufd $245, %xmm0, %xmm1 619: movdqa 208(%rsp), %xmm2 628: pmuludq %xmm2, %xmm1 632: pshufd $232, %xmm1, %xmm1 637: pmuludq %xmm2, %xmm0 641: pshufd $232, %xmm0, %xmm0 646: punpckldq %xmm1, %xmm0 650: movdqa %xmm0, %xmm3 654: pxor %xmm4, %xmm3 658: movdqa 192(%rsp), %xmm7 667: pcmpgtd %xmm3, %xmm7 671: movdqa %xmm7, %xmm1 675: pand %xmm0, %xmm1 679: pshufd $78, %xmm1, %xmm2 684: movd %xmm2, %rdx 689: movd %xmm1, %rdi 694: movslq %edi, %rbx 697: movsd (%r8,%rbx), %xmm11 703: movss 8(%r8,%rbx), %xmm1 710: shufps $48, %xmm11, %xmm1 715: shufps $132, %xmm1, %xmm11 720: sarq $32, %rdi 724: movsd (%r8,%rdi), %xmm1 730: movss 8(%r8,%rdi), %xmm2 737: shufps $48, %xmm1, %xmm2 741: shufps $132, %xmm2, %xmm1 745: movslq %edx, %rdi 748: movsd (%r8,%rdi), %xmm2 754: movss 8(%r8,%rdi), %xmm4 761: shufps $48, %xmm2, %xmm4 765: shufps $132, %xmm4, %xmm2 769: sarq $32, %rdx 773: movsd (%r8,%rdx), %xmm4 779: movss 8(%r8,%rdx), %xmm5 786: shufps $48, %xmm4, %xmm5 790: shufps $132, %xmm5, %xmm4 794: movaps %xmm11, %xmm6 798: punpckldq %xmm1, %xmm6 802: movaps %xmm2, %xmm5 805: punpckldq %xmm4, %xmm5 809: punpckhdq %xmm1, %xmm11 814: punpckhdq %xmm4, %xmm2 818: movdqa %xmm6, %xmm10 823: punpcklqdq %xmm5, %xmm10 828: pand %xmm7, %xmm10 833: punpckhqdq %xmm5, %xmm6 837: pand %xmm7, %xmm6 841: punpcklqdq %xmm2, %xmm11 846: pand %xmm7, %xmm11 851: pand 16(%rsp), %xmm7 857: movdqa 176(%rsp), %xmm1 866: pcmpgtd %xmm3, %xmm1 870: pand %xmm1, %xmm0 874: pshufd $78, %xmm0, %xmm2 879: movd %xmm2, %rdx 884: movd %xmm0, %rdi 889: movslq %edi, %rbx 892: movdqu (%r13,%rbx), %xmm8 899: sarq $32, %rdi 903: movdqu (%r13,%rdi), %xmm0 910: movslq %edx, %rdi 913: movdqu (%r13,%rdi), %xmm2 920: sarq $32, %rdx 924: movdqu (%r13,%rdx), %xmm3 931: movdqa %xmm8, %xmm15 936: punpckldq %xmm0, %xmm15 941: movdqa %xmm2, %xmm4 945: punpckldq %xmm3, %xmm4 949: punpckhdq %xmm0, %xmm8 954: punpckhdq %xmm3, %xmm2 958: movdqa %xmm15, %xmm14 963: punpcklqdq %xmm4, %xmm14 968: pand %xmm1, %xmm14 973: punpckhqdq %xmm4, %xmm15 978: pand %xmm1, %xmm15 983: movdqa %xmm8, %xmm12 988: punpcklqdq %xmm2, %xmm12 993: pand %xmm1, %xmm12 998: punpckhqdq %xmm2, %xmm8 1003: pand %xmm1, %xmm8 1008: movdqa %xmm10, %xmm0 1013: unpcklps %xmm6, %xmm0 1016: movdqa %xmm11, %xmm2 1021: unpcklps %xmm7, %xmm2 1024: movdqa %xmm10, %xmm1 1029: unpckhps %xmm6, %xmm1 1032: movdqa %xmm11, %xmm13 1037: unpckhps %xmm7, %xmm13 1041: movaps %xmm0, %xmm3 1044: unpcklpd %xmm2, %xmm3 1048: movapd %xmm3, 288(%rsp) 1057: movhlps %xmm0, %xmm2 1060: movaps %xmm2, (%rsp) 1064: movaps %xmm1, %xmm0 1067: unpcklpd %xmm13, %xmm0 1072: movapd %xmm0, 32(%rsp) 1078: movhlps %xmm1, %xmm13 1082: movdqa %xmm10, %xmm5 1087: cmpnleps %xmm7, %xmm5 1091: movaps %xmm7, %xmm2 1094: movaps %xmm7, %xmm0 1097: addps %xmm10, %xmm0 1101: xorpd %xmm3, %xmm3 1105: cmpnleps %xmm0, %xmm3 1109: addps %xmm6, %xmm2 1112: xorps %xmm1, %xmm1 1115: cmpnleps %xmm2, %xmm1 1119: movaps %xmm6, %xmm2 1122: cmpnleps %xmm7, %xmm2 1126: movdqa %xmm11, %xmm9 1131: cmpnleps %xmm7, %xmm9 1136: movaps 16(%rsp), %xmm4 1141: divps %xmm7, %xmm4 1144: addps %xmm11, %xmm7 1148: xorps %xmm0, %xmm0 1151: cmpnleps %xmm7, %xmm0 1155: andps 160(%rsp), %xmm5 1163: andps 80(%rsp), %xmm9 1169: orps %xmm5, %xmm9 1173: andps 144(%rsp), %xmm3 1181: orps %xmm3, %xmm9 1185: andps 112(%rsp), %xmm1 1190: orps %xmm1, %xmm9 1194: andps 96(%rsp), %xmm0 1199: orps %xmm0, %xmm9 1203: movaps 288(%rsp), %xmm7 1211: movups %xmm7, 4(%rsi,%r11) 1217: movaps (%rsp), %xmm0 1221: movups %xmm0, 72(%rax) 1225: movaps 32(%rsp), %xmm0 1230: movups %xmm0, 140(%rax) 1237: movups %xmm13, 208(%rax) 1245: andps 128(%rsp), %xmm2 1253: orps %xmm2, %xmm9 1257: movaps 304(%rsp), %xmm5 1265: orps %xmm9, %xmm5 1269: movq 200(%r15), %rdx 1276: movss (%rdx), %xmm0 1280: shufps $0, %xmm0, %xmm0 1284: mulps %xmm4, %xmm10 1288: mulps %xmm0, %xmm10 1292: movss 12(%rdx), %xmm0 1297: shufps $0, %xmm0, %xmm0 1301: addps %xmm0, %xmm10 1305: movss 4(%rdx), %xmm0 1310: shufps $0, %xmm0, %xmm0 1314: mulps %xmm4, %xmm6 1317: mulps %xmm0, %xmm6 1320: movss 16(%rdx), %xmm0 1325: shufps $0, %xmm0, %xmm0 1329: addps %xmm0, %xmm6 1332: movss 8(%rdx), %xmm0 1337: shufps $0, %xmm0, %xmm0 1341: mulps %xmm4, %xmm11 1345: mulps %xmm0, %xmm11 1349: movss 20(%rdx), %xmm0 1354: shufps $0, %xmm0, %xmm0 1358: addps %xmm0, %xmm11 1362: movaps %xmm10, %xmm0 1366: unpcklps %xmm6, %xmm0 1369: unpckhps %xmm6, %xmm10 1373: movaps %xmm11, %xmm1 1377: unpcklps %xmm4, %xmm1 1380: unpckhps %xmm4, %xmm11 1384: movdqa 240(%rsp), %xmm4 1393: movaps %xmm0, %xmm2 1396: unpcklpd %xmm1, %xmm2 1400: movhlps %xmm0, %xmm1 1403: movaps %xmm10, %xmm0 1407: unpcklpd %xmm11, %xmm0 1412: movhlps %xmm10, %xmm11 1416: orps 64(%rsp), %xmm9 1422: movss %xmm9, (%rax) 1427: pshufd $229, %xmm9, %xmm3 1433: movd %xmm3, 68(%rax) 1438: pshufd $78, %xmm9, %xmm3 1444: movd %xmm3, 136(%rax) 1452: pshufd $231, %xmm9, %xmm3 1458: movd %xmm3, 204(%rax) 1466: movupd %xmm2, 20(%rsi,%r11) 1473: movups %xmm1, 88(%rax) 1477: movupd %xmm0, 156(%rax) 1485: movups %xmm11, 224(%rax) 1493: movdqa %xmm14, %xmm0 1498: unpcklps %xmm15, %xmm0 1502: unpckhps %xmm15, %xmm14 1506: movdqa %xmm12, %xmm1 1511: unpcklps %xmm8, %xmm1 1515: unpckhps %xmm8, %xmm12 1519: movaps %xmm0, %xmm2 1522: unpcklpd %xmm1, %xmm2 1526: movhlps %xmm0, %xmm1 1529: movaps %xmm14, %xmm0 1533: unpcklpd %xmm12, %xmm0 1538: movhlps %xmm14, %xmm12 1542: movupd %xmm2, 36(%rsi,%r11) 1549: movups %xmm1, 104(%rax) 1553: movupd %xmm0, 172(%rax) 1561: movups %xmm12, 240(%rax) 1569: movups %xmm7, 52(%rsi,%r11) 1575: movaps (%rsp), %xmm0 1579: movups %xmm0, 120(%rax) 1583: movaps 32(%rsp), %xmm0 1588: movups %xmm0, 188(%rax) 1595: movups %xmm13, 256(%rax) 1603: addl $4, %r10d 1607: cmpl %ecx, %r10d 1610: jb -1168 1616: movd %xmm5, %rax 1621: pshufd $78, %xmm5, %xmm0 1626: movd %xmm0, %rcx 1631: orq %rax, %rcx 1634: setne %al 1637: leaq -40(%rbp), %rsp 1641: popq %rbx 1642: popq %r12 1644: popq %r13 1646: popq %r14 1648: popq %r15 1650: popq %rbp 1651: retq