define void @fetch_b8g8r8a8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: pshufd $198, %xmm0, %xmm0 93: movdqa %xmm0, (%rdi) 97: popq %rbp 98: ret define void @fetch_b8g8r8a8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = lshr i32 %7, 16 %9 = and i32 %8, 255 %10 = and i32 %7, -16711936 %11 = or i32 %9, %10 %12 = shl i32 %7, 16 %13 = and i32 %12, 16711680 %14 = or i32 %11, %13 %15 = bitcast i32 %14 to <4 x i8> store <4 x i8> %15, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B8G8R8A8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movl %eax, %ecx 8: andl $4278255360, %ecx 14: movl %eax, %edx 16: shrl $16, %edx 19: movzbl %dl, %edx 22: orl %ecx, %edx 24: shll $16, %eax 27: andl $16711680, %eax 33: orl %edx, %eax 35: movl %eax, (%rdi) 37: popq %rbp 38: ret define void @fetch_b8g8r8x8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B8G8R8A8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $2, %xmm0, %eax 19: pextrd $1, %xmm0, %ecx 25: shrl $8, %ecx 28: pextrd $3, %xmm0, %edx 34: shrl %cl, %edx 36: pinsrd $1, %ecx, %xmm0 42: shrl $16, %eax 45: pinsrd $2, %eax, %xmm0 51: movabsq $139845047042064, %rax 61: movaps (%rax), %xmm1 64: movabsq $139845047042080, %rax 74: movabsq $139845047042096, %rcx 84: pinsrd $3, %edx, %xmm0 90: pand (%rcx), %xmm0 94: cvtdq2ps %xmm0, %xmm0 97: mulps (%rax), %xmm0 100: shufps $1, %xmm0, %xmm1 104: shufps $38, %xmm1, %xmm0 108: movaps %xmm0, (%rdi) 111: popq %rbp 112: ret define void @fetch_b8g8r8x8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = lshr i32 %7, 16 %9 = and i32 %8, 255 %10 = or i32 %9, bitcast (<4 x i8> to i32) %11 = and i32 %7, 65280 %12 = or i32 %10, %11 %13 = shl i32 %7, 16 %14 = and i32 %13, 16711680 %15 = or i32 %12, %14 %16 = bitcast i32 %15 to <4 x i8> store <4 x i8> %16, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B8G8R8X8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movl %eax, %ecx 8: shrl $16, %ecx 11: movzbl %cl, %ecx 14: movabsq $139845047042064, %rdx 24: orl (%rdx), %ecx 26: movl %eax, %edx 28: andl $65280, %edx 34: orl %ecx, %edx 36: shll $16, %eax 39: andl $16711680, %eax 45: orl %edx, %eax 47: movl %eax, (%rdi) 49: popq %rbp 50: ret define void @fetch_a8r8g8b8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B8G8R8X8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: pshufd $57, %xmm0, %xmm0 93: movdqa %xmm0, (%rdi) 97: popq %rbp 98: ret define void @fetch_a8r8g8b8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = lshr i32 %7, 8 %9 = shl i32 %7, 24 %10 = or i32 %8, %9 %11 = bitcast i32 %10 to <4 x i8> store <4 x i8> %11, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_A8R8G8B8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: roll $24, %eax 9: movl %eax, (%rdi) 11: popq %rbp 12: ret define void @fetch_x8r8g8b8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_A8R8G8B8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: movd %xmm0, %eax 17: shrl %cl, %eax 19: movd %eax, %xmm1 23: pextrd $1, %xmm0, %eax 29: shrl $8, %eax 32: pinsrd $1, %eax, %xmm1 38: pextrd $2, %xmm0, %eax 44: shrl $16, %eax 47: movabsq $139845047042064, %rcx 57: movaps (%rcx), %xmm2 60: movabsq $139845047042080, %rcx 70: movabsq $139845047042096, %rdx 80: pinsrd $2, %eax, %xmm1 86: pextrd $3, %xmm0, %eax 92: shrl $24, %eax 95: pinsrd $3, %eax, %xmm1 101: pand (%rdx), %xmm1 105: cvtdq2ps %xmm1, %xmm0 108: mulps (%rcx), %xmm0 111: shufps $49, %xmm0, %xmm2 115: shufps $41, %xmm2, %xmm0 119: movaps %xmm0, (%rdi) 122: popq %rbp 123: ret define void @fetch_x8r8g8b8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = lshr i32 %7, 8 %9 = or i32 %8, bitcast (<4 x i8> to i32) %10 = bitcast i32 %9 to <4 x i8> store <4 x i8> %10, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_X8R8G8B8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shrl $8, %eax 9: movabsq $139845047042064, %rcx 19: orl (%rcx), %eax 21: movl %eax, (%rdi) 23: popq %rbp 24: ret define void @fetch_b5g5r5a1_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_X8R8G8B8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $3, %xmm0, %eax 22: pextrd $2, %xmm0, %ecx 28: pextrd $1, %xmm0, %edx 34: shrl $5, %edx 37: pinsrd $1, %edx, %xmm0 43: shrl $10, %ecx 46: movabsq $139845047042064, %rdx 56: movabsq $139845047042080, %rsi 66: pinsrd $2, %ecx, %xmm0 72: shrl $15, %eax 75: pinsrd $3, %eax, %xmm0 81: pand (%rsi), %xmm0 85: cvtdq2ps %xmm0, %xmm0 88: mulps (%rdx), %xmm0 91: pshufd $198, %xmm0, %xmm0 96: movdqa %xmm0, (%rdi) 100: popq %rbp 101: ret define void @fetch_b5g5r5a1_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = lshr i32 %32, 16 %34 = and i32 %33, 255 %35 = and i32 %32, -16711936 %36 = or i32 %34, %35 %37 = shl i32 %32, 16 %38 = and i32 %37, 16711680 %39 = or i32 %36, %38 %40 = bitcast i32 %39 to <4 x i8> store <4 x i8> %40, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B5G5R5A1_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $3, %xmm0, %eax 25: pextrd $2, %xmm0, %ecx 31: pextrd $1, %xmm0, %edx 37: shrl $5, %edx 40: pinsrd $1, %edx, %xmm0 46: shrl $10, %ecx 49: movabsq $139845047042064, %rdx 59: movabsq $139845047042080, %rsi 69: movabsq $139845047042096, %r8 79: movabsq $139845047042112, %r9 89: movabsq $139845047042128, %r10 99: movabsq $139845047042144, %r11 109: xorps %xmm1, %xmm1 112: movabsq $139845047042160, %rbx 122: movabsq $139845047042176, %r14 132: pinsrd $2, %ecx, %xmm0 138: shrl $15, %eax 141: pinsrd $3, %eax, %xmm0 147: pand (%r14), %xmm0 152: cvtdq2ps %xmm0, %xmm0 155: mulps (%rbx), %xmm0 158: maxps %xmm1, %xmm0 161: minps (%r11), %xmm0 165: mulps (%r10), %xmm0 169: addps (%r9), %xmm0 173: andps (%r8), %xmm0 177: pshufd $1, %xmm0, %xmm1 182: pshufd $3, %xmm0, %xmm2 187: movaps %xmm0, %xmm3 190: movhlps %xmm3, %xmm3 193: punpcklwd %xmm2, %xmm3 197: pshufb (%rsi), %xmm3 202: punpcklwd %xmm1, %xmm0 206: pshufb (%rdx), %xmm0 211: por %xmm3, %xmm0 215: movd %xmm0, %eax 219: movl %eax, %ecx 221: andl $4278255360, %ecx 227: movl %eax, %edx 229: shrl $16, %edx 232: movzbl %dl, %edx 235: orl %ecx, %edx 237: shll $16, %eax 240: andl $16711680, %eax 246: orl %edx, %eax 248: movl %eax, (%rdi) 250: popq %rbx 251: popq %r14 253: popq %rbp 254: ret define void @fetch_b4g4r4a4_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B5G5R5A1_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $3, %xmm0, %eax 22: pextrd $2, %xmm0, %ecx 28: pextrd $1, %xmm0, %edx 34: shrl $4, %edx 37: pinsrd $1, %edx, %xmm0 43: shrl $8, %ecx 46: movabsq $139845047042064, %rdx 56: movabsq $139845047042080, %rsi 66: pinsrd $2, %ecx, %xmm0 72: shrl $12, %eax 75: pinsrd $3, %eax, %xmm0 81: pand (%rsi), %xmm0 85: cvtdq2ps %xmm0, %xmm0 88: mulps (%rdx), %xmm0 91: pshufd $198, %xmm0, %xmm0 96: movdqa %xmm0, (%rdi) 100: popq %rbp 101: ret define void @fetch_b4g4r4a4_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = lshr i32 %32, 16 %34 = and i32 %33, 255 %35 = and i32 %32, -16711936 %36 = or i32 %34, %35 %37 = shl i32 %32, 16 %38 = and i32 %37, 16711680 %39 = or i32 %36, %38 %40 = bitcast i32 %39 to <4 x i8> store <4 x i8> %40, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B4G4R4A4_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $3, %xmm0, %eax 25: pextrd $2, %xmm0, %ecx 31: pextrd $1, %xmm0, %edx 37: shrl $4, %edx 40: pinsrd $1, %edx, %xmm0 46: shrl $8, %ecx 49: movabsq $139845047042064, %rdx 59: movabsq $139845047042080, %rsi 69: movabsq $139845047042096, %r8 79: movabsq $139845047042112, %r9 89: movabsq $139845047042128, %r10 99: movabsq $139845047042144, %r11 109: xorps %xmm1, %xmm1 112: movabsq $139845047042160, %rbx 122: movabsq $139845047042176, %r14 132: pinsrd $2, %ecx, %xmm0 138: shrl $12, %eax 141: pinsrd $3, %eax, %xmm0 147: pand (%r14), %xmm0 152: cvtdq2ps %xmm0, %xmm0 155: mulps (%rbx), %xmm0 158: maxps %xmm1, %xmm0 161: minps (%r11), %xmm0 165: mulps (%r10), %xmm0 169: addps (%r9), %xmm0 173: andps (%r8), %xmm0 177: pshufd $1, %xmm0, %xmm1 182: pshufd $3, %xmm0, %xmm2 187: movaps %xmm0, %xmm3 190: movhlps %xmm3, %xmm3 193: punpcklwd %xmm2, %xmm3 197: pshufb (%rsi), %xmm3 202: punpcklwd %xmm1, %xmm0 206: pshufb (%rdx), %xmm0 211: por %xmm3, %xmm0 215: movd %xmm0, %eax 219: movl %eax, %ecx 221: andl $4278255360, %ecx 227: movl %eax, %edx 229: shrl $16, %edx 232: movzbl %dl, %edx 235: orl %ecx, %edx 237: shll $16, %eax 240: andl $16711680, %eax 246: orl %edx, %eax 248: movl %eax, (%rdi) 250: popq %rbx 251: popq %r14 253: popq %rbp 254: ret define void @fetch_b5g6r5_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B4G4R4A4_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: pextrd $1, %xmm0, %ecx 28: shrl $5, %ecx 31: pextrd $3, %xmm0, %edx 37: shrl %cl, %edx 39: pinsrd $1, %ecx, %xmm0 45: movabsq $139845047042064, %rcx 55: movaps (%rcx), %xmm1 58: shrl $11, %eax 61: movabsq $139845047042080, %rcx 71: movabsq $139845047042096, %rsi 81: pinsrd $2, %eax, %xmm0 87: pinsrd $3, %edx, %xmm0 93: pand (%rsi), %xmm0 97: cvtdq2ps %xmm0, %xmm0 100: mulps (%rcx), %xmm0 103: shufps $1, %xmm0, %xmm1 107: shufps $38, %xmm1, %xmm0 111: movaps %xmm0, (%rdi) 114: popq %rbp 115: ret define void @fetch_b5g6r5_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = lshr i32 %32, 16 %34 = and i32 %33, 255 %35 = or i32 %34, bitcast (<4 x i8> to i32) %36 = and i32 %32, 65280 %37 = or i32 %35, %36 %38 = shl i32 %32, 16 %39 = and i32 %38, 16711680 %40 = or i32 %37, %39 %41 = bitcast i32 %40 to <4 x i8> store <4 x i8> %41, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B5G6R5_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $2, %xmm0, %eax 25: pextrd $1, %xmm0, %ecx 31: shrl $5, %ecx 34: pextrd $3, %xmm0, %edx 40: shrl %cl, %edx 42: pinsrd $1, %ecx, %xmm0 48: shrl $11, %eax 51: movabsq $139845047042064, %rcx 61: movabsq $139845047042080, %rsi 71: movabsq $139845047042096, %r8 81: movabsq $139845047042112, %r9 91: movabsq $139845047042128, %r10 101: movabsq $139845047042144, %r11 111: movabsq $139845047042160, %rbx 121: xorps %xmm1, %xmm1 124: movabsq $139845047042176, %r14 134: pinsrd $2, %eax, %xmm0 140: movabsq $139845047042192, %rax 150: pinsrd $3, %edx, %xmm0 156: pand (%rax), %xmm0 160: cvtdq2ps %xmm0, %xmm0 163: mulps (%r14), %xmm0 167: maxps %xmm1, %xmm0 170: minps (%rbx), %xmm0 173: mulps (%r11), %xmm0 177: addps (%r10), %xmm0 181: andps (%r9), %xmm0 185: pshufd $1, %xmm0, %xmm1 190: pshufd $3, %xmm0, %xmm2 195: movaps %xmm0, %xmm3 198: movhlps %xmm3, %xmm3 201: punpcklwd %xmm2, %xmm3 205: pshufb (%r8), %xmm3 211: punpcklwd %xmm1, %xmm0 215: pshufb (%rsi), %xmm0 220: por %xmm3, %xmm0 224: movd %xmm0, %eax 228: movl %eax, %edx 230: shrl $16, %edx 233: movzbl %dl, %edx 236: orl (%rcx), %edx 238: movl %eax, %ecx 240: andl $65280, %ecx 246: orl %edx, %ecx 248: shll $16, %eax 251: andl $16711680, %eax 257: orl %ecx, %eax 259: movl %eax, (%rdi) 261: popq %rbx 262: popq %r14 264: popq %rbp 265: ret define void @fetch_r10g10b10a2_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B5G6R5_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: movaps %xmm0, (%rdi) 91: popq %rbp 92: ret define void @fetch_r10g10b10a2_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> store <4 x i8> %30, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R10G10B10A2_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: pextrd $3, %xmm0, %eax 20: pextrd $2, %xmm0, %ecx 26: pextrd $1, %xmm0, %edx 32: shrl $10, %edx 35: pinsrd $1, %edx, %xmm0 41: shrl $20, %ecx 44: pinsrd $2, %ecx, %xmm0 50: shrl $30, %eax 53: movabsq $139845047042064, %rcx 63: movabsq $139845047042080, %rdx 73: movabsq $139845047042096, %rsi 83: movabsq $139845047042112, %r8 93: movabsq $139845047042128, %r9 103: movabsq $139845047042144, %r10 113: xorps %xmm1, %xmm1 116: movabsq $139845047042160, %r11 126: movabsq $139845047042176, %rbx 136: pinsrd $3, %eax, %xmm0 142: pand (%rbx), %xmm0 146: cvtdq2ps %xmm0, %xmm0 149: mulps (%r11), %xmm0 153: maxps %xmm1, %xmm0 156: minps (%r10), %xmm0 160: mulps (%r9), %xmm0 164: addps (%r8), %xmm0 168: andps (%rsi), %xmm0 171: pshufd $1, %xmm0, %xmm1 176: pshufd $3, %xmm0, %xmm2 181: movaps %xmm0, %xmm3 184: movhlps %xmm3, %xmm3 187: punpcklwd %xmm2, %xmm3 191: pshufb (%rdx), %xmm3 196: punpcklwd %xmm1, %xmm0 200: pshufb (%rcx), %xmm0 205: por %xmm3, %xmm0 209: movd %xmm0, (%rdi) 213: popq %rbx 214: popq %rbp 215: ret define void @fetch_l8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R10G10B10A2_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movaps (%rax), %xmm1 50: movabsq $139845047042080, %rax 60: movss (%rax), %xmm2 64: mulps %xmm0, %xmm2 67: shufps $1, %xmm2, %xmm1 71: shufps $32, %xmm1, %xmm2 75: movaps %xmm2, (%rdi) 78: popq %rbp 79: ret define void @fetch_l8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> , i8 %4, i32 0 %6 = bitcast <4 x i8> %5 to i32 %7 = and i32 %6, 255 %8 = or i32 %7, bitcast (<4 x i8> to i32) %9 = shl i32 %6, 8 %10 = and i32 %9, 65280 %11 = or i32 %8, %10 %12 = shl i32 %6, 16 %13 = and i32 %12, 16711680 %14 = or i32 %11, %13 %15 = bitcast i32 %14 to <4 x i8> store <4 x i8> %15, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: movd %xmm0, %eax 15: movzbl %al, %ecx 18: movabsq $139845047042064, %rdx 28: orl (%rdx), %ecx 30: movl %eax, %edx 32: shll $8, %edx 35: movzwl %dx, %edx 38: orl %ecx, %edx 40: shll $16, %eax 43: andl $16711680, %eax 49: orl %edx, %eax 51: movl %eax, (%rdi) 53: popq %rbp 54: ret define void @fetch_a8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: mulps %xmm0, %xmm1 54: pslldq $12, %xmm1 59: movdqa %xmm1, (%rdi) 63: popq %rbp 64: ret define void @fetch_a8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> , i8 %4, i32 0 %6 = bitcast <4 x i8> %5 to i32 %7 = shl i32 %6, 24 %8 = bitcast i32 %7 to <4 x i8> store <4 x i8> %8, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_A8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: movd %xmm0, %eax 15: shll $24, %eax 18: movl %eax, (%rdi) 20: popq %rbp 21: ret define void @fetch_i8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_A8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: mulps %xmm0, %xmm1 54: pshufd $0, %xmm1, %xmm0 59: movdqa %xmm0, (%rdi) 63: popq %rbp 64: ret define void @fetch_i8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> , i8 %4, i32 0 %6 = and <4 x i8> %5, %7 = bitcast <4 x i8> %6 to i32 %8 = shl i32 %7, 8 %9 = or i32 %7, %8 %10 = shl i32 %9, 16 %11 = or i32 %9, %10 %12 = bitcast i32 %11 to <4 x i8> store <4 x i8> %12, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_I8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: andpd %xmm0, %xmm1 24: movd %xmm1, %eax 28: movl %eax, %ecx 30: shll $8, %ecx 33: orl %eax, %ecx 35: movl %ecx, %eax 37: shll $16, %eax 40: orl %ecx, %eax 42: movl %eax, (%rdi) 44: popq %rbp 45: ret define void @fetch_l8a8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_I8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $8, %edx 39: pinsrd $1, %edx, %xmm0 45: movabsq $139845047042064, %rdx 55: movabsq $139845047042080, %rsi 65: pinsrd $2, %eax, %xmm0 71: shrl %cl, %ecx 73: pinsrd $3, %ecx, %xmm0 79: pand (%rsi), %xmm0 83: cvtdq2ps %xmm0, %xmm0 86: mulps (%rdx), %xmm0 89: pshufd $64, %xmm0, %xmm0 94: movdqa %xmm0, (%rdi) 98: popq %rbp 99: ret define void @fetch_l8a8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = bitcast i32 %6 to <4 x i8> %8 = bitcast <4 x i8> %7 to i32 %9 = and i32 %8, 255 %10 = shl i32 %8, 8 %11 = and i32 %10, 65280 %12 = or i32 %9, %11 %13 = shl i32 %8, 16 %14 = or i32 %12, %13 %15 = bitcast i32 %14 to <4 x i8> store <4 x i8> %15, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L8A8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movzbl %al, %ecx 10: movl %eax, %edx 12: shll $8, %edx 15: movzwl %dx, %edx 18: orl %ecx, %edx 20: shll $16, %eax 23: orl %edx, %eax 25: movl %eax, (%rdi) 27: popq %rbp 28: ret define void @fetch_l16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L8A8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movaps (%rax), %xmm1 50: movabsq $139845047042080, %rax 60: movss (%rax), %xmm2 64: mulps %xmm0, %xmm2 67: shufps $1, %xmm2, %xmm1 71: shufps $32, %xmm1, %xmm2 75: movaps %xmm2, (%rdi) 78: popq %rbp 79: ret define void @fetch_l16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = and i32 %32, 255 %34 = or i32 %33, bitcast (<4 x i8> to i32) %35 = shl i32 %32, 8 %36 = and i32 %35, 65280 %37 = or i32 %34, %36 %38 = shl i32 %32, 16 %39 = and i32 %38, 16711680 %40 = or i32 %37, %39 %41 = bitcast i32 %40 to <4 x i8> store <4 x i8> %41, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L16_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: movabsq $139845047042064, %rax 44: movss (%rax), %xmm0 48: cvtdq2ps %xmm1, %xmm1 51: movabsq $139845047042080, %rax 61: movabsq $139845047042096, %rcx 71: movabsq $139845047042112, %rdx 81: movabsq $139845047042128, %rsi 91: movabsq $139845047042144, %r8 101: movabsq $139845047042160, %r9 111: movabsq $139845047042176, %r10 121: xorps %xmm2, %xmm2 124: mulps %xmm1, %xmm0 127: maxps %xmm2, %xmm0 130: minps (%r10), %xmm0 134: mulps (%r9), %xmm0 138: addps (%r8), %xmm0 142: andps (%rsi), %xmm0 145: pshufd $1, %xmm0, %xmm1 150: pshufd $3, %xmm0, %xmm2 155: movaps %xmm0, %xmm3 158: movhlps %xmm3, %xmm3 161: punpcklwd %xmm2, %xmm3 165: pshufb (%rdx), %xmm3 170: punpcklwd %xmm1, %xmm0 174: pshufb (%rcx), %xmm0 179: por %xmm3, %xmm0 183: movd %xmm0, %ecx 187: movzbl %cl, %edx 190: orl (%rax), %edx 192: movl %ecx, %eax 194: shll $8, %eax 197: movzwl %ax, %eax 200: orl %edx, %eax 202: shll $16, %ecx 205: andl $16711680, %ecx 211: orl %eax, %ecx 213: movl %ecx, (%rdi) 215: popq %rbp 216: ret define void @fetch_uyvy_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %9 = lshr i32 %5, 16 %y = and i32 %8, 255 %u = and i32 %5, 255 %v = and i32 %9, 255 %10 = add i32 %u, -128 %11 = add i32 %v, -128 %tmp = mul i32 %y, 298 %12 = add i32 %tmp, -4640 %13 = mul i32 %11, 409 %14 = mul i32 %11, -208 %15 = mul i32 %10, -100 %16 = add i32 %15, %14 %17 = mul i32 %10, 516 %18 = add i32 %13, %12 %19 = add i32 %16, %12 %20 = add i32 %17, %12 %r = ashr i32 %18, 8 %g = ashr i32 %19, 8 %b = ashr i32 %20, 8 %21 = icmp slt i32 %r, 255 %22 = select i1 %21, i32 %r, i32 255 %23 = icmp sgt i32 %22, 0 %24 = select i1 %23, i32 %22, i32 0 %25 = icmp slt i32 %g, 255 %26 = select i1 %25, i32 %g, i32 255 %27 = icmp sgt i32 %26, 0 %28 = icmp slt i32 %b, 255 %29 = select i1 %28, i32 %b, i32 255 %30 = icmp sgt i32 %29, 0 %.op = shl nsw i32 %26, 8 %31 = select i1 %27, i32 %.op, i32 0 %.op1 = shl i32 %29, 16 %32 = select i1 %30, i32 %.op1, i32 0 %33 = or i32 %24, %31 %34 = or i32 %33, %32 %35 = or i32 %34, -16777216 %36 = bitcast i32 %35 to <4 x i8> %37 = extractelement <4 x i8> %36, i32 0 %38 = zext i8 %37 to i32 %39 = insertelement <4 x i32> undef, i32 %38, i32 0 %40 = extractelement <4 x i8> %36, i32 1 %41 = zext i8 %40 to i32 %42 = insertelement <4 x i32> %39, i32 %41, i32 1 %43 = extractelement <4 x i8> %36, i32 2 %44 = zext i8 %43 to i32 %45 = insertelement <4 x i32> %42, i32 %44, i32 2 %46 = extractelement <4 x i8> %36, i32 3 %47 = zext i8 %46 to i32 %48 = insertelement <4 x i32> %45, i32 %47, i32 3 %49 = sitofp <4 x i32> %48 to <4 x float> %50 = fmul <4 x float> %49, store <4 x float> %50, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L16_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movl (%rsi), %eax 9: movl %eax, %ecx 11: shrl $16, %ecx 14: movzbl %cl, %ecx 17: addl $128, %ecx 20: movzbl %al, %esi 23: addl $128, %esi 26: imull $156, %esi, %r8d 30: imull $4294967088, %ecx, %r9d 37: imull $409, %ecx, %r10d 44: xorl %r11d, %r11d 47: movl $255, %ebx 52: imull $516, %esi, %esi 58: movabsq $139845047042064, %r14 68: addl %r8d, %r9d 71: shll $4, %edx 74: leal 8(%rdx), %ecx 77: shrl %cl, %eax 79: movzbl %al, %eax 82: imull $298, %eax, %eax 88: leal -4640(%rax,%r9), %ecx 96: sarl $8, %ecx 99: cmpl $255, %ecx 105: cmovgel %ebx, %ecx 108: movl %ecx, %edx 110: shll $8, %edx 113: testl %ecx, %ecx 115: cmovlel %r11d, %edx 119: leal -4640(%r10,%rax), %ecx 127: sarl $8, %ecx 130: cmpl $255, %ecx 136: cmovgel %ebx, %ecx 139: testl %ecx, %ecx 141: cmovlel %r11d, %ecx 145: orl %edx, %ecx 147: leal -4640(%rsi,%rax), %eax 154: sarl $8, %eax 157: cmpl $255, %eax 163: cmovgel %ebx, %eax 166: movl %eax, %edx 168: shll $16, %edx 171: testl %eax, %eax 173: cmovlel %r11d, %edx 177: orl %ecx, %edx 179: orl $4278190080, %edx 185: movd %edx, %xmm0 189: pextrb $1, %xmm0, %eax 195: pextrb $0, %xmm0, %ecx 201: movd %ecx, %xmm1 205: pinsrd $1, %eax, %xmm1 211: pextrb $2, %xmm0, %eax 217: pinsrd $2, %eax, %xmm1 223: pextrb $3, %xmm0, %eax 229: pinsrd $3, %eax, %xmm1 235: cvtdq2ps %xmm1, %xmm0 238: mulps (%r14), %xmm0 242: movaps %xmm0, (%rdi) 245: popq %rbx 246: popq %r14 248: popq %rbp 249: ret define void @fetch_uyvy_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %9 = lshr i32 %5, 16 %y = and i32 %8, 255 %u = and i32 %5, 255 %v = and i32 %9, 255 %10 = add i32 %u, -128 %11 = add i32 %v, -128 %tmp = mul i32 %y, 298 %12 = add i32 %tmp, -4640 %13 = mul i32 %11, 409 %14 = mul i32 %11, -208 %15 = mul i32 %10, -100 %16 = add i32 %15, %14 %17 = mul i32 %10, 516 %18 = add i32 %13, %12 %19 = add i32 %16, %12 %20 = add i32 %17, %12 %r = ashr i32 %18, 8 %g = ashr i32 %19, 8 %b = ashr i32 %20, 8 %21 = icmp slt i32 %r, 255 %22 = select i1 %21, i32 %r, i32 255 %23 = icmp sgt i32 %22, 0 %24 = select i1 %23, i32 %22, i32 0 %25 = icmp slt i32 %g, 255 %26 = select i1 %25, i32 %g, i32 255 %27 = icmp sgt i32 %26, 0 %28 = icmp slt i32 %b, 255 %29 = select i1 %28, i32 %b, i32 255 %30 = icmp sgt i32 %29, 0 %.op = shl nsw i32 %26, 8 %31 = select i1 %27, i32 %.op, i32 0 %.op1 = shl i32 %29, 16 %32 = select i1 %30, i32 %.op1, i32 0 %33 = or i32 %24, %31 %34 = or i32 %33, %32 %35 = or i32 %34, -16777216 %36 = bitcast i32 %35 to <4 x i8> store <4 x i8> %36, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_UYVY (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movl (%rsi), %eax 7: movl %eax, %ecx 9: shrl $16, %ecx 12: movzbl %cl, %ecx 15: addl $128, %ecx 18: movzbl %al, %esi 21: addl $128, %esi 24: imull $156, %esi, %r8d 28: imull $4294967088, %ecx, %r9d 35: imull $409, %ecx, %r10d 42: xorl %r11d, %r11d 45: movl $255, %ebx 50: imull $516, %esi, %esi 56: addl %r8d, %r9d 59: shll $4, %edx 62: leal 8(%rdx), %ecx 65: shrl %cl, %eax 67: movzbl %al, %eax 70: imull $298, %eax, %eax 76: leal -4640(%rax,%r9), %ecx 84: sarl $8, %ecx 87: cmpl $255, %ecx 93: cmovgel %ebx, %ecx 96: movl %ecx, %edx 98: shll $8, %edx 101: testl %ecx, %ecx 103: cmovlel %r11d, %edx 107: leal -4640(%r10,%rax), %ecx 115: sarl $8, %ecx 118: cmpl $255, %ecx 124: cmovgel %ebx, %ecx 127: testl %ecx, %ecx 129: cmovlel %r11d, %ecx 133: orl %edx, %ecx 135: leal -4640(%rsi,%rax), %eax 142: sarl $8, %eax 145: cmpl $255, %eax 151: cmovgel %ebx, %eax 154: movl %eax, %edx 156: shll $16, %edx 159: testl %eax, %eax 161: cmovlel %r11d, %edx 165: orl %ecx, %edx 167: orl $4278190080, %edx 173: movl %edx, (%rdi) 175: popq %rbx 176: popq %rbp 177: ret define void @fetch_yuyv_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %8 = lshr i32 %5, 8 %9 = lshr i32 %5, 24 %y = and i32 %7, 255 %u = and i32 %8, 255 %10 = add i32 %u, -128 %11 = add i32 %9, -128 %tmp = mul i32 %y, 298 %12 = add i32 %tmp, -4640 %13 = mul i32 %11, 409 %14 = mul i32 %11, -208 %15 = mul i32 %10, -100 %16 = add i32 %15, %14 %17 = mul i32 %10, 516 %18 = add i32 %13, %12 %19 = add i32 %16, %12 %20 = add i32 %17, %12 %r = ashr i32 %18, 8 %g = ashr i32 %19, 8 %b = ashr i32 %20, 8 %21 = icmp slt i32 %r, 255 %22 = select i1 %21, i32 %r, i32 255 %23 = icmp sgt i32 %22, 0 %24 = select i1 %23, i32 %22, i32 0 %25 = icmp slt i32 %g, 255 %26 = select i1 %25, i32 %g, i32 255 %27 = icmp sgt i32 %26, 0 %28 = icmp slt i32 %b, 255 %29 = select i1 %28, i32 %b, i32 255 %30 = icmp sgt i32 %29, 0 %.op = shl nsw i32 %26, 8 %31 = select i1 %27, i32 %.op, i32 0 %.op1 = shl i32 %29, 16 %32 = select i1 %30, i32 %.op1, i32 0 %33 = or i32 %24, %31 %34 = or i32 %33, %32 %35 = or i32 %34, -16777216 %36 = bitcast i32 %35 to <4 x i8> %37 = extractelement <4 x i8> %36, i32 0 %38 = zext i8 %37 to i32 %39 = insertelement <4 x i32> undef, i32 %38, i32 0 %40 = extractelement <4 x i8> %36, i32 1 %41 = zext i8 %40 to i32 %42 = insertelement <4 x i32> %39, i32 %41, i32 1 %43 = extractelement <4 x i8> %36, i32 2 %44 = zext i8 %43 to i32 %45 = insertelement <4 x i32> %42, i32 %44, i32 2 %46 = extractelement <4 x i8> %36, i32 3 %47 = zext i8 %46 to i32 %48 = insertelement <4 x i32> %45, i32 %47, i32 3 %49 = sitofp <4 x i32> %48 to <4 x float> %50 = fmul <4 x float> %49, store <4 x float> %50, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_UYVY (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: movzbl %dl, %ecx 18: movzbl %ah, %edx 21: shrl $24, %eax 24: addl $128, %eax 27: imull $298, %ecx, %ecx 33: imull $4294967088, %eax, %esi 39: addl $128, %edx 42: imull $156, %edx, %r8d 46: imull $409, %eax, %eax 52: xorl %r9d, %r9d 55: movl $255, %r10d 61: movabsq $139845047042064, %r11 71: addl %esi, %r8d 74: leal -4640(%rcx,%r8), %esi 82: sarl $8, %esi 85: cmpl $255, %esi 91: cmovgel %r10d, %esi 95: movl %esi, %r8d 98: shll $8, %r8d 102: testl %esi, %esi 104: leal -4640(%rax,%rcx), %eax 111: cmovlel %r9d, %r8d 115: sarl $8, %eax 118: cmpl $255, %eax 124: cmovgel %r10d, %eax 128: testl %eax, %eax 130: cmovlel %r9d, %eax 134: imull $516, %edx, %edx 140: leal -4640(%rdx,%rcx), %ecx 147: orl %r8d, %eax 150: sarl $8, %ecx 153: cmpl $255, %ecx 159: cmovgel %r10d, %ecx 163: movl %ecx, %edx 165: shll $16, %edx 168: testl %ecx, %ecx 170: cmovlel %r9d, %edx 174: orl %eax, %edx 176: orl $4278190080, %edx 182: movd %edx, %xmm0 186: pextrb $1, %xmm0, %eax 192: pextrb $0, %xmm0, %ecx 198: movd %ecx, %xmm1 202: pinsrd $1, %eax, %xmm1 208: pextrb $2, %xmm0, %eax 214: pinsrd $2, %eax, %xmm1 220: pextrb $3, %xmm0, %eax 226: pinsrd $3, %eax, %xmm1 232: cvtdq2ps %xmm1, %xmm0 235: mulps (%r11), %xmm0 239: movaps %xmm0, (%rdi) 242: popq %rbp 243: ret define void @fetch_yuyv_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %8 = lshr i32 %5, 8 %9 = lshr i32 %5, 24 %y = and i32 %7, 255 %u = and i32 %8, 255 %10 = add i32 %u, -128 %11 = add i32 %9, -128 %tmp = mul i32 %y, 298 %12 = add i32 %tmp, -4640 %13 = mul i32 %11, 409 %14 = mul i32 %11, -208 %15 = mul i32 %10, -100 %16 = add i32 %15, %14 %17 = mul i32 %10, 516 %18 = add i32 %13, %12 %19 = add i32 %16, %12 %20 = add i32 %17, %12 %r = ashr i32 %18, 8 %g = ashr i32 %19, 8 %b = ashr i32 %20, 8 %21 = icmp slt i32 %r, 255 %22 = select i1 %21, i32 %r, i32 255 %23 = icmp sgt i32 %22, 0 %24 = select i1 %23, i32 %22, i32 0 %25 = icmp slt i32 %g, 255 %26 = select i1 %25, i32 %g, i32 255 %27 = icmp sgt i32 %26, 0 %28 = icmp slt i32 %b, 255 %29 = select i1 %28, i32 %b, i32 255 %30 = icmp sgt i32 %29, 0 %.op = shl nsw i32 %26, 8 %31 = select i1 %27, i32 %.op, i32 0 %.op1 = shl i32 %29, 16 %32 = select i1 %30, i32 %.op1, i32 0 %33 = or i32 %24, %31 %34 = or i32 %33, %32 %35 = or i32 %34, -16777216 %36 = bitcast i32 %35 to <4 x i8> store <4 x i8> %36, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_YUYV (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: movzbl %dl, %ecx 18: movl %eax, %edx 20: shrl $24, %eax 23: addl $128, %eax 26: imull $298, %ecx, %ecx 32: imull $4294967088, %eax, %esi 38: movzbl %dh, %edx 41: addl $128, %edx 44: imull $156, %edx, %r8d 48: imull $409, %eax, %eax 54: xorl %r9d, %r9d 57: movl $255, %r10d 63: addl %esi, %r8d 66: leal -4640(%rcx,%r8), %esi 74: sarl $8, %esi 77: cmpl $255, %esi 83: cmovgel %r10d, %esi 87: movl %esi, %r8d 90: shll $8, %r8d 94: testl %esi, %esi 96: leal -4640(%rax,%rcx), %eax 103: cmovlel %r9d, %r8d 107: sarl $8, %eax 110: cmpl $255, %eax 116: cmovgel %r10d, %eax 120: testl %eax, %eax 122: cmovlel %r9d, %eax 126: imull $516, %edx, %edx 132: leal -4640(%rdx,%rcx), %ecx 139: orl %r8d, %eax 142: sarl $8, %ecx 145: cmpl $255, %ecx 151: cmovgel %r10d, %ecx 155: movl %ecx, %edx 157: shll $16, %edx 160: testl %ecx, %ecx 162: cmovlel %r9d, %edx 166: orl %eax, %edx 168: orl $4278190080, %edx 174: movl %edx, (%rdi) 176: popq %rbp 177: ret define void @fetch_r64_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to double* %5 = load double* %4, align 8 %6 = fptrunc double %5 to float %7 = insertelement <4 x float> undef, float %6, i32 0 %8 = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> zeroinitializer %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_YUYV (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movsd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: movabsq $139845047042064, %rax 27: movaps (%rax), %xmm1 30: movlhps %xmm1, %xmm0 33: shufps $72, %xmm1, %xmm0 37: movaps %xmm0, (%rdi) 40: popq %rbp 41: ret define void @fetch_r64_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to double* %5 = load double* %4, align 8 %6 = fptrunc double %5 to float %7 = insertelement <4 x float> undef, float %6, i32 0 %8 = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> zeroinitializer %9 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %8, <4 x float> zeroinitializer) %10 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %9, <4 x float> ) %11 = fmul <4 x float> %10, %12 = fadd <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = and <4 x i32> %13, %15 = extractelement <4 x i32> %14, i32 0 %16 = extractelement <4 x i32> %14, i32 1 %17 = extractelement <4 x i32> %14, i32 2 %18 = extractelement <4 x i32> %14, i32 3 %19 = bitcast i32 %15 to <2 x i16> %20 = bitcast i32 %16 to <2 x i16> %21 = shufflevector <2 x i16> %19, <2 x i16> %20, <2 x i32> %22 = bitcast i32 %17 to <2 x i16> %23 = bitcast i32 %18 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast <2 x i16> %21 to <4 x i8> %26 = bitcast <2 x i16> %24 to <4 x i8> %27 = shufflevector <4 x i8> %25, <4 x i8> %26, <4 x i32> %28 = bitcast <4 x i8> %27 to i32 %29 = and i32 %28, 255 %30 = or i32 %29, bitcast (<4 x i8> to i32) %31 = bitcast i32 %30 to <4 x i8> store <4 x i8> %31, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movsd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: xorps %xmm1, %xmm1 20: maxps %xmm1, %xmm0 23: movabsq $139845047042064, %rax 33: minps (%rax), %xmm0 36: movabsq $139845047042080, %rax 46: movabsq $139845047042096, %rcx 56: movabsq $139845047042112, %rdx 66: movabsq $139845047042128, %rsi 76: movabsq $139845047042144, %r8 86: movabsq $139845047042160, %r9 96: mulps (%r9), %xmm0 100: addps (%r8), %xmm0 104: andps (%rsi), %xmm0 107: pshufd $1, %xmm0, %xmm1 112: pshufd $3, %xmm0, %xmm2 117: movaps %xmm0, %xmm3 120: movhlps %xmm3, %xmm3 123: punpcklwd %xmm2, %xmm3 127: pshufb (%rdx), %xmm3 132: punpcklwd %xmm1, %xmm0 136: pshufb (%rcx), %xmm0 141: por %xmm3, %xmm0 145: movd %xmm0, %ecx 149: movzbl %cl, %ecx 152: orl (%rax), %ecx 154: movl %ecx, (%rdi) 156: popq %rbp 157: ret define void @fetch_r64g64_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x double>* %5 = load <2 x double>* %4, align 8 %6 = fptrunc <2 x double> %5 to <2 x float> %7 = shufflevector <2 x float> %6, <2 x float> undef, <4 x i32> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: insertps $16, %xmm0, %xmm1 26: movabsq $139845047042064, %rax 36: movaps (%rax), %xmm0 39: movlhps %xmm0, %xmm1 42: movaps %xmm1, (%rdi) 45: popq %rbp 46: ret define void @fetch_r64g64_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x double>* %5 = load <2 x double>* %4, align 8 %6 = fptrunc <2 x double> %5 to <2 x float> %7 = shufflevector <2 x float> %6, <2 x float> undef, <4 x i32> %8 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> zeroinitializer) %9 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %8, <4 x float> ) %10 = fmul <4 x float> %9, %11 = fadd <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = and <4 x i32> %12, %14 = extractelement <4 x i32> %13, i32 0 %15 = extractelement <4 x i32> %13, i32 1 %16 = extractelement <4 x i32> %13, i32 2 %17 = extractelement <4 x i32> %13, i32 3 %18 = bitcast i32 %14 to <2 x i16> %19 = bitcast i32 %15 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast i32 %16 to <2 x i16> %22 = bitcast i32 %17 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast <2 x i16> %20 to <4 x i8> %25 = bitcast <2 x i16> %23 to <4 x i8> %26 = shufflevector <4 x i8> %24, <4 x i8> %25, <4 x i32> %27 = bitcast <4 x i8> %26 to i32 %28 = and i32 %27, 65535 %29 = or i32 %28, bitcast (<4 x i8> to i32) %30 = bitcast i32 %29 to <4 x i8> store <4 x i8> %30, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: insertps $16, %xmm0, %xmm1 26: movabsq $139845047042064, %rax 36: movabsq $139845047042080, %rcx 46: movabsq $139845047042096, %rdx 56: movabsq $139845047042112, %rsi 66: movabsq $139845047042128, %r8 76: movabsq $139845047042144, %r9 86: movabsq $139845047042160, %r10 96: xorps %xmm0, %xmm0 99: maxps %xmm0, %xmm1 102: minps (%r10), %xmm1 106: mulps (%r9), %xmm1 110: addps (%r8), %xmm1 114: andps (%rsi), %xmm1 117: pshufd $1, %xmm1, %xmm0 122: pshufd $3, %xmm1, %xmm2 127: movaps %xmm1, %xmm3 130: movhlps %xmm3, %xmm3 133: punpcklwd %xmm2, %xmm3 137: pshufb (%rdx), %xmm3 142: punpcklwd %xmm0, %xmm1 146: pshufb (%rcx), %xmm1 151: por %xmm3, %xmm1 155: movd %xmm1, %ecx 159: movzwl %cx, %ecx 162: orl (%rax), %ecx 164: movl %ecx, (%rdi) 166: popq %rbp 167: ret define void @fetch_r64g64b64_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x double>* %5 = load <3 x double>* %4, align 8 %6 = fptrunc <3 x double> %5 to <3 x float> %7 = shufflevector <3 x float> %6, <3 x float> undef, <4 x i32> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: insertps $16, %xmm0, %xmm1 26: movsd 16(%rsi), %xmm0 31: cvtsd2ss %xmm0, %xmm0 35: movabsq $139845047042064, %rax 45: movaps (%rax), %xmm2 48: insertps $32, %xmm0, %xmm1 54: insertps $48, %xmm0, %xmm1 60: shufps $33, %xmm1, %xmm2 64: shufps $36, %xmm2, %xmm1 68: movaps %xmm1, (%rdi) 71: popq %rbp 72: ret define void @fetch_r64g64b64_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x double>* %5 = load <3 x double>* %4, align 8 %6 = fptrunc <3 x double> %5 to <3 x float> %7 = shufflevector <3 x float> %6, <3 x float> undef, <4 x i32> %8 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> zeroinitializer) %9 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %8, <4 x float> ) %10 = fmul <4 x float> %9, %11 = fadd <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = and <4 x i32> %12, %14 = extractelement <4 x i32> %13, i32 0 %15 = extractelement <4 x i32> %13, i32 1 %16 = extractelement <4 x i32> %13, i32 2 %17 = extractelement <4 x i32> %13, i32 3 %18 = bitcast i32 %14 to <2 x i16> %19 = bitcast i32 %15 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast i32 %16 to <2 x i16> %22 = bitcast i32 %17 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast <2 x i16> %20 to <4 x i8> %25 = bitcast <2 x i16> %23 to <4 x i8> %26 = shufflevector <4 x i8> %24, <4 x i8> %25, <4 x i32> %27 = bitcast <4 x i8> %26 to i32 %28 = and i32 %27, 16777215 %29 = or i32 %28, bitcast (<4 x i8> to i32) %30 = bitcast i32 %29 to <4 x i8> store <4 x i8> %30, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: movsd 16(%rsi), %xmm2 25: cvtsd2ss %xmm2, %xmm2 29: insertps $16, %xmm0, %xmm1 35: movabsq $139845047042064, %rax 45: movabsq $139845047042080, %rcx 55: movabsq $139845047042096, %rdx 65: movabsq $139845047042112, %rsi 75: movabsq $139845047042128, %r8 85: movabsq $139845047042144, %r9 95: movabsq $139845047042160, %r10 105: xorps %xmm0, %xmm0 108: insertps $32, %xmm2, %xmm1 114: insertps $48, %xmm2, %xmm1 120: maxps %xmm0, %xmm1 123: minps (%r10), %xmm1 127: mulps (%r9), %xmm1 131: addps (%r8), %xmm1 135: andps (%rsi), %xmm1 138: pshufd $1, %xmm1, %xmm0 143: pshufd $3, %xmm1, %xmm2 148: movaps %xmm1, %xmm3 151: movhlps %xmm3, %xmm3 154: punpcklwd %xmm2, %xmm3 158: pshufb (%rdx), %xmm3 163: punpcklwd %xmm0, %xmm1 167: pshufb (%rcx), %xmm1 172: por %xmm3, %xmm1 176: movd %xmm1, %ecx 180: andl $16777215, %ecx 186: orl (%rax), %ecx 188: movl %ecx, (%rdi) 190: popq %rbp 191: ret define void @fetch_r64g64b64a64_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x double>* %5 = load <4 x double>* %4, align 8 %6 = fptrunc <4 x double> %5 to <4 x float> store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: insertps $16, %xmm0, %xmm1 26: movupd 16(%rsi), %xmm0 31: cvtsd2ss %xmm0, %xmm2 35: insertps $32, %xmm2, %xmm1 41: unpckhpd %xmm0, %xmm0 45: cvtsd2ss %xmm0, %xmm0 49: insertps $48, %xmm0, %xmm1 55: movdqa %xmm1, (%rdi) 59: popq %rbp 60: ret define void @fetch_r64g64b64a64_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x double>* %5 = load <4 x double>* %4, align 8 %6 = fptrunc <4 x double> %5 to <4 x float> %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movupd (%rsi), %xmm0 8: cvtsd2ss %xmm0, %xmm1 12: unpckhpd %xmm0, %xmm0 16: cvtsd2ss %xmm0, %xmm0 20: movupd 16(%rsi), %xmm2 25: cvtsd2ss %xmm2, %xmm3 29: insertps $16, %xmm0, %xmm1 35: movabsq $139845047042064, %rax 45: movabsq $139845047042080, %rcx 55: movabsq $139845047042096, %rdx 65: movabsq $139845047042112, %rsi 75: movabsq $139845047042128, %r8 85: movabsq $139845047042144, %r9 95: xorps %xmm0, %xmm0 98: insertps $32, %xmm3, %xmm1 104: unpckhpd %xmm2, %xmm2 108: cvtsd2ss %xmm2, %xmm2 112: insertps $48, %xmm2, %xmm1 118: maxps %xmm0, %xmm1 121: minps (%r9), %xmm1 125: mulps (%r8), %xmm1 129: addps (%rsi), %xmm1 132: andps (%rdx), %xmm1 135: pshufd $1, %xmm1, %xmm0 140: pshufd $3, %xmm1, %xmm2 145: movaps %xmm1, %xmm3 148: movhlps %xmm3, %xmm3 151: punpcklwd %xmm2, %xmm3 155: pshufb (%rcx), %xmm3 160: punpcklwd %xmm0, %xmm1 164: pshufb (%rax), %xmm1 169: por %xmm3, %xmm1 173: movd %xmm1, (%rdi) 177: popq %rbp 178: ret define void @fetch_r32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = zext i32 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movl (%rsi), %eax 19: movd %rax, %xmm1 24: movlhps %xmm0, %xmm1 27: shufps $72, %xmm0, %xmm1 31: movaps %xmm1, (%rdi) 34: popq %rbp 35: ret define void @fetch_r32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to float* %5 = load float* %4, align 4 %6 = insertelement <4 x float> undef, float %5, i32 0 %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer %8 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> zeroinitializer) %9 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %8, <4 x float> ) %10 = fmul <4 x float> %9, %11 = fadd <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = and <4 x i32> %12, %14 = extractelement <4 x i32> %13, i32 0 %15 = extractelement <4 x i32> %13, i32 1 %16 = extractelement <4 x i32> %13, i32 2 %17 = extractelement <4 x i32> %13, i32 3 %18 = bitcast i32 %14 to <2 x i16> %19 = bitcast i32 %15 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast i32 %16 to <2 x i16> %22 = bitcast i32 %17 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast <2 x i16> %20 to <4 x i8> %25 = bitcast <2 x i16> %23 to <4 x i8> %26 = shufflevector <4 x i8> %24, <4 x i8> %25, <4 x i32> %27 = bitcast <4 x i8> %26 to i32 %28 = and i32 %27, 255 %29 = or i32 %28, bitcast (<4 x i8> to i32) %30 = bitcast i32 %29 to <4 x i8> store <4 x i8> %30, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movss (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: xorps %xmm1, %xmm1 16: maxps %xmm1, %xmm0 19: movabsq $139845047042064, %rax 29: minps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: mulps (%rax), %xmm0 45: movabsq $139845047042096, %rax 55: movabsq $139845047042112, %rcx 65: movabsq $139845047042128, %rdx 75: movabsq $139845047042144, %rsi 85: movabsq $139845047042160, %r8 95: addps (%r8), %xmm0 99: andps (%rsi), %xmm0 102: pshufd $1, %xmm0, %xmm1 107: pshufd $3, %xmm0, %xmm2 112: movaps %xmm0, %xmm3 115: movhlps %xmm3, %xmm3 118: punpcklwd %xmm2, %xmm3 122: pshufb (%rdx), %xmm3 127: punpcklwd %xmm1, %xmm0 131: pshufb (%rcx), %xmm0 136: por %xmm3, %xmm0 140: movd %xmm0, %ecx 144: movzbl %cl, %ecx 147: orl (%rax), %ecx 149: movl %ecx, (%rdi) 151: popq %rbp 152: ret define void @fetch_r32g32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i64* %5 = load i64* %4, align 8 %6 = zext i64 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movq (%rsi), %xmm1 21: movlhps %xmm0, %xmm1 24: movaps %xmm1, (%rdi) 27: popq %rbp 28: ret define void @fetch_r32g32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x float>* %5 = load <2 x float>* %4, align 4 %6 = shufflevector <2 x float> %5, <2 x float> undef, <4 x i32> %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> %26 = bitcast <4 x i8> %25 to i32 %27 = and i32 %26, 65535 %28 = or i32 %27, bitcast (<4 x i8> to i32) %29 = bitcast i32 %28 to <4 x i8> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: xorps %xmm1, %xmm1 11: maxps %xmm1, %xmm0 14: movabsq $139845047042064, %rax 24: minps (%rax), %xmm0 27: movabsq $139845047042080, %rax 37: mulps (%rax), %xmm0 40: movabsq $139845047042096, %rax 50: addps (%rax), %xmm0 53: movabsq $139845047042112, %rax 63: andps (%rax), %xmm0 66: movabsq $139845047042128, %rax 76: movabsq $139845047042144, %rcx 86: movabsq $139845047042160, %rdx 96: pshufd $1, %xmm0, %xmm1 101: pshufd $3, %xmm0, %xmm2 106: movaps %xmm0, %xmm3 109: movhlps %xmm3, %xmm3 112: punpcklwd %xmm2, %xmm3 116: pshufb (%rdx), %xmm3 121: punpcklwd %xmm1, %xmm0 125: pshufb (%rcx), %xmm0 130: por %xmm3, %xmm0 134: movd %xmm0, %ecx 138: movzwl %cx, %ecx 141: orl (%rax), %ecx 143: movl %ecx, (%rdi) 145: popq %rbp 146: ret define void @fetch_r32g32b32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x float>* %5 = load <3 x float>* %4, align 4 %6 = shufflevector <3 x float> %5, <3 x float> undef, <4 x i32> %7 = shufflevector <4 x float> %6, <4 x float> , <4 x i32> store <4 x float> %7, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pshufd $1, %xmm0, %xmm1 13: insertps $16, %xmm1, %xmm0 19: insertps $32, 8(%rsi), %xmm0 26: movabsq $139845047042064, %rax 36: movaps (%rax), %xmm1 39: shufps $33, %xmm0, %xmm1 43: shufps $36, %xmm1, %xmm0 47: movaps %xmm0, (%rdi) 50: popq %rbp 51: ret define void @fetch_r32g32b32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x float>* %5 = load <3 x float>* %4, align 4 %6 = shufflevector <3 x float> %5, <3 x float> undef, <4 x i32> %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> %26 = bitcast <4 x i8> %25 to i32 %27 = and i32 %26, 16777215 %28 = or i32 %27, bitcast (<4 x i8> to i32) %29 = bitcast i32 %28 to <4 x i8> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pshufd $1, %xmm0, %xmm1 13: insertps $16, %xmm1, %xmm0 19: insertps $32, 8(%rsi), %xmm0 26: xorps %xmm1, %xmm1 29: maxps %xmm1, %xmm0 32: movabsq $139845047042064, %rax 42: minps (%rax), %xmm0 45: movabsq $139845047042080, %rax 55: movabsq $139845047042096, %rcx 65: movabsq $139845047042112, %rdx 75: movabsq $139845047042128, %rsi 85: movabsq $139845047042144, %r8 95: movabsq $139845047042160, %r9 105: mulps (%r9), %xmm0 109: addps (%r8), %xmm0 113: andps (%rsi), %xmm0 116: pshufd $1, %xmm0, %xmm1 121: pshufd $3, %xmm0, %xmm2 126: movaps %xmm0, %xmm3 129: movhlps %xmm3, %xmm3 132: punpcklwd %xmm2, %xmm3 136: pshufb (%rdx), %xmm3 141: punpcklwd %xmm1, %xmm0 145: pshufb (%rcx), %xmm0 150: por %xmm3, %xmm0 154: movd %xmm0, %ecx 158: andl $16777215, %ecx 164: orl (%rax), %ecx 166: movl %ecx, (%rdi) 168: popq %rbp 169: ret define void @fetch_r32g32b32a32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i128* %5 = load i128* %4, align 8 %6 = bitcast i128 %5 to <4 x float> store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %rax 7: movq 8(%rsi), %rcx 11: movq %rcx, 8(%rdi) 15: movq %rax, (%rdi) 18: popq %rbp 19: ret define void @fetch_r32g32b32a32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x float>* %5 = load <4 x float>* %4, align 4 %6 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %5, <4 x float> zeroinitializer) %7 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %6, <4 x float> ) %8 = fmul <4 x float> %7, %9 = fadd <4 x float> %8, %10 = bitcast <4 x float> %9 to <4 x i32> %11 = and <4 x i32> %10, %12 = extractelement <4 x i32> %11, i32 0 %13 = extractelement <4 x i32> %11, i32 1 %14 = extractelement <4 x i32> %11, i32 2 %15 = extractelement <4 x i32> %11, i32 3 %16 = bitcast i32 %12 to <2 x i16> %17 = bitcast i32 %13 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast i32 %14 to <2 x i16> %20 = bitcast i32 %15 to <2 x i16> %21 = shufflevector <2 x i16> %19, <2 x i16> %20, <2 x i32> %22 = bitcast <2 x i16> %18 to <4 x i8> %23 = bitcast <2 x i16> %21 to <4 x i8> %24 = shufflevector <4 x i8> %22, <4 x i8> %23, <4 x i32> store <4 x i8> %24, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movups (%rsi), %xmm0 7: xorps %xmm1, %xmm1 10: maxps %xmm1, %xmm0 13: movabsq $139845047042064, %rax 23: minps (%rax), %xmm0 26: movabsq $139845047042080, %rax 36: mulps (%rax), %xmm0 39: movabsq $139845047042096, %rax 49: addps (%rax), %xmm0 52: movabsq $139845047042112, %rax 62: andps (%rax), %xmm0 65: movabsq $139845047042128, %rax 75: movabsq $139845047042144, %rcx 85: pshufd $1, %xmm0, %xmm1 90: pshufd $3, %xmm0, %xmm2 95: movaps %xmm0, %xmm3 98: movhlps %xmm3, %xmm3 101: punpcklwd %xmm2, %xmm3 105: pshufb (%rcx), %xmm3 110: punpcklwd %xmm1, %xmm0 114: pshufb (%rax), %xmm0 119: por %xmm3, %xmm0 123: movd %xmm0, (%rdi) 127: popq %rbp 128: ret define void @fetch_r32_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = uitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: psrld $0, %xmm0 18: xorps %xmm1, %xmm1 21: movss %xmm0, %xmm1 25: movaps %xmm1, -16(%rbp) 29: movabsq $139845047042064, %rax 39: movss (%rax), %xmm0 43: movabsq $139845047042080, %rax 53: movaps (%rax), %xmm1 56: movl -12(%rbp), %eax 59: movl -8(%rbp), %ecx 62: cvtsi2ssq %rax, %xmm2 67: movl -16(%rbp), %eax 70: cvtsi2ssq %rax, %xmm3 75: insertps $16, %xmm2, %xmm3 81: cvtsi2ssq %rcx, %xmm2 86: insertps $32, %xmm2, %xmm3 92: movl -4(%rbp), %eax 95: cvtsi2ssq %rax, %xmm2 100: insertps $48, %xmm2, %xmm3 106: mulps %xmm0, %xmm3 109: movlhps %xmm1, %xmm3 112: shufps $72, %xmm1, %xmm3 116: movaps %xmm3, (%rdi) 119: popq %rbp 120: ret define void @fetch_r32_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = uitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 255 %33 = or i32 %32, bitcast (<4 x i8> to i32) %34 = bitcast i32 %33 to <4 x i8> store <4 x i8> %34, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: psrld $0, %xmm0 19: xorps %xmm1, %xmm1 22: xorps %xmm2, %xmm2 25: movss %xmm0, %xmm2 29: movabsq $139845047042064, %rax 39: movss (%rax), %xmm0 43: movaps %xmm2, -32(%rbp) 47: movabsq $139845047042080, %rax 57: movabsq $139845047042096, %rcx 67: movabsq $139845047042112, %rdx 77: movabsq $139845047042128, %rsi 87: movabsq $139845047042144, %r8 97: movabsq $139845047042160, %r9 107: movabsq $139845047042176, %r10 117: movl -28(%rbp), %r11d 121: movl -24(%rbp), %ebx 124: cvtsi2ssq %r11, %xmm2 129: movl -32(%rbp), %r11d 133: cvtsi2ssq %r11, %xmm3 138: insertps $16, %xmm2, %xmm3 144: cvtsi2ssq %rbx, %xmm2 149: insertps $32, %xmm2, %xmm3 155: movl -20(%rbp), %r11d 159: cvtsi2ssq %r11, %xmm2 164: insertps $48, %xmm2, %xmm3 170: mulps %xmm0, %xmm3 173: maxps %xmm1, %xmm3 176: minps (%r10), %xmm3 180: mulps (%r9), %xmm3 184: addps (%r8), %xmm3 188: andps (%rsi), %xmm3 191: pshufd $1, %xmm3, %xmm0 196: pshufd $3, %xmm3, %xmm1 201: movaps %xmm3, %xmm2 204: movhlps %xmm2, %xmm2 207: punpcklwd %xmm1, %xmm2 211: pshufb (%rdx), %xmm2 216: punpcklwd %xmm0, %xmm3 220: pshufb (%rcx), %xmm3 225: por %xmm2, %xmm3 229: movd %xmm3, %ecx 233: movzbl %cl, %ecx 236: orl (%rax), %ecx 238: movl %ecx, (%rdi) 240: popq %rbx 241: popq %rbp 242: ret define void @fetch_r32g32_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = lshr <4 x i32> %6, %8 = or <4 x i32> %7, %9 = bitcast <4 x i32> %8 to <4 x float> %10 = fadd <4 x float> %9, %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: psrld $9, %xmm0 13: movabsq $139845047042064, %rax 23: por (%rax), %xmm0 27: movabsq $139845047042080, %rax 37: addps (%rax), %xmm0 40: movabsq $139845047042096, %rax 50: mulps (%rax), %xmm0 53: movabsq $139845047042112, %rax 63: movaps (%rax), %xmm1 66: movlhps %xmm1, %xmm0 69: movaps %xmm0, (%rdi) 72: popq %rbp 73: ret define void @fetch_r32g32_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = lshr <4 x i32> %6, %8 = extractelement <4 x i32> %7, i32 0 %9 = extractelement <4 x i32> %7, i32 1 %10 = extractelement <4 x i32> %7, i32 2 %11 = extractelement <4 x i32> %7, i32 3 %12 = bitcast i32 %8 to <2 x i16> %13 = bitcast i32 %9 to <2 x i16> %14 = shufflevector <2 x i16> %12, <2 x i16> %13, <2 x i32> %15 = bitcast i32 %10 to <2 x i16> %16 = bitcast i32 %11 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast <2 x i16> %14 to <4 x i8> %19 = bitcast <2 x i16> %17 to <4 x i8> %20 = shufflevector <4 x i8> %18, <4 x i8> %19, <4 x i32> %21 = bitcast <4 x i8> %20 to i32 %22 = and i32 %21, 65535 %23 = or i32 %22, bitcast (<4 x i8> to i32) %24 = bitcast i32 %23 to <4 x i8> store <4 x i8> %24, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: psrld $24, %xmm0 13: pshufd $1, %xmm0, %xmm1 18: pshufd $3, %xmm0, %xmm2 23: movdqa %xmm0, %xmm3 27: movhlps %xmm3, %xmm3 30: punpcklwd %xmm2, %xmm3 34: movabsq $139845047042064, %rax 44: pshufb (%rax), %xmm3 49: punpcklwd %xmm1, %xmm0 53: movabsq $139845047042080, %rax 63: pshufb (%rax), %xmm0 68: por %xmm3, %xmm0 72: movabsq $139845047042096, %rax 82: movd %xmm0, %ecx 86: movzwl %cx, %ecx 89: orl (%rax), %ecx 91: movl %ecx, (%rdi) 93: popq %rbp 94: ret define void @fetch_r32g32b32_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = lshr <4 x i32> %6, %8 = or <4 x i32> %7, %9 = bitcast <4 x i32> %8 to <4 x float> %10 = fadd <4 x float> %9, %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: psrld $9, %xmm0 32: movabsq $139845047042064, %rax 42: por (%rax), %xmm0 46: movabsq $139845047042080, %rax 56: movaps (%rax), %xmm1 59: movabsq $139845047042096, %rax 69: movabsq $139845047042112, %rcx 79: addps (%rcx), %xmm0 82: mulps (%rax), %xmm0 85: shufps $33, %xmm0, %xmm1 89: shufps $36, %xmm1, %xmm0 93: movaps %xmm0, (%rdi) 96: popq %rbp 97: ret define void @fetch_r32g32b32_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = lshr <4 x i32> %6, %8 = extractelement <4 x i32> %7, i32 0 %9 = extractelement <4 x i32> %7, i32 1 %10 = extractelement <4 x i32> %7, i32 2 %11 = extractelement <4 x i32> %7, i32 3 %12 = bitcast i32 %8 to <2 x i16> %13 = bitcast i32 %9 to <2 x i16> %14 = shufflevector <2 x i16> %12, <2 x i16> %13, <2 x i32> %15 = bitcast i32 %10 to <2 x i16> %16 = bitcast i32 %11 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast <2 x i16> %14 to <4 x i8> %19 = bitcast <2 x i16> %17 to <4 x i8> %20 = shufflevector <4 x i8> %18, <4 x i8> %19, <4 x i32> %21 = bitcast <4 x i8> %20 to i32 %22 = and i32 %21, 16777215 %23 = or i32 %22, bitcast (<4 x i8> to i32) %24 = bitcast i32 %23 to <4 x i8> store <4 x i8> %24, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: psrld $24, %xmm0 32: pshufd $1, %xmm0, %xmm1 37: pshufd $3, %xmm0, %xmm2 42: movdqa %xmm0, %xmm3 46: movhlps %xmm3, %xmm3 49: movabsq $139845047042064, %rax 59: movabsq $139845047042080, %rcx 69: movabsq $139845047042096, %rdx 79: punpcklwd %xmm2, %xmm3 83: pshufb (%rdx), %xmm3 88: punpcklwd %xmm1, %xmm0 92: pshufb (%rcx), %xmm0 97: por %xmm3, %xmm0 101: movd %xmm0, %ecx 105: andl $16777215, %ecx 111: orl (%rax), %ecx 113: movl %ecx, (%rdi) 115: popq %rbp 116: ret define void @fetch_r32g32b32a32_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = lshr <4 x i32> %5, %7 = or <4 x i32> %6, %8 = bitcast <4 x i32> %7 to <4 x float> %9 = fadd <4 x float> %8, %10 = fmul <4 x float> %9, store <4 x float> %10, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movdqu (%rsi), %xmm0 8: psrld $9, %xmm0 13: movabsq $139845047042064, %rax 23: por (%rax), %xmm0 27: movabsq $139845047042080, %rax 37: addps (%rax), %xmm0 40: movabsq $139845047042096, %rax 50: mulps (%rax), %xmm0 53: movaps %xmm0, (%rdi) 56: popq %rbp 57: ret define void @fetch_r32g32b32a32_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = lshr <4 x i32> %5, %7 = extractelement <4 x i32> %6, i32 0 %8 = extractelement <4 x i32> %6, i32 1 %9 = extractelement <4 x i32> %6, i32 2 %10 = extractelement <4 x i32> %6, i32 3 %11 = bitcast i32 %7 to <2 x i16> %12 = bitcast i32 %8 to <2 x i16> %13 = shufflevector <2 x i16> %11, <2 x i16> %12, <2 x i32> %14 = bitcast i32 %9 to <2 x i16> %15 = bitcast i32 %10 to <2 x i16> %16 = shufflevector <2 x i16> %14, <2 x i16> %15, <2 x i32> %17 = bitcast <2 x i16> %13 to <4 x i8> %18 = bitcast <2 x i16> %16 to <4 x i8> %19 = shufflevector <4 x i8> %17, <4 x i8> %18, <4 x i32> store <4 x i8> %19, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movdqu (%rsi), %xmm0 8: psrld $24, %xmm0 13: pshufd $1, %xmm0, %xmm1 18: pshufd $3, %xmm0, %xmm2 23: movdqa %xmm0, %xmm3 27: movhlps %xmm3, %xmm3 30: punpcklwd %xmm2, %xmm3 34: movabsq $139845047042064, %rax 44: pshufb (%rax), %xmm3 49: punpcklwd %xmm1, %xmm0 53: movabsq $139845047042080, %rax 63: pshufb (%rax), %xmm0 68: por %xmm3, %xmm0 72: movd %xmm0, (%rdi) 76: popq %rbp 77: ret define void @fetch_r32_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = uitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> , <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: psrld $0, %xmm0 18: xorps %xmm1, %xmm1 21: movss %xmm0, %xmm1 25: movaps %xmm1, -16(%rbp) 29: movabsq $139845047042064, %rax 39: movaps (%rax), %xmm0 42: movl -12(%rbp), %eax 45: movl -8(%rbp), %ecx 48: cvtsi2ssq %rax, %xmm1 53: movl -16(%rbp), %eax 56: cvtsi2ssq %rax, %xmm2 61: insertps $16, %xmm1, %xmm2 67: cvtsi2ssq %rcx, %xmm1 72: insertps $32, %xmm1, %xmm2 78: movl -4(%rbp), %eax 81: cvtsi2ssq %rax, %xmm1 86: insertps $48, %xmm1, %xmm2 92: movlhps %xmm0, %xmm2 95: shufps $72, %xmm0, %xmm2 99: movaps %xmm2, (%rdi) 102: popq %rbp 103: ret define void @fetch_r32_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = uitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> %30 = bitcast <4 x i8> %29 to i32 %31 = and i32 %30, 255 %32 = or i32 %31, bitcast (<4 x i8> to i32) %33 = bitcast i32 %32 to <4 x i8> store <4 x i8> %33, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: psrld $0, %xmm0 19: xorps %xmm1, %xmm1 22: xorps %xmm2, %xmm2 25: movss %xmm0, %xmm2 29: movaps %xmm2, -32(%rbp) 33: movabsq $139845047042064, %rax 43: movabsq $139845047042080, %rcx 53: movabsq $139845047042096, %rdx 63: movabsq $139845047042112, %rsi 73: movabsq $139845047042128, %r8 83: movabsq $139845047042144, %r9 93: movabsq $139845047042160, %r10 103: movl -28(%rbp), %r11d 107: movl -24(%rbp), %ebx 110: cvtsi2ssq %r11, %xmm0 115: movl -32(%rbp), %r11d 119: cvtsi2ssq %r11, %xmm2 124: insertps $16, %xmm0, %xmm2 130: cvtsi2ssq %rbx, %xmm0 135: insertps $32, %xmm0, %xmm2 141: movl -20(%rbp), %r11d 145: cvtsi2ssq %r11, %xmm0 150: insertps $48, %xmm0, %xmm2 156: maxps %xmm1, %xmm2 159: minps (%r10), %xmm2 163: mulps (%r9), %xmm2 167: addps (%r8), %xmm2 171: andps (%rsi), %xmm2 174: pshufd $1, %xmm2, %xmm0 179: pshufd $3, %xmm2, %xmm1 184: movaps %xmm2, %xmm3 187: movhlps %xmm3, %xmm3 190: punpcklwd %xmm1, %xmm3 194: pshufb (%rdx), %xmm3 199: punpcklwd %xmm0, %xmm2 203: pshufb (%rcx), %xmm2 208: por %xmm3, %xmm2 212: movd %xmm2, %ecx 216: movzbl %cl, %ecx 219: orl (%rax), %ecx 221: movl %ecx, (%rdi) 223: popq %rbx 224: popq %rbp 225: ret define void @fetch_r32g32_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movq (%rsi), %xmm1 21: cvtdq2ps %xmm1, %xmm1 24: movlhps %xmm0, %xmm1 27: movaps %xmm1, (%rdi) 30: popq %rbp 31: ret define void @fetch_r32g32_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %6, <4 x i32> ) %8 = lshr <4 x i32> %7, %9 = sub <4 x i32> %7, %8 %10 = extractelement <4 x i32> %9, i32 0 %11 = extractelement <4 x i32> %9, i32 1 %12 = extractelement <4 x i32> %9, i32 2 %13 = extractelement <4 x i32> %9, i32 3 %14 = bitcast i32 %10 to <2 x i16> %15 = bitcast i32 %11 to <2 x i16> %16 = shufflevector <2 x i16> %14, <2 x i16> %15, <2 x i32> %17 = bitcast i32 %12 to <2 x i16> %18 = bitcast i32 %13 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast <2 x i16> %16 to <4 x i8> %21 = bitcast <2 x i16> %19 to <4 x i8> %22 = shufflevector <4 x i8> %20, <4 x i8> %21, <4 x i32> %23 = shl <4 x i8> %22, %24 = sub <4 x i8> %23, %22 %25 = bitcast <4 x i8> %24 to i32 %26 = and i32 %25, 65535 %27 = or i32 %26, bitcast (<4 x i8> to i32) %28 = bitcast i32 %27 to <4 x i8> store <4 x i8> %28, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movdqa (%rax), %xmm0 18: movq (%rsi), %xmm1 22: pminud %xmm0, %xmm1 27: movdqa %xmm1, %xmm0 31: psrld $255, %xmm0 36: psubd %xmm0, %xmm1 40: pshufd $1, %xmm1, %xmm2 45: pshufd $3, %xmm1, %xmm0 50: movdqa %xmm1, %xmm3 54: movhlps %xmm3, %xmm3 57: movabsq $139845047042080, %rax 67: punpcklwd %xmm0, %xmm3 71: movdqa (%rax), %xmm0 75: movabsq $139845047042096, %rax 85: psllw $5, %xmm0 90: movabsq $139845047042112, %rcx 100: movabsq $139845047042128, %rdx 110: movabsq $139845047042144, %rsi 120: pshufb (%rax), %xmm3 125: punpcklwd %xmm2, %xmm1 129: movabsq $139845047042160, %rax 139: pshufb (%rax), %xmm1 144: por %xmm3, %xmm1 148: movdqa (%rdx), %xmm2 152: pand %xmm1, %xmm2 156: psllw $4, %xmm2 161: movdqa %xmm1, %xmm3 165: pblendvb %xmm0, %xmm2, %xmm3 170: movdqa (%rcx), %xmm2 174: pand %xmm3, %xmm2 178: psllw $2, %xmm2 183: paddb %xmm0, %xmm0 187: pblendvb %xmm0, %xmm2, %xmm3 192: movdqa %xmm3, %xmm2 196: paddb %xmm2, %xmm2 200: paddb %xmm0, %xmm0 204: pblendvb %xmm0, %xmm2, %xmm3 209: psubb %xmm1, %xmm3 213: movd %xmm3, %eax 217: movzwl %ax, %eax 220: orl (%rsi), %eax 222: movl %eax, (%rdi) 224: popq %rbp 225: ret define void @fetch_r32g32b32_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_USCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff ff 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: cvtdq2ps %xmm0, %xmm0 30: movabsq $139845047042064, %rax 40: movaps (%rax), %xmm1 43: shufps $33, %xmm0, %xmm1 47: shufps $36, %xmm1, %xmm0 51: movaps %xmm0, (%rdi) 54: popq %rbp 55: ret define void @fetch_r32g32b32_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %6, <4 x i32> ) %8 = lshr <4 x i32> %7, %9 = sub <4 x i32> %7, %8 %10 = extractelement <4 x i32> %9, i32 0 %11 = extractelement <4 x i32> %9, i32 1 %12 = extractelement <4 x i32> %9, i32 2 %13 = extractelement <4 x i32> %9, i32 3 %14 = bitcast i32 %10 to <2 x i16> %15 = bitcast i32 %11 to <2 x i16> %16 = shufflevector <2 x i16> %14, <2 x i16> %15, <2 x i32> %17 = bitcast i32 %12 to <2 x i16> %18 = bitcast i32 %13 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast <2 x i16> %16 to <4 x i8> %21 = bitcast <2 x i16> %19 to <4 x i8> %22 = shufflevector <4 x i8> %20, <4 x i8> %21, <4 x i32> %23 = shl <4 x i8> %22, %24 = sub <4 x i8> %23, %22 %25 = bitcast <4 x i8> %24 to i32 %26 = and i32 %25, 16777215 %27 = or i32 %26, bitcast (<4 x i8> to i32) %28 = bitcast i32 %27 to <4 x i8> store <4 x i8> %28, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: movabsq $139845047042064, %rax 37: movdqa (%rax), %xmm1 41: pminud %xmm0, %xmm1 46: movabsq $139845047042080, %rax 56: movdqa (%rax), %xmm2 60: movdqa %xmm1, %xmm0 64: psrld $255, %xmm0 69: psllw $5, %xmm2 74: movabsq $139845047042096, %rax 84: movabsq $139845047042112, %rcx 94: movabsq $139845047042128, %rdx 104: movabsq $139845047042144, %rsi 114: movabsq $139845047042160, %r8 124: psubd %xmm0, %xmm1 128: pshufd $1, %xmm1, %xmm0 133: pshufd $3, %xmm1, %xmm3 138: movdqa %xmm1, %xmm4 142: movhlps %xmm4, %xmm4 145: punpcklwd %xmm3, %xmm4 149: pshufb (%rcx), %xmm4 154: punpcklwd %xmm0, %xmm1 158: pshufb (%rax), %xmm1 163: por %xmm4, %xmm1 167: movdqa (%rsi), %xmm3 171: pand %xmm1, %xmm3 175: psllw $4, %xmm3 180: movdqa %xmm2, %xmm0 184: movdqa %xmm1, %xmm4 188: pblendvb %xmm0, %xmm3, %xmm4 193: movdqa (%rdx), %xmm3 197: pand %xmm4, %xmm3 201: psllw $2, %xmm3 206: movdqa %xmm2, %xmm0 210: paddb %xmm0, %xmm0 214: pblendvb %xmm0, %xmm3, %xmm4 219: movdqa %xmm4, %xmm2 223: paddb %xmm2, %xmm2 227: paddb %xmm0, %xmm0 231: pblendvb %xmm0, %xmm2, %xmm4 236: psubb %xmm1, %xmm4 240: movd %xmm4, %eax 244: andl $16777215, %eax 250: orl (%r8), %eax 253: movl %eax, (%rdi) 255: popq %rbp 256: ret define void @fetch_r32g32b32a32_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = sitofp <4 x i32> %5 to <4 x float> store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_USCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff ff ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movups (%rsi), %xmm0 7: cvtdq2ps %xmm0, %xmm0 10: movaps %xmm0, (%rdi) 13: popq %rbp 14: ret define void @fetch_r32g32b32a32_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %5, <4 x i32> ) %7 = lshr <4 x i32> %6, %8 = sub <4 x i32> %6, %7 %9 = extractelement <4 x i32> %8, i32 0 %10 = extractelement <4 x i32> %8, i32 1 %11 = extractelement <4 x i32> %8, i32 2 %12 = extractelement <4 x i32> %8, i32 3 %13 = bitcast i32 %9 to <2 x i16> %14 = bitcast i32 %10 to <2 x i16> %15 = shufflevector <2 x i16> %13, <2 x i16> %14, <2 x i32> %16 = bitcast i32 %11 to <2 x i16> %17 = bitcast i32 %12 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast <2 x i16> %15 to <4 x i8> %20 = bitcast <2 x i16> %18 to <4 x i8> %21 = shufflevector <4 x i8> %19, <4 x i8> %20, <4 x i32> %22 = shl <4 x i8> %21, %23 = sub <4 x i8> %22, %21 store <4 x i8> %23, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movdqa (%rax), %xmm0 18: movdqu (%rsi), %xmm1 22: pminud %xmm0, %xmm1 27: movdqa %xmm1, %xmm0 31: psrld $255, %xmm0 36: psubd %xmm0, %xmm1 40: pshufd $1, %xmm1, %xmm2 45: pshufd $3, %xmm1, %xmm0 50: movdqa %xmm1, %xmm3 54: movhlps %xmm3, %xmm3 57: punpcklwd %xmm0, %xmm3 61: movabsq $139845047042080, %rax 71: movdqa (%rax), %xmm0 75: movabsq $139845047042096, %rax 85: psllw $5, %xmm0 90: movabsq $139845047042112, %rcx 100: movabsq $139845047042128, %rdx 110: pshufb (%rax), %xmm3 115: punpcklwd %xmm2, %xmm1 119: movabsq $139845047042144, %rax 129: pshufb (%rax), %xmm1 134: por %xmm3, %xmm1 138: movdqa (%rdx), %xmm2 142: pand %xmm1, %xmm2 146: psllw $4, %xmm2 151: movdqa %xmm1, %xmm3 155: pblendvb %xmm0, %xmm2, %xmm3 160: movdqa (%rcx), %xmm2 164: pand %xmm3, %xmm2 168: psllw $2, %xmm2 173: paddb %xmm0, %xmm0 177: pblendvb %xmm0, %xmm2, %xmm3 182: movdqa %xmm3, %xmm2 186: paddb %xmm2, %xmm2 190: paddb %xmm0, %xmm0 194: pblendvb %xmm0, %xmm2, %xmm3 199: psubb %xmm1, %xmm3 203: movd %xmm3, (%rdi) 207: popq %rbp 208: ret define void @fetch_r32_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = sitofp <4 x i32> %7 to <4 x float> %9 = fmul <4 x float> %8, %10 = shufflevector <4 x float> %9, <4 x float> , <4 x i32> store <4 x float> %10, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_USCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 00 obtained ff 00 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 ff 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 ff 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 00 ff expected FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 00 obtained ff ff ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: cvtdq2ps %xmm0, %xmm0 16: movabsq $139845047042064, %rax 26: mulps (%rax), %xmm0 29: movabsq $139845047042080, %rax 39: movaps (%rax), %xmm1 42: movlhps %xmm1, %xmm0 45: shufps $72, %xmm1, %xmm0 49: movaps %xmm0, (%rdi) 52: popq %rbp 53: ret define void @fetch_r32_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %7, <4 x i32> zeroinitializer) %9 = ashr <4 x i32> %8, %10 = extractelement <4 x i32> %9, i32 0 %11 = extractelement <4 x i32> %9, i32 1 %12 = extractelement <4 x i32> %9, i32 2 %13 = extractelement <4 x i32> %9, i32 3 %14 = bitcast i32 %10 to <2 x i16> %15 = bitcast i32 %11 to <2 x i16> %16 = shufflevector <2 x i16> %14, <2 x i16> %15, <2 x i32> %17 = bitcast i32 %12 to <2 x i16> %18 = bitcast i32 %13 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast <2 x i16> %16 to <4 x i8> %21 = bitcast <2 x i16> %19 to <4 x i8> %22 = shufflevector <4 x i8> %20, <4 x i8> %21, <4 x i32> %23 = bitcast <4 x i8> %22 to i32 %24 = and i32 %23, 255 %25 = or i32 %24, bitcast (<4 x i8> to i32) %26 = bitcast i32 %25 to <4 x i8> store <4 x i8> %26, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsd %xmm0, %xmm1 22: psrad $23, %xmm1 27: pshufd $1, %xmm1, %xmm0 32: pshufd $3, %xmm1, %xmm2 37: movdqa %xmm1, %xmm3 41: movhlps %xmm3, %xmm3 44: punpcklwd %xmm2, %xmm3 48: movabsq $139845047042064, %rax 58: movabsq $139845047042080, %rcx 68: pshufb (%rax), %xmm3 73: punpcklwd %xmm0, %xmm1 77: movabsq $139845047042096, %rax 87: pshufb (%rax), %xmm1 92: por %xmm3, %xmm1 96: movd %xmm1, %eax 100: movzbl %al, %eax 103: orl (%rcx), %eax 105: movl %eax, (%rdi) 107: popq %rbp 108: ret define void @fetch_r32g32_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = fmul <4 x float> %7, %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: cvtdq2ps %xmm0, %xmm0 11: movabsq $139845047042064, %rax 21: mulps (%rax), %xmm0 24: movabsq $139845047042080, %rax 34: movaps (%rax), %xmm1 37: movlhps %xmm1, %xmm0 40: movaps %xmm0, (%rdi) 43: popq %rbp 44: ret define void @fetch_r32g32_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = ashr <4 x i32> %7, %9 = extractelement <4 x i32> %8, i32 0 %10 = extractelement <4 x i32> %8, i32 1 %11 = extractelement <4 x i32> %8, i32 2 %12 = extractelement <4 x i32> %8, i32 3 %13 = bitcast i32 %9 to <2 x i16> %14 = bitcast i32 %10 to <2 x i16> %15 = shufflevector <2 x i16> %13, <2 x i16> %14, <2 x i32> %16 = bitcast i32 %11 to <2 x i16> %17 = bitcast i32 %12 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast <2 x i16> %15 to <4 x i8> %20 = bitcast <2 x i16> %18 to <4 x i8> %21 = shufflevector <4 x i8> %19, <4 x i8> %20, <4 x i32> %22 = bitcast <4 x i8> %21 to i32 %23 = and i32 %22, 65535 %24 = or i32 %23, bitcast (<4 x i8> to i32) %25 = bitcast i32 %24 to <4 x i8> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: psrad $23, %xmm1 22: pshufd $1, %xmm1, %xmm0 27: pshufd $3, %xmm1, %xmm2 32: movdqa %xmm1, %xmm3 36: movhlps %xmm3, %xmm3 39: punpcklwd %xmm2, %xmm3 43: movabsq $139845047042064, %rax 53: pshufb (%rax), %xmm3 58: punpcklwd %xmm0, %xmm1 62: movabsq $139845047042080, %rax 72: pshufb (%rax), %xmm1 77: movabsq $139845047042096, %rax 87: por %xmm3, %xmm1 91: movd %xmm1, %ecx 95: movzwl %cx, %ecx 98: orl (%rax), %ecx 100: movl %ecx, (%rdi) 102: popq %rbp 103: ret define void @fetch_r32g32b32_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = fmul <4 x float> %7, %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: cvtdq2ps %xmm0, %xmm0 30: movabsq $139845047042064, %rax 40: mulps (%rax), %xmm0 43: movabsq $139845047042080, %rax 53: movaps (%rax), %xmm1 56: shufps $33, %xmm0, %xmm1 60: shufps $36, %xmm1, %xmm0 64: movaps %xmm0, (%rdi) 67: popq %rbp 68: ret define void @fetch_r32g32b32_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = ashr <4 x i32> %7, %9 = extractelement <4 x i32> %8, i32 0 %10 = extractelement <4 x i32> %8, i32 1 %11 = extractelement <4 x i32> %8, i32 2 %12 = extractelement <4 x i32> %8, i32 3 %13 = bitcast i32 %9 to <2 x i16> %14 = bitcast i32 %10 to <2 x i16> %15 = shufflevector <2 x i16> %13, <2 x i16> %14, <2 x i32> %16 = bitcast i32 %11 to <2 x i16> %17 = bitcast i32 %12 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast <2 x i16> %15 to <4 x i8> %20 = bitcast <2 x i16> %18 to <4 x i8> %21 = shufflevector <4 x i8> %19, <4 x i8> %20, <4 x i32> %22 = bitcast <4 x i8> %21 to i32 %23 = and i32 %22, 16777215 %24 = or i32 %23, bitcast (<4 x i8> to i32) %25 = bitcast i32 %24 to <4 x i8> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: pxor %xmm1, %xmm1 31: pmaxsd %xmm0, %xmm1 36: psrad $23, %xmm1 41: movabsq $139845047042064, %rax 51: movabsq $139845047042080, %rcx 61: movabsq $139845047042096, %rdx 71: pshufd $1, %xmm1, %xmm0 76: pshufd $3, %xmm1, %xmm2 81: movdqa %xmm1, %xmm3 85: movhlps %xmm3, %xmm3 88: punpcklwd %xmm2, %xmm3 92: pshufb (%rdx), %xmm3 97: punpcklwd %xmm0, %xmm1 101: pshufb (%rcx), %xmm1 106: por %xmm3, %xmm1 110: movd %xmm1, %ecx 114: andl $16777215, %ecx 120: orl (%rax), %ecx 122: movl %ecx, (%rdi) 124: popq %rbp 125: ret define void @fetch_r32g32b32a32_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = sitofp <4 x i32> %5 to <4 x float> %7 = fmul <4 x float> %6, store <4 x float> %7, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movups (%rsi), %xmm0 7: cvtdq2ps %xmm0, %xmm0 10: movabsq $139845047042064, %rax 20: mulps (%rax), %xmm0 23: movaps %xmm0, (%rdi) 26: popq %rbp 27: ret define void @fetch_r32g32b32a32_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %5, <4 x i32> zeroinitializer) %7 = ashr <4 x i32> %6, %8 = extractelement <4 x i32> %7, i32 0 %9 = extractelement <4 x i32> %7, i32 1 %10 = extractelement <4 x i32> %7, i32 2 %11 = extractelement <4 x i32> %7, i32 3 %12 = bitcast i32 %8 to <2 x i16> %13 = bitcast i32 %9 to <2 x i16> %14 = shufflevector <2 x i16> %12, <2 x i16> %13, <2 x i32> %15 = bitcast i32 %10 to <2 x i16> %16 = bitcast i32 %11 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast <2 x i16> %14 to <4 x i8> %19 = bitcast <2 x i16> %17 to <4 x i8> %20 = shufflevector <4 x i8> %18, <4 x i8> %19, <4 x i32> store <4 x i8> %20, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movdqu (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: psrad $23, %xmm1 22: pshufd $1, %xmm1, %xmm0 27: pshufd $3, %xmm1, %xmm2 32: movdqa %xmm1, %xmm3 36: movhlps %xmm3, %xmm3 39: punpcklwd %xmm2, %xmm3 43: movabsq $139845047042064, %rax 53: pshufb (%rax), %xmm3 58: punpcklwd %xmm0, %xmm1 62: movabsq $139845047042080, %rax 72: pshufb (%rax), %xmm1 77: por %xmm3, %xmm1 81: movd %xmm1, (%rdi) 85: popq %rbp 86: ret define void @fetch_r32_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = sitofp <4 x i32> %7 to <4 x float> %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movd (%rsi), %xmm1 21: pshufd $0, %xmm1, %xmm1 26: cvtdq2ps %xmm1, %xmm1 29: movlhps %xmm0, %xmm1 32: shufps $72, %xmm0, %xmm1 36: movaps %xmm1, (%rdi) 39: popq %rbp 40: ret define void @fetch_r32_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %7, <4 x i32> zeroinitializer) %9 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %8, <4 x i32> ) %10 = ashr <4 x i32> %9, %11 = sub <4 x i32> %9, %10 %12 = extractelement <4 x i32> %11, i32 0 %13 = extractelement <4 x i32> %11, i32 1 %14 = extractelement <4 x i32> %11, i32 2 %15 = extractelement <4 x i32> %11, i32 3 %16 = bitcast i32 %12 to <2 x i16> %17 = bitcast i32 %13 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast i32 %14 to <2 x i16> %20 = bitcast i32 %15 to <2 x i16> %21 = shufflevector <2 x i16> %19, <2 x i16> %20, <2 x i32> %22 = bitcast <2 x i16> %18 to <4 x i8> %23 = bitcast <2 x i16> %21 to <4 x i8> %24 = shufflevector <4 x i8> %22, <4 x i8> %23, <4 x i32> %25 = shl <4 x i8> %24, %26 = sub <4 x i8> %25, %24 %27 = bitcast <4 x i8> %26 to i32 %28 = and i32 %27, 255 %29 = or i32 %28, bitcast (<4 x i8> to i32) %30 = bitcast i32 %29 to <4 x i8> store <4 x i8> %30, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsd %xmm0, %xmm1 22: movabsq $139845047042064, %rax 32: movdqa (%rax), %xmm2 36: pminsd %xmm1, %xmm2 41: movdqa %xmm2, %xmm0 45: psrad $255, %xmm0 50: movabsq $139845047042080, %rax 60: movdqa (%rax), %xmm1 64: psubd %xmm0, %xmm2 68: psllw $5, %xmm1 73: movabsq $139845047042096, %rax 83: movabsq $139845047042112, %rcx 93: movabsq $139845047042128, %rdx 103: movabsq $139845047042144, %rsi 113: movabsq $139845047042160, %r8 123: pshufd $1, %xmm2, %xmm0 128: pshufd $3, %xmm2, %xmm3 133: movdqa %xmm2, %xmm4 137: movhlps %xmm4, %xmm4 140: punpcklwd %xmm3, %xmm4 144: pshufb (%rax), %xmm4 149: punpcklwd %xmm0, %xmm2 153: pshufb (%rsi), %xmm2 158: por %xmm4, %xmm2 162: movdqa (%rdx), %xmm3 166: pand %xmm2, %xmm3 170: psllw $4, %xmm3 175: movdqa %xmm1, %xmm0 179: movdqa %xmm2, %xmm4 183: pblendvb %xmm0, %xmm3, %xmm4 188: movdqa (%rcx), %xmm3 192: pand %xmm4, %xmm3 196: psllw $2, %xmm3 201: movdqa %xmm1, %xmm0 205: paddb %xmm0, %xmm0 209: pblendvb %xmm0, %xmm3, %xmm4 214: movdqa %xmm4, %xmm1 218: paddb %xmm1, %xmm1 222: paddb %xmm0, %xmm0 226: pblendvb %xmm0, %xmm1, %xmm4 231: psubb %xmm2, %xmm4 235: movd %xmm4, %eax 239: movzbl %al, %eax 242: orl (%r8), %eax 245: movl %eax, (%rdi) 247: popq %rbp 248: ret define void @fetch_r32g32_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_SSCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movq (%rsi), %xmm1 21: cvtdq2ps %xmm1, %xmm1 24: movlhps %xmm0, %xmm1 27: movaps %xmm1, (%rdi) 30: popq %rbp 31: ret define void @fetch_r32g32_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %7, <4 x i32> ) %9 = ashr <4 x i32> %8, %10 = sub <4 x i32> %8, %9 %11 = extractelement <4 x i32> %10, i32 0 %12 = extractelement <4 x i32> %10, i32 1 %13 = extractelement <4 x i32> %10, i32 2 %14 = extractelement <4 x i32> %10, i32 3 %15 = bitcast i32 %11 to <2 x i16> %16 = bitcast i32 %12 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast i32 %13 to <2 x i16> %19 = bitcast i32 %14 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast <2 x i16> %17 to <4 x i8> %22 = bitcast <2 x i16> %20 to <4 x i8> %23 = shufflevector <4 x i8> %21, <4 x i8> %22, <4 x i32> %24 = shl <4 x i8> %23, %25 = sub <4 x i8> %24, %23 %26 = bitcast <4 x i8> %25 to i32 %27 = and i32 %26, 65535 %28 = or i32 %27, bitcast (<4 x i8> to i32) %29 = bitcast i32 %28 to <4 x i8> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm2 31: pminsd %xmm1, %xmm2 36: movdqa %xmm2, %xmm0 40: psrad $255, %xmm0 45: psubd %xmm0, %xmm2 49: pshufd $1, %xmm2, %xmm1 54: pshufd $3, %xmm2, %xmm3 59: movabsq $139845047042080, %rax 69: movdqa (%rax), %xmm0 73: movdqa %xmm2, %xmm4 77: movhlps %xmm4, %xmm4 80: psllw $5, %xmm0 85: movabsq $139845047042096, %rax 95: movabsq $139845047042112, %rcx 105: movabsq $139845047042128, %rdx 115: movabsq $139845047042144, %rsi 125: movabsq $139845047042160, %r8 135: punpcklwd %xmm3, %xmm4 139: pshufb (%rax), %xmm4 144: punpcklwd %xmm1, %xmm2 148: pshufb (%rsi), %xmm2 153: por %xmm4, %xmm2 157: movdqa (%rdx), %xmm1 161: pand %xmm2, %xmm1 165: psllw $4, %xmm1 170: movdqa %xmm2, %xmm3 174: pblendvb %xmm0, %xmm1, %xmm3 179: movdqa (%rcx), %xmm1 183: pand %xmm3, %xmm1 187: psllw $2, %xmm1 192: paddb %xmm0, %xmm0 196: pblendvb %xmm0, %xmm1, %xmm3 201: movdqa %xmm3, %xmm1 205: paddb %xmm1, %xmm1 209: paddb %xmm0, %xmm0 213: pblendvb %xmm0, %xmm1, %xmm3 218: psubb %xmm2, %xmm3 222: movd %xmm3, %eax 226: movzwl %ax, %eax 229: orl (%r8), %eax 232: movl %eax, (%rdi) 234: popq %rbp 235: ret define void @fetch_r32g32b32_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_SSCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: cvtdq2ps %xmm0, %xmm0 30: movabsq $139845047042064, %rax 40: movaps (%rax), %xmm1 43: shufps $33, %xmm0, %xmm1 47: shufps $36, %xmm1, %xmm0 51: movaps %xmm0, (%rdi) 54: popq %rbp 55: ret define void @fetch_r32g32b32_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %7, <4 x i32> ) %9 = ashr <4 x i32> %8, %10 = sub <4 x i32> %8, %9 %11 = extractelement <4 x i32> %10, i32 0 %12 = extractelement <4 x i32> %10, i32 1 %13 = extractelement <4 x i32> %10, i32 2 %14 = extractelement <4 x i32> %10, i32 3 %15 = bitcast i32 %11 to <2 x i16> %16 = bitcast i32 %12 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast i32 %13 to <2 x i16> %19 = bitcast i32 %14 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast <2 x i16> %17 to <4 x i8> %22 = bitcast <2 x i16> %20 to <4 x i8> %23 = shufflevector <4 x i8> %21, <4 x i8> %22, <4 x i32> %24 = shl <4 x i8> %23, %25 = sub <4 x i8> %24, %23 %26 = bitcast <4 x i8> %25 to i32 %27 = and i32 %26, 16777215 %28 = or i32 %27, bitcast (<4 x i8> to i32) %29 = bitcast i32 %28 to <4 x i8> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: pxor %xmm1, %xmm1 31: pmaxsd %xmm0, %xmm1 36: movabsq $139845047042064, %rax 46: movdqa (%rax), %xmm2 50: movabsq $139845047042080, %rax 60: movdqa (%rax), %xmm3 64: pminsd %xmm1, %xmm3 69: psllw $5, %xmm2 74: movabsq $139845047042096, %rax 84: movabsq $139845047042112, %rcx 94: movabsq $139845047042128, %rdx 104: movabsq $139845047042144, %rsi 114: movabsq $139845047042160, %r8 124: movdqa %xmm3, %xmm0 128: psrad $255, %xmm0 133: psubd %xmm0, %xmm3 137: pshufd $1, %xmm3, %xmm0 142: pshufd $3, %xmm3, %xmm1 147: movdqa %xmm3, %xmm4 151: movhlps %xmm4, %xmm4 154: punpcklwd %xmm1, %xmm4 158: pshufb (%rcx), %xmm4 163: punpcklwd %xmm0, %xmm3 167: pshufb (%rax), %xmm3 172: por %xmm4, %xmm3 176: movdqa (%rsi), %xmm1 180: pand %xmm3, %xmm1 184: psllw $4, %xmm1 189: movdqa %xmm2, %xmm0 193: movdqa %xmm3, %xmm4 197: pblendvb %xmm0, %xmm1, %xmm4 202: movdqa (%rdx), %xmm1 206: pand %xmm4, %xmm1 210: psllw $2, %xmm1 215: movdqa %xmm2, %xmm0 219: paddb %xmm0, %xmm0 223: pblendvb %xmm0, %xmm1, %xmm4 228: movdqa %xmm4, %xmm1 232: paddb %xmm1, %xmm1 236: paddb %xmm0, %xmm0 240: pblendvb %xmm0, %xmm1, %xmm4 245: psubb %xmm3, %xmm4 249: movd %xmm4, %eax 253: andl $16777215, %eax 259: orl (%r8), %eax 262: movl %eax, (%rdi) 264: popq %rbp 265: ret define void @fetch_r32g32b32a32_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = sitofp <4 x i32> %5 to <4 x float> store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_SSCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movups (%rsi), %xmm0 7: cvtdq2ps %xmm0, %xmm0 10: movaps %xmm0, (%rdi) 13: popq %rbp 14: ret define void @fetch_r32g32b32a32_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %5, <4 x i32> zeroinitializer) %7 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %6, <4 x i32> ) %8 = ashr <4 x i32> %7, %9 = sub <4 x i32> %7, %8 %10 = extractelement <4 x i32> %9, i32 0 %11 = extractelement <4 x i32> %9, i32 1 %12 = extractelement <4 x i32> %9, i32 2 %13 = extractelement <4 x i32> %9, i32 3 %14 = bitcast i32 %10 to <2 x i16> %15 = bitcast i32 %11 to <2 x i16> %16 = shufflevector <2 x i16> %14, <2 x i16> %15, <2 x i32> %17 = bitcast i32 %12 to <2 x i16> %18 = bitcast i32 %13 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast <2 x i16> %16 to <4 x i8> %21 = bitcast <2 x i16> %19 to <4 x i8> %22 = shufflevector <4 x i8> %20, <4 x i8> %21, <4 x i32> %23 = shl <4 x i8> %22, %24 = sub <4 x i8> %23, %22 store <4 x i8> %24, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movdqu (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm2 31: pminsd %xmm1, %xmm2 36: movdqa %xmm2, %xmm0 40: psrad $255, %xmm0 45: psubd %xmm0, %xmm2 49: pshufd $1, %xmm2, %xmm1 54: pshufd $3, %xmm2, %xmm3 59: movabsq $139845047042080, %rax 69: movdqa %xmm2, %xmm4 73: movhlps %xmm4, %xmm4 76: movdqa (%rax), %xmm0 80: psllw $5, %xmm0 85: movabsq $139845047042096, %rax 95: movabsq $139845047042112, %rcx 105: movabsq $139845047042128, %rdx 115: movabsq $139845047042144, %rsi 125: punpcklwd %xmm3, %xmm4 129: pshufb (%rsi), %xmm4 134: punpcklwd %xmm1, %xmm2 138: pshufb (%rdx), %xmm2 143: por %xmm4, %xmm2 147: movdqa (%rcx), %xmm1 151: pand %xmm2, %xmm1 155: psllw $4, %xmm1 160: movdqa %xmm2, %xmm3 164: pblendvb %xmm0, %xmm1, %xmm3 169: movdqa (%rax), %xmm1 173: pand %xmm3, %xmm1 177: psllw $2, %xmm1 182: paddb %xmm0, %xmm0 186: pblendvb %xmm0, %xmm1, %xmm3 191: movdqa %xmm3, %xmm1 195: paddb %xmm1, %xmm1 199: paddb %xmm0, %xmm0 203: pblendvb %xmm0, %xmm1, %xmm3 208: psubb %xmm2, %xmm3 212: movd %xmm3, (%rdi) 216: popq %rbp 217: ret define void @fetch_r16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_SSCALED (unorm8) ... FAILED Packed: 00 00 00 01 Unpacked (0,0): 00 00 00 00 obtained ff 00 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 ff 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 ff 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: movabsq $139845047042080, %rax 61: mulps %xmm0, %xmm1 64: movaps (%rax), %xmm0 67: movlhps %xmm0, %xmm1 70: shufps $72, %xmm0, %xmm1 74: movaps %xmm1, (%rdi) 77: popq %rbp 78: ret define void @fetch_r16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = and i32 %32, 255 %34 = or i32 %33, bitcast (<4 x i8> to i32) %35 = bitcast i32 %34 to <4 x i8> store <4 x i8> %35, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: movabsq $139845047042064, %rax 44: movss (%rax), %xmm0 48: cvtdq2ps %xmm1, %xmm1 51: movabsq $139845047042080, %rax 61: movabsq $139845047042096, %rcx 71: movabsq $139845047042112, %rdx 81: movabsq $139845047042128, %rsi 91: movabsq $139845047042144, %r8 101: movabsq $139845047042160, %r9 111: movabsq $139845047042176, %r10 121: xorps %xmm2, %xmm2 124: mulps %xmm1, %xmm0 127: maxps %xmm2, %xmm0 130: minps (%r10), %xmm0 134: mulps (%r9), %xmm0 138: addps (%r8), %xmm0 142: andps (%rsi), %xmm0 145: pshufd $1, %xmm0, %xmm1 150: pshufd $3, %xmm0, %xmm2 155: movaps %xmm0, %xmm3 158: movhlps %xmm3, %xmm3 161: punpcklwd %xmm2, %xmm3 165: pshufb (%rdx), %xmm3 170: punpcklwd %xmm1, %xmm0 174: pshufb (%rcx), %xmm0 179: por %xmm3, %xmm0 183: movd %xmm0, %ecx 187: movzbl %cl, %ecx 190: orl (%rax), %ecx 192: movl %ecx, (%rdi) 194: popq %rbp 195: ret define void @fetch_r16g16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $2, %xmm0, %eax 19: shrl %cl, %eax 21: pextrd $3, %xmm0, %ecx 27: pextrd $1, %xmm0, %edx 33: shrl $16, %edx 36: pinsrd $1, %edx, %xmm0 42: pinsrd $2, %eax, %xmm0 48: shrl %cl, %ecx 50: movabsq $139845047042064, %rax 60: movaps (%rax), %xmm1 63: movabsq $139845047042080, %rax 73: movabsq $139845047042096, %rdx 83: pinsrd $3, %ecx, %xmm0 89: pand (%rdx), %xmm0 93: cvtdq2ps %xmm0, %xmm0 96: mulps (%rax), %xmm0 99: movlhps %xmm1, %xmm0 102: movaps %xmm0, (%rdi) 105: popq %rbp 106: ret define void @fetch_r16g16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 65535 %33 = or i32 %32, bitcast (<4 x i8> to i32) %34 = bitcast i32 %33 to <4 x i8> store <4 x i8> %34, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movd (%rsi), %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $16, %edx 39: pinsrd $1, %edx, %xmm0 45: pinsrd $2, %eax, %xmm0 51: shrl %cl, %ecx 53: movabsq $139845047042064, %rax 63: movabsq $139845047042080, %rdx 73: movabsq $139845047042096, %rsi 83: movabsq $139845047042112, %r8 93: movabsq $139845047042128, %r9 103: movabsq $139845047042144, %r10 113: movabsq $139845047042160, %r11 123: xorps %xmm1, %xmm1 126: movabsq $139845047042176, %rbx 136: movabsq $139845047042192, %r14 146: pinsrd $3, %ecx, %xmm0 152: pand (%r14), %xmm0 157: cvtdq2ps %xmm0, %xmm0 160: mulps (%rbx), %xmm0 163: maxps %xmm1, %xmm0 166: minps (%r11), %xmm0 170: mulps (%r10), %xmm0 174: addps (%r9), %xmm0 178: andps (%r8), %xmm0 182: pshufd $1, %xmm0, %xmm1 187: pshufd $3, %xmm0, %xmm2 192: movaps %xmm0, %xmm3 195: movhlps %xmm3, %xmm3 198: punpcklwd %xmm2, %xmm3 202: pshufb (%rsi), %xmm3 207: punpcklwd %xmm1, %xmm0 211: pshufb (%rdx), %xmm0 216: por %xmm3, %xmm0 220: movd %xmm0, %ecx 224: movzwl %cx, %ecx 227: orl (%rax), %ecx 229: movl %ecx, (%rdi) 231: popq %rbx 232: popq %r14 234: popq %rbp 235: ret define void @fetch_r16g16b16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = extractelement <3 x i16> %5, i32 0 %7 = zext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i16> %5, i32 1 %10 = zext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i16> %5, i32 2 %13 = zext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = fmul <4 x float> %16, %18 = shufflevector <4 x float> %17, <4 x float> , <4 x i32> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: movd %ecx, %xmm0 19: movzwl %ax, %eax 22: movd %eax, %xmm1 26: punpckldq %xmm0, %xmm1 30: movzwl 4(%rsi), %eax 34: movd %eax, %xmm0 38: movlhps %xmm0, %xmm1 41: movabsq $139845047042064, %rax 51: movaps (%rax), %xmm0 54: movabsq $139845047042080, %rax 64: cvtdq2ps %xmm1, %xmm1 67: mulps (%rax), %xmm1 70: shufps $33, %xmm1, %xmm0 74: shufps $36, %xmm0, %xmm1 78: movaps %xmm1, (%rdi) 81: popq %rbp 82: ret define void @fetch_r16g16b16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = lshr <4 x i16> %6, %8 = shufflevector <4 x i16> %7, <4 x i16> %7, <2 x i32> %9 = shufflevector <4 x i16> %7, <4 x i16> %7, <2 x i32> %10 = bitcast <2 x i16> %8 to <4 x i8> %11 = bitcast <2 x i16> %9 to <4 x i8> %12 = shufflevector <4 x i8> %10, <4 x i8> %11, <4 x i32> %13 = bitcast <4 x i8> %12 to i32 %14 = and i32 %13, 16777215 %15 = or i32 %14, bitcast (<4 x i8> to i32) %16 = bitcast i32 %15 to <4 x i8> store <4 x i8> %16, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: pinsrw $0, %eax, %xmm0 11: movd %eax, %xmm1 15: pextrw $1, %xmm1, %eax 20: pinsrw $1, %eax, %xmm0 25: pinsrw $2, 4(%rsi), %xmm0 31: psrlw $8, %xmm0 36: movabsq $139845047042064, %rax 46: movabsq $139845047042080, %rcx 56: movabsq $139845047042096, %rdx 66: pshufd $1, %xmm0, %xmm1 71: pshufb (%rdx), %xmm0 76: pshufb (%rcx), %xmm1 81: por %xmm0, %xmm1 85: movd %xmm1, %ecx 89: andl $16777215, %ecx 95: orl (%rax), %ecx 97: movl %ecx, (%rdi) 99: popq %rbp 100: ret define void @fetch_r16g16b16a16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = extractelement <4 x i16> %5, i32 0 %7 = zext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i16> %5, i32 1 %10 = zext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i16> %5, i32 2 %13 = zext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i16> %5, i32 3 %16 = zext i16 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> %19 = fmul <4 x float> %18, store <4 x float> %19, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrw $1, %xmm0, %eax 13: movd %xmm0, %ecx 17: movzwl %cx, %ecx 20: movd %ecx, %xmm1 24: pinsrd $1, %eax, %xmm1 30: pextrw $2, %xmm0, %eax 35: movabsq $139845047042064, %rcx 45: pinsrd $2, %eax, %xmm1 51: pextrw $3, %xmm0, %eax 56: pinsrd $3, %eax, %xmm1 62: cvtdq2ps %xmm1, %xmm0 65: mulps (%rcx), %xmm0 68: movaps %xmm0, (%rdi) 71: popq %rbp 72: ret define void @fetch_r16g16b16a16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = lshr <4 x i16> %5, %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <2 x i32> %8 = shufflevector <4 x i16> %6, <4 x i16> %6, <2 x i32> %9 = bitcast <2 x i16> %7 to <4 x i8> %10 = bitcast <2 x i16> %8 to <4 x i8> %11 = shufflevector <4 x i8> %9, <4 x i8> %10, <4 x i32> store <4 x i8> %11, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16A16_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: psrlw $8, %xmm0 13: pshufd $1, %xmm0, %xmm1 18: movabsq $139845047042064, %rax 28: pshufb (%rax), %xmm0 33: movabsq $139845047042080, %rax 43: pshufb (%rax), %xmm1 48: por %xmm0, %xmm1 52: movd %xmm1, (%rdi) 56: popq %rbp 57: ret define void @fetch_r16_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16A16_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movaps (%rax), %xmm1 50: movlhps %xmm1, %xmm0 53: shufps $72, %xmm1, %xmm0 57: movaps %xmm0, (%rdi) 60: popq %rbp 61: ret define void @fetch_r16_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 255 %33 = or i32 %32, bitcast (<4 x i8> to i32) %34 = bitcast i32 %33 to <4 x i8> store <4 x i8> %34, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movabsq $139845047042080, %rcx 57: movabsq $139845047042096, %rdx 67: movabsq $139845047042112, %rsi 77: movabsq $139845047042128, %r8 87: movabsq $139845047042144, %r9 97: movabsq $139845047042160, %r10 107: xorps %xmm1, %xmm1 110: maxps %xmm1, %xmm0 113: minps (%r10), %xmm0 117: mulps (%r9), %xmm0 121: addps (%r8), %xmm0 125: andps (%rsi), %xmm0 128: pshufd $1, %xmm0, %xmm1 133: pshufd $3, %xmm0, %xmm2 138: movaps %xmm0, %xmm3 141: movhlps %xmm3, %xmm3 144: punpcklwd %xmm2, %xmm3 148: pshufb (%rdx), %xmm3 153: punpcklwd %xmm1, %xmm0 157: pshufb (%rcx), %xmm0 162: por %xmm3, %xmm0 166: movd %xmm0, %ecx 170: movzbl %cl, %ecx 173: orl (%rax), %ecx 175: movl %ecx, (%rdi) 177: popq %rbp 178: ret define void @fetch_r16g16_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> , <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $2, %xmm0, %eax 19: shrl %cl, %eax 21: pextrd $3, %xmm0, %ecx 27: pextrd $1, %xmm0, %edx 33: shrl $16, %edx 36: pinsrd $1, %edx, %xmm0 42: pinsrd $2, %eax, %xmm0 48: shrl %cl, %ecx 50: movabsq $139845047042064, %rax 60: movaps (%rax), %xmm1 63: movabsq $139845047042080, %rax 73: pinsrd $3, %ecx, %xmm0 79: pand (%rax), %xmm0 83: cvtdq2ps %xmm0, %xmm0 86: movlhps %xmm1, %xmm0 89: movaps %xmm0, (%rdi) 92: popq %rbp 93: ret define void @fetch_r16g16_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> %30 = bitcast <4 x i8> %29 to i32 %31 = and i32 %30, 65535 %32 = or i32 %31, bitcast (<4 x i8> to i32) %33 = bitcast i32 %32 to <4 x i8> store <4 x i8> %33, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: pextrd $2, %xmm0, %eax 20: shrl %cl, %eax 22: pextrd $3, %xmm0, %ecx 28: pextrd $1, %xmm0, %edx 34: shrl $16, %edx 37: pinsrd $1, %edx, %xmm0 43: pinsrd $2, %eax, %xmm0 49: shrl %cl, %ecx 51: movabsq $139845047042064, %rax 61: movabsq $139845047042080, %rdx 71: movabsq $139845047042096, %rsi 81: movabsq $139845047042112, %r8 91: movabsq $139845047042128, %r9 101: movabsq $139845047042144, %r10 111: movabsq $139845047042160, %r11 121: xorps %xmm1, %xmm1 124: movabsq $139845047042176, %rbx 134: pinsrd $3, %ecx, %xmm0 140: pand (%rbx), %xmm0 144: cvtdq2ps %xmm0, %xmm0 147: maxps %xmm1, %xmm0 150: minps (%r11), %xmm0 154: mulps (%r10), %xmm0 158: addps (%r9), %xmm0 162: andps (%r8), %xmm0 166: pshufd $1, %xmm0, %xmm1 171: pshufd $3, %xmm0, %xmm2 176: movaps %xmm0, %xmm3 179: movhlps %xmm3, %xmm3 182: punpcklwd %xmm2, %xmm3 186: pshufb (%rsi), %xmm3 191: punpcklwd %xmm1, %xmm0 195: pshufb (%rdx), %xmm0 200: por %xmm3, %xmm0 204: movd %xmm0, %ecx 208: movzwl %cx, %ecx 211: orl (%rax), %ecx 213: movl %ecx, (%rdi) 215: popq %rbx 216: popq %rbp 217: ret define void @fetch_r16g16b16_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = extractelement <3 x i16> %5, i32 0 %7 = zext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i16> %5, i32 1 %10 = zext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i16> %5, i32 2 %13 = zext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = shufflevector <4 x float> %16, <4 x float> , <4 x i32> store <4 x float> %17, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: movd %ecx, %xmm0 19: movzwl %ax, %eax 22: movd %eax, %xmm1 26: punpckldq %xmm0, %xmm1 30: movzwl 4(%rsi), %eax 34: movd %eax, %xmm0 38: movlhps %xmm0, %xmm1 41: movabsq $139845047042064, %rax 51: movaps (%rax), %xmm0 54: cvtdq2ps %xmm1, %xmm1 57: shufps $33, %xmm1, %xmm0 61: shufps $36, %xmm0, %xmm1 65: movaps %xmm1, (%rdi) 68: popq %rbp 69: ret define void @fetch_r16g16b16_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <8 x i32> %8 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %7, <8 x i16> ) %9 = shufflevector <8 x i16> %8, <8 x i16> %8, <4 x i32> %10 = lshr <4 x i16> %9, %11 = sub <4 x i16> %9, %10 %12 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %13 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = bitcast <2 x i16> %13 to <4 x i8> %16 = shufflevector <4 x i8> %14, <4 x i8> %15, <4 x i32> %17 = shl <4 x i8> %16, %18 = sub <4 x i8> %17, %16 %19 = bitcast <4 x i8> %18 to i32 %20 = and i32 %19, 16777215 %21 = or i32 %20, bitcast (<4 x i8> to i32) %22 = bitcast i32 %21 to <4 x i8> store <4 x i8> %22, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: pinsrw $1, %ecx, %xmm0 20: movzwl 4(%rsi), %edx 24: pinsrw $2, %edx, %xmm0 29: movabsq $139845047042064, %rsi 39: movdqa (%rsi), %xmm1 43: movabsq $139845047042080, %rsi 53: pinsrw $4, %eax, %xmm0 58: movdqa (%rsi), %xmm2 62: psllw $5, %xmm1 67: movabsq $139845047042096, %rax 77: movabsq $139845047042112, %rsi 87: movabsq $139845047042128, %r8 97: movabsq $139845047042144, %r9 107: movabsq $139845047042160, %r10 117: pinsrw $5, %ecx, %xmm0 122: pinsrw $6, %edx, %xmm0 127: pminuw %xmm2, %xmm0 132: movdqa %xmm0, %xmm2 136: psrlw $255, %xmm2 141: psubw %xmm2, %xmm0 145: pshufd $1, %xmm0, %xmm2 150: pshufb (%r9), %xmm0 156: pshufb (%r8), %xmm2 162: por %xmm0, %xmm2 166: movdqa (%rsi), %xmm3 170: pand %xmm2, %xmm3 174: psllw $4, %xmm3 179: movdqa %xmm1, %xmm0 183: movdqa %xmm2, %xmm4 187: pblendvb %xmm0, %xmm3, %xmm4 192: movdqa (%rax), %xmm3 196: pand %xmm4, %xmm3 200: psllw $2, %xmm3 205: movdqa %xmm1, %xmm0 209: paddb %xmm0, %xmm0 213: pblendvb %xmm0, %xmm3, %xmm4 218: movdqa %xmm4, %xmm1 222: paddb %xmm1, %xmm1 226: paddb %xmm0, %xmm0 230: pblendvb %xmm0, %xmm1, %xmm4 235: psubb %xmm2, %xmm4 239: movd %xmm4, %eax 243: andl $16777215, %eax 249: orl (%r10), %eax 252: movl %eax, (%rdi) 254: popq %rbp 255: ret define void @fetch_r16g16b16a16_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = extractelement <4 x i16> %5, i32 0 %7 = zext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i16> %5, i32 1 %10 = zext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i16> %5, i32 2 %13 = zext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i16> %5, i32 3 %16 = zext i16 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16_USCALED (unorm8) ... FAILED Packed: ff ff 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 ff ff Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected FAILED Packed: ff ff ff ff Unpacked (0,0): 00 00 00 ff obtained ff ff ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrw $1, %xmm0, %eax 13: movd %xmm0, %ecx 17: movzwl %cx, %ecx 20: movd %ecx, %xmm1 24: pinsrd $1, %eax, %xmm1 30: pextrw $2, %xmm0, %eax 35: pinsrd $2, %eax, %xmm1 41: pextrw $3, %xmm0, %eax 46: pinsrd $3, %eax, %xmm1 52: cvtdq2ps %xmm1, %xmm0 55: movaps %xmm0, (%rdi) 58: popq %rbp 59: ret define void @fetch_r16g16b16a16_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = shufflevector <4 x i16> %5, <4 x i16> %5, <8 x i32> %7 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %6, <8 x i16> ) %8 = shufflevector <8 x i16> %7, <8 x i16> %7, <4 x i32> %9 = lshr <4 x i16> %8, %10 = sub <4 x i16> %8, %9 %11 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %12 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %13 = bitcast <2 x i16> %11 to <4 x i8> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = shufflevector <4 x i8> %13, <4 x i8> %14, <4 x i32> %16 = shl <4 x i8> %15, %17 = sub <4 x i8> %16, %15 store <4 x i8> %17, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16A16_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movdqa (%rax), %xmm0 18: movq (%rsi), %xmm1 22: pshufd $68, %xmm1, %xmm1 27: pminuw %xmm0, %xmm1 32: movdqa %xmm1, %xmm0 36: psrlw $255, %xmm0 41: psubw %xmm0, %xmm1 45: pshufd $1, %xmm1, %xmm2 50: movabsq $139845047042080, %rax 60: movabsq $139845047042096, %rcx 70: pshufb (%rcx), %xmm1 75: movdqa (%rax), %xmm0 79: psllw $5, %xmm0 84: movabsq $139845047042112, %rax 94: movabsq $139845047042128, %rcx 104: movabsq $139845047042144, %rdx 114: pshufb (%rdx), %xmm2 119: por %xmm1, %xmm2 123: movdqa (%rcx), %xmm1 127: pand %xmm2, %xmm1 131: psllw $4, %xmm1 136: movdqa %xmm2, %xmm3 140: pblendvb %xmm0, %xmm1, %xmm3 145: movdqa (%rax), %xmm1 149: pand %xmm3, %xmm1 153: psllw $2, %xmm1 158: paddb %xmm0, %xmm0 162: pblendvb %xmm0, %xmm1, %xmm3 167: movdqa %xmm3, %xmm1 171: paddb %xmm1, %xmm1 175: paddb %xmm0, %xmm0 179: pblendvb %xmm0, %xmm1, %xmm3 184: psubb %xmm2, %xmm3 188: movd %xmm3, (%rdi) 192: popq %rbp 193: ret define void @fetch_r16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = sext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = insertelement <4 x i32> %7, i32 %6, i32 1 %9 = insertelement <4 x i32> %8, i32 %6, i32 2 %10 = insertelement <4 x i32> %9, i32 %6, i32 3 %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16A16_USCALED (unorm8) ... FAILED Packed: ff ff 00 00 Unpacked (0,0): 00 00 00 00 obtained ff 00 00 00 expected FAILED Packed: 00 00 ff ff Unpacked (0,0): 00 00 00 00 obtained 00 ff 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 ff 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 00 ff expected FAILED Packed: ff ff ff ff Unpacked (0,0): 00 00 00 00 obtained ff ff ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movswl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: movaps (%rax), %xmm1 45: movlhps %xmm1, %xmm0 48: shufps $72, %xmm1, %xmm0 52: movaps %xmm0, (%rdi) 55: popq %rbp 56: ret define void @fetch_r16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = shufflevector <4 x i16> %7, <4 x i16> %7, <8 x i32> %9 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %8, <8 x i16> ) %10 = shufflevector <8 x i16> %9, <8 x i16> %9, <4 x i32> %11 = ashr <4 x i16> %10, %12 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %13 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = bitcast <2 x i16> %13 to <4 x i8> %16 = shufflevector <4 x i8> %14, <4 x i8> %15, <4 x i32> %17 = bitcast <4 x i8> %16 to i32 %18 = and i32 %17, 255 %19 = or i32 %18, bitcast (<4 x i8> to i32) %20 = bitcast i32 %19 to <4 x i8> store <4 x i8> %20, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: pshufd $68, %xmm0, %xmm0 25: pxor %xmm1, %xmm1 29: pmaxsw %xmm0, %xmm1 33: movabsq $139845047042064, %rax 43: movabsq $139845047042080, %rcx 53: movabsq $139845047042096, %rdx 63: psraw $7, %xmm1 68: pshufd $1, %xmm1, %xmm0 73: pshufb (%rdx), %xmm1 78: pshufb (%rcx), %xmm0 83: por %xmm1, %xmm0 87: movd %xmm0, %ecx 91: movzbl %cl, %ecx 94: orl (%rax), %ecx 96: movl %ecx, (%rdi) 98: popq %rbp 99: ret define void @fetch_r16g16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = extractelement <2 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <2 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = insertelement <4 x i32> %11, i32 0, i32 2 %13 = insertelement <4 x i32> %12, i32 0, i32 3 %14 = sitofp <4 x i32> %13 to <4 x float> %15 = fmul <4 x float> %14, %16 = shufflevector <4 x float> %15, <4 x float> , <4 x i32> store <4 x float> %16, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movswl %ax, %ecx 9: movabsq $139845047042064, %rdx 19: movaps (%rdx), %xmm0 22: movd %ecx, %xmm1 26: movd %eax, %xmm2 30: pextrw $1, %xmm2, %eax 35: movswl %ax, %eax 38: movd %eax, %xmm2 42: movabsq $139845047042080, %rax 52: punpckldq %xmm2, %xmm1 56: movq %xmm1, %xmm1 60: cvtdq2ps %xmm1, %xmm1 63: mulps (%rax), %xmm1 66: movlhps %xmm0, %xmm1 69: movaps %xmm1, (%rdi) 72: popq %rbp 73: ret define void @fetch_r16g16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = shufflevector <2 x i16> %5, <2 x i16> undef, <4 x i32> %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <8 x i32> %8 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %7, <8 x i16> ) %9 = shufflevector <8 x i16> %8, <8 x i16> %8, <4 x i32> %10 = ashr <4 x i16> %9, %11 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %12 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %13 = bitcast <2 x i16> %11 to <4 x i8> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = shufflevector <4 x i8> %13, <4 x i8> %14, <4 x i32> %16 = bitcast <4 x i8> %15 to i32 %17 = and i32 %16, 65535 %18 = or i32 %17, bitcast (<4 x i8> to i32) %19 = bitcast i32 %18 to <4 x i8> store <4 x i8> %19, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $68, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsw %xmm0, %xmm1 21: psraw $7, %xmm1 26: pshufd $1, %xmm1, %xmm0 31: movabsq $139845047042064, %rax 41: pshufb (%rax), %xmm1 46: movabsq $139845047042080, %rax 56: pshufb (%rax), %xmm0 61: movabsq $139845047042096, %rax 71: por %xmm1, %xmm0 75: movd %xmm0, %ecx 79: movzwl %cx, %ecx 82: orl (%rax), %ecx 84: movl %ecx, (%rdi) 86: popq %rbp 87: ret define void @fetch_r16g16b16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = extractelement <3 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i16> %5, i32 2 %13 = sext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = fmul <4 x float> %16, %18 = shufflevector <4 x float> %17, <4 x float> , <4 x i32> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movswl 4(%rsi), %ecx 10: movd %eax, %xmm0 14: movswl %ax, %eax 17: movabsq $139845047042064, %rdx 27: movaps (%rdx), %xmm1 30: movd %eax, %xmm2 34: pextrw $1, %xmm0, %eax 39: movswl %ax, %eax 42: movd %eax, %xmm0 46: movabsq $139845047042080, %rax 56: punpckldq %xmm0, %xmm2 60: movd %ecx, %xmm0 64: movlhps %xmm0, %xmm2 67: cvtdq2ps %xmm2, %xmm0 70: mulps (%rax), %xmm0 73: shufps $33, %xmm0, %xmm1 77: shufps $36, %xmm1, %xmm0 81: movaps %xmm0, (%rdi) 84: popq %rbp 85: ret define void @fetch_r16g16b16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <8 x i32> %8 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %7, <8 x i16> ) %9 = shufflevector <8 x i16> %8, <8 x i16> %8, <4 x i32> %10 = ashr <4 x i16> %9, %11 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %12 = shufflevector <4 x i16> %10, <4 x i16> %10, <2 x i32> %13 = bitcast <2 x i16> %11 to <4 x i8> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = shufflevector <4 x i8> %13, <4 x i8> %14, <4 x i32> %16 = bitcast <4 x i8> %15 to i32 %17 = and i32 %16, 16777215 %18 = or i32 %17, bitcast (<4 x i8> to i32) %19 = bitcast i32 %18 to <4 x i8> store <4 x i8> %19, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: pinsrw $1, %ecx, %xmm0 20: movzwl 4(%rsi), %edx 24: pinsrw $2, %edx, %xmm0 29: pinsrw $4, %eax, %xmm0 34: movabsq $139845047042064, %rax 44: movabsq $139845047042080, %rsi 54: movabsq $139845047042096, %r8 64: pxor %xmm1, %xmm1 68: pinsrw $5, %ecx, %xmm0 73: pinsrw $6, %edx, %xmm0 78: pmaxsw %xmm1, %xmm0 82: psraw $7, %xmm0 87: pshufd $1, %xmm0, %xmm1 92: pshufb (%r8), %xmm0 98: pshufb (%rsi), %xmm1 103: por %xmm0, %xmm1 107: movd %xmm1, %ecx 111: andl $16777215, %ecx 117: orl (%rax), %ecx 119: movl %ecx, (%rdi) 121: popq %rbp 122: ret define void @fetch_r16g16b16a16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = extractelement <4 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i16> %5, i32 2 %13 = sext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i16> %5, i32 3 %16 = sext i16 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> %19 = fmul <4 x float> %18, store <4 x float> %19, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrw $1, %xmm0, %eax 13: movswl %ax, %eax 16: movd %xmm0, %ecx 20: movswl %cx, %ecx 23: movd %ecx, %xmm1 27: pinsrd $1, %eax, %xmm1 33: pextrw $2, %xmm0, %eax 38: movswl %ax, %eax 41: movabsq $139845047042064, %rcx 51: pinsrd $2, %eax, %xmm1 57: pextrw $3, %xmm0, %eax 62: movswl %ax, %eax 65: pinsrd $3, %eax, %xmm1 71: cvtdq2ps %xmm1, %xmm0 74: mulps (%rcx), %xmm0 77: movaps %xmm0, (%rdi) 80: popq %rbp 81: ret define void @fetch_r16g16b16a16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = shufflevector <4 x i16> %5, <4 x i16> %5, <8 x i32> %7 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %6, <8 x i16> ) %8 = shufflevector <8 x i16> %7, <8 x i16> %7, <4 x i32> %9 = ashr <4 x i16> %8, %10 = shufflevector <4 x i16> %9, <4 x i16> %9, <2 x i32> %11 = shufflevector <4 x i16> %9, <4 x i16> %9, <2 x i32> %12 = bitcast <2 x i16> %10 to <4 x i8> %13 = bitcast <2 x i16> %11 to <4 x i8> %14 = shufflevector <4 x i8> %12, <4 x i8> %13, <4 x i32> store <4 x i8> %14, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16A16_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pshufd $68, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsw %xmm0, %xmm1 21: psraw $7, %xmm1 26: pshufd $1, %xmm1, %xmm0 31: movabsq $139845047042064, %rax 41: pshufb (%rax), %xmm1 46: movabsq $139845047042080, %rax 56: pshufb (%rax), %xmm0 61: por %xmm1, %xmm0 65: movd %xmm0, (%rdi) 69: popq %rbp 70: ret define void @fetch_r16_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = sext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = insertelement <4 x i32> %7, i32 %6, i32 1 %9 = insertelement <4 x i32> %8, i32 %6, i32 2 %10 = insertelement <4 x i32> %9, i32 %6, i32 3 %11 = sitofp <4 x i32> %10 to <4 x float> %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16A16_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movswl (%rsi), %eax 20: movd %eax, %xmm1 24: pshufd $0, %xmm1, %xmm1 29: cvtdq2ps %xmm1, %xmm1 32: movlhps %xmm0, %xmm1 35: shufps $72, %xmm0, %xmm1 39: movaps %xmm1, (%rdi) 42: popq %rbp 43: ret define void @fetch_r16_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = shufflevector <4 x i16> %7, <4 x i16> %7, <8 x i32> %9 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %8, <8 x i16> ) %10 = shufflevector <8 x i16> %9, <8 x i16> %9, <4 x i32> %11 = shufflevector <4 x i16> %10, <4 x i16> %10, <8 x i32> %12 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %11, <8 x i16> ) %13 = shufflevector <8 x i16> %12, <8 x i16> %12, <4 x i32> %14 = ashr <4 x i16> %13, %15 = sub <4 x i16> %13, %14 %16 = shufflevector <4 x i16> %15, <4 x i16> %15, <2 x i32> %17 = shufflevector <4 x i16> %15, <4 x i16> %15, <2 x i32> %18 = bitcast <2 x i16> %16 to <4 x i8> %19 = bitcast <2 x i16> %17 to <4 x i8> %20 = shufflevector <4 x i8> %18, <4 x i8> %19, <4 x i32> %21 = shl <4 x i8> %20, %22 = sub <4 x i8> %21, %20 %23 = bitcast <4 x i8> %22 to i32 %24 = and i32 %23, 255 %25 = or i32 %24, bitcast (<4 x i8> to i32) %26 = bitcast i32 %25 to <4 x i8> store <4 x i8> %26, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movabsq $139845047042064, %rcx 17: movdqa (%rcx), %xmm1 21: movd %eax, %xmm0 25: punpcklwd %xmm0, %xmm0 29: pshufd $0, %xmm0, %xmm0 34: pshufd $68, %xmm0, %xmm0 39: pxor %xmm2, %xmm2 43: pmaxsw %xmm0, %xmm2 47: movabsq $139845047042080, %rax 57: psllw $5, %xmm1 62: movabsq $139845047042096, %rcx 72: movabsq $139845047042112, %rdx 82: movabsq $139845047042128, %rsi 92: movabsq $139845047042144, %r8 102: movabsq $139845047042160, %r9 112: pshufd $68, %xmm2, %xmm0 117: pminsw (%rax), %xmm0 121: movdqa %xmm0, %xmm2 125: psraw $255, %xmm2 130: psubw %xmm2, %xmm0 134: pshufd $1, %xmm0, %xmm2 139: pshufb (%r8), %xmm0 145: pshufb (%rsi), %xmm2 150: por %xmm0, %xmm2 154: movdqa (%rdx), %xmm3 158: pand %xmm2, %xmm3 162: psllw $4, %xmm3 167: movdqa %xmm1, %xmm0 171: movdqa %xmm2, %xmm4 175: pblendvb %xmm0, %xmm3, %xmm4 180: movdqa (%rcx), %xmm3 184: pand %xmm4, %xmm3 188: psllw $2, %xmm3 193: movdqa %xmm1, %xmm0 197: paddb %xmm0, %xmm0 201: pblendvb %xmm0, %xmm3, %xmm4 206: movdqa %xmm4, %xmm1 210: paddb %xmm1, %xmm1 214: paddb %xmm0, %xmm0 218: pblendvb %xmm0, %xmm1, %xmm4 223: psubb %xmm2, %xmm4 227: movd %xmm4, %eax 231: movzbl %al, %eax 234: orl (%r9), %eax 237: movl %eax, (%rdi) 239: popq %rbp 240: ret define void @fetch_r16g16_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = extractelement <2 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <2 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = insertelement <4 x i32> %11, i32 0, i32 2 %13 = insertelement <4 x i32> %12, i32 0, i32 3 %14 = sitofp <4 x i32> %13 to <4 x float> %15 = shufflevector <4 x float> %14, <4 x float> , <4 x i32> store <4 x float> %15, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16_SSCALED (unorm8) ... FAILED Packed: ff 7f 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movswl %ax, %ecx 9: movabsq $139845047042064, %rdx 19: movaps (%rdx), %xmm0 22: movd %ecx, %xmm1 26: movd %eax, %xmm2 30: pextrw $1, %xmm2, %eax 35: movswl %ax, %eax 38: movd %eax, %xmm2 42: punpckldq %xmm2, %xmm1 46: movq %xmm1, %xmm1 50: cvtdq2ps %xmm1, %xmm1 53: movlhps %xmm0, %xmm1 56: movaps %xmm1, (%rdi) 59: popq %rbp 60: ret define void @fetch_r16g16_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = shufflevector <2 x i16> %5, <2 x i16> undef, <4 x i32> %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <8 x i32> %8 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %7, <8 x i16> ) %9 = shufflevector <8 x i16> %8, <8 x i16> %8, <4 x i32> %10 = shufflevector <4 x i16> %9, <4 x i16> %9, <8 x i32> %11 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %10, <8 x i16> ) %12 = shufflevector <8 x i16> %11, <8 x i16> %11, <4 x i32> %13 = ashr <4 x i16> %12, %14 = sub <4 x i16> %12, %13 %15 = shufflevector <4 x i16> %14, <4 x i16> %14, <2 x i32> %16 = shufflevector <4 x i16> %14, <4 x i16> %14, <2 x i32> %17 = bitcast <2 x i16> %15 to <4 x i8> %18 = bitcast <2 x i16> %16 to <4 x i8> %19 = shufflevector <4 x i8> %17, <4 x i8> %18, <4 x i32> %20 = shl <4 x i8> %19, %21 = sub <4 x i8> %20, %19 %22 = bitcast <4 x i8> %21 to i32 %23 = and i32 %22, 65535 %24 = or i32 %23, bitcast (<4 x i8> to i32) %25 = bitcast i32 %24 to <4 x i8> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $68, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsw %xmm0, %xmm1 21: pshufd $68, %xmm1, %xmm1 26: movabsq $139845047042064, %rax 36: pminsw (%rax), %xmm1 40: movabsq $139845047042080, %rax 50: movdqa (%rax), %xmm0 54: movdqa %xmm1, %xmm2 58: psraw $255, %xmm2 63: psllw $5, %xmm0 68: movabsq $139845047042096, %rax 78: movabsq $139845047042112, %rcx 88: movabsq $139845047042128, %rdx 98: movabsq $139845047042144, %rsi 108: movabsq $139845047042160, %r8 118: psubw %xmm2, %xmm1 122: pshufd $1, %xmm1, %xmm2 127: pshufb (%rsi), %xmm1 132: pshufb (%rdx), %xmm2 137: por %xmm1, %xmm2 141: movdqa (%rcx), %xmm1 145: pand %xmm2, %xmm1 149: psllw $4, %xmm1 154: movdqa %xmm2, %xmm3 158: pblendvb %xmm0, %xmm1, %xmm3 163: movdqa (%rax), %xmm1 167: pand %xmm3, %xmm1 171: psllw $2, %xmm1 176: paddb %xmm0, %xmm0 180: pblendvb %xmm0, %xmm1, %xmm3 185: movdqa %xmm3, %xmm1 189: paddb %xmm1, %xmm1 193: paddb %xmm0, %xmm0 197: pblendvb %xmm0, %xmm1, %xmm3 202: psubb %xmm2, %xmm3 206: movd %xmm3, %eax 210: movzwl %ax, %eax 213: orl (%r8), %eax 216: movl %eax, (%rdi) 218: popq %rbp 219: ret define void @fetch_r16g16b16_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = extractelement <3 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i16> %5, i32 2 %13 = sext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = shufflevector <4 x float> %16, <4 x float> , <4 x i32> store <4 x float> %17, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16_SSCALED (unorm8) ... FAILED Packed: ff 7f 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 ff 7f Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movabsq $139845047042064, %rcx 16: movaps (%rcx), %xmm0 19: movswl 4(%rsi), %ecx 23: movswl %ax, %edx 26: movd %edx, %xmm1 30: movd %eax, %xmm2 34: pextrw $1, %xmm2, %eax 39: movswl %ax, %eax 42: movd %eax, %xmm2 46: punpckldq %xmm2, %xmm1 50: movd %ecx, %xmm2 54: movlhps %xmm2, %xmm1 57: cvtdq2ps %xmm1, %xmm1 60: shufps $33, %xmm1, %xmm0 64: shufps $36, %xmm0, %xmm1 68: movaps %xmm1, (%rdi) 71: popq %rbp 72: ret define void @fetch_r16g16b16_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = shufflevector <4 x i16> %6, <4 x i16> %6, <8 x i32> %8 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %7, <8 x i16> ) %9 = shufflevector <8 x i16> %8, <8 x i16> %8, <4 x i32> %10 = shufflevector <4 x i16> %9, <4 x i16> %9, <8 x i32> %11 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %10, <8 x i16> ) %12 = shufflevector <8 x i16> %11, <8 x i16> %11, <4 x i32> %13 = ashr <4 x i16> %12, %14 = sub <4 x i16> %12, %13 %15 = shufflevector <4 x i16> %14, <4 x i16> %14, <2 x i32> %16 = shufflevector <4 x i16> %14, <4 x i16> %14, <2 x i32> %17 = bitcast <2 x i16> %15 to <4 x i8> %18 = bitcast <2 x i16> %16 to <4 x i8> %19 = shufflevector <4 x i8> %17, <4 x i8> %18, <4 x i32> %20 = shl <4 x i8> %19, %21 = sub <4 x i8> %20, %19 %22 = bitcast <4 x i8> %21 to i32 %23 = and i32 %22, 16777215 %24 = or i32 %23, bitcast (<4 x i8> to i32) %25 = bitcast i32 %24 to <4 x i8> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: pinsrw $1, %ecx, %xmm0 20: movzwl 4(%rsi), %edx 24: pinsrw $2, %edx, %xmm0 29: movabsq $139845047042064, %rsi 39: movdqa (%rsi), %xmm1 43: pinsrw $4, %eax, %xmm0 48: movabsq $139845047042080, %rax 58: pxor %xmm2, %xmm2 62: psllw $5, %xmm1 67: movabsq $139845047042096, %rsi 77: movabsq $139845047042112, %r8 87: movabsq $139845047042128, %r9 97: movabsq $139845047042144, %r10 107: movabsq $139845047042160, %r11 117: pinsrw $5, %ecx, %xmm0 122: pinsrw $6, %edx, %xmm0 127: pmaxsw %xmm2, %xmm0 131: pshufd $68, %xmm0, %xmm0 136: pminsw (%rax), %xmm0 140: movdqa %xmm0, %xmm2 144: psraw $255, %xmm2 149: psubw %xmm2, %xmm0 153: pshufd $1, %xmm0, %xmm2 158: pshufb (%r10), %xmm0 164: pshufb (%r9), %xmm2 170: por %xmm0, %xmm2 174: movdqa (%r8), %xmm3 179: pand %xmm2, %xmm3 183: psllw $4, %xmm3 188: movdqa %xmm1, %xmm0 192: movdqa %xmm2, %xmm4 196: pblendvb %xmm0, %xmm3, %xmm4 201: movdqa (%rsi), %xmm3 205: pand %xmm4, %xmm3 209: psllw $2, %xmm3 214: movdqa %xmm1, %xmm0 218: paddb %xmm0, %xmm0 222: pblendvb %xmm0, %xmm3, %xmm4 227: movdqa %xmm4, %xmm1 231: paddb %xmm1, %xmm1 235: paddb %xmm0, %xmm0 239: pblendvb %xmm0, %xmm1, %xmm4 244: psubb %xmm2, %xmm4 248: movd %xmm4, %eax 252: andl $16777215, %eax 258: orl (%r11), %eax 261: movl %eax, (%rdi) 263: popq %rbp 264: ret define void @fetch_r16g16b16a16_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = extractelement <4 x i16> %5, i32 0 %7 = sext i16 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i16> %5, i32 1 %10 = sext i16 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i16> %5, i32 2 %13 = sext i16 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i16> %5, i32 3 %16 = sext i16 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16_SSCALED (unorm8) ... FAILED Packed: ff 7f 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 00 ff 7f Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrw $1, %xmm0, %eax 13: movswl %ax, %eax 16: movd %xmm0, %ecx 20: movswl %cx, %ecx 23: movd %ecx, %xmm1 27: pinsrd $1, %eax, %xmm1 33: pextrw $2, %xmm0, %eax 38: movswl %ax, %eax 41: pinsrd $2, %eax, %xmm1 47: pextrw $3, %xmm0, %eax 52: movswl %ax, %eax 55: pinsrd $3, %eax, %xmm1 61: cvtdq2ps %xmm1, %xmm0 64: movaps %xmm0, (%rdi) 67: popq %rbp 68: ret define void @fetch_r16g16b16a16_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = shufflevector <4 x i16> %5, <4 x i16> %5, <8 x i32> %7 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %6, <8 x i16> ) %8 = shufflevector <8 x i16> %7, <8 x i16> %7, <4 x i32> %9 = shufflevector <4 x i16> %8, <4 x i16> %8, <8 x i32> %10 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %9, <8 x i16> ) %11 = shufflevector <8 x i16> %10, <8 x i16> %10, <4 x i32> %12 = ashr <4 x i16> %11, %13 = sub <4 x i16> %11, %12 %14 = shufflevector <4 x i16> %13, <4 x i16> %13, <2 x i32> %15 = shufflevector <4 x i16> %13, <4 x i16> %13, <2 x i32> %16 = bitcast <2 x i16> %14 to <4 x i8> %17 = bitcast <2 x i16> %15 to <4 x i8> %18 = shufflevector <4 x i8> %16, <4 x i8> %17, <4 x i32> %19 = shl <4 x i8> %18, %20 = sub <4 x i8> %19, %18 store <4 x i8> %20, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16A16_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pshufd $68, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsw %xmm0, %xmm1 21: pshufd $68, %xmm1, %xmm1 26: movabsq $139845047042064, %rax 36: pminsw (%rax), %xmm1 40: movabsq $139845047042080, %rax 50: movdqa %xmm1, %xmm2 54: psraw $255, %xmm2 59: movdqa (%rax), %xmm0 63: psllw $5, %xmm0 68: movabsq $139845047042096, %rax 78: movabsq $139845047042112, %rcx 88: movabsq $139845047042128, %rdx 98: movabsq $139845047042144, %rsi 108: psubw %xmm2, %xmm1 112: pshufd $1, %xmm1, %xmm2 117: pshufb (%rsi), %xmm1 122: pshufb (%rdx), %xmm2 127: por %xmm1, %xmm2 131: movdqa (%rcx), %xmm1 135: pand %xmm2, %xmm1 139: psllw $4, %xmm1 144: movdqa %xmm2, %xmm3 148: pblendvb %xmm0, %xmm1, %xmm3 153: movdqa (%rax), %xmm1 157: pand %xmm3, %xmm1 161: psllw $2, %xmm1 166: paddb %xmm0, %xmm0 170: pblendvb %xmm0, %xmm1, %xmm3 175: movdqa %xmm3, %xmm1 179: paddb %xmm1, %xmm1 183: paddb %xmm0, %xmm0 187: pblendvb %xmm0, %xmm1, %xmm3 192: psubb %xmm2, %xmm3 196: movd %xmm3, (%rdi) 200: popq %rbp 201: ret define void @fetch_r8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16A16_SSCALED (unorm8) ... FAILED Packed: ff 7f 00 00 Unpacked (0,0): 00 00 00 00 obtained ff 00 00 00 expected FAILED Packed: 00 00 ff 7f Unpacked (0,0): 00 00 00 00 obtained 00 ff 00 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 ff 00 expected FAILED Packed: 00 00 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: movabsq $139845047042080, %rax 61: mulps %xmm0, %xmm1 64: movaps (%rax), %xmm0 67: movlhps %xmm0, %xmm1 70: shufps $72, %xmm0, %xmm1 74: movaps %xmm1, (%rdi) 77: popq %rbp 78: ret define void @fetch_r8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> , i8 %4, i32 0 %6 = bitcast <4 x i8> %5 to i32 %7 = and i32 %6, 255 %8 = or i32 %7, bitcast (<4 x i8> to i32) %9 = bitcast i32 %8 to <4 x i8> store <4 x i8> %9, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: movd %xmm0, %eax 15: movzbl %al, %eax 18: movabsq $139845047042064, %rcx 28: orl (%rcx), %eax 30: movl %eax, (%rdi) 32: popq %rbp 33: ret define void @fetch_r8g8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $8, %edx 39: pinsrd $1, %edx, %xmm0 45: movabsq $139845047042064, %rdx 55: movaps (%rdx), %xmm1 58: movabsq $139845047042080, %rdx 68: movabsq $139845047042096, %rsi 78: pinsrd $2, %eax, %xmm0 84: shrl %cl, %ecx 86: pinsrd $3, %ecx, %xmm0 92: pand (%rsi), %xmm0 96: cvtdq2ps %xmm0, %xmm0 99: mulps (%rdx), %xmm0 102: movlhps %xmm1, %xmm0 105: movaps %xmm0, (%rdi) 108: popq %rbp 109: ret define void @fetch_r8g8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = bitcast i32 %6 to <4 x i8> %8 = bitcast <4 x i8> %7 to i32 %9 = and i32 %8, 65535 %10 = or i32 %9, bitcast (<4 x i8> to i32) %11 = bitcast i32 %10 to <4 x i8> store <4 x i8> %11, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movabsq $139845047042064, %rcx 17: orl (%rcx), %eax 19: movl %eax, (%rdi) 21: popq %rbp 22: ret define void @fetch_r8g8b8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = extractelement <3 x i8> %5, i32 0 %7 = zext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i8> %5, i32 1 %10 = zext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i8> %5, i32 2 %13 = zext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = fmul <4 x float> %16, %18 = shufflevector <4 x float> %17, <4 x float> , <4 x i32> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movd %eax, %xmm1 21: pextrb $0, %xmm0, %eax 27: movd %eax, %xmm0 31: punpckldq %xmm1, %xmm0 35: movzbl 2(%rsi), %eax 39: movd %eax, %xmm1 43: movlhps %xmm1, %xmm0 46: movabsq $139845047042064, %rax 56: movaps (%rax), %xmm1 59: movabsq $139845047042080, %rax 69: cvtdq2ps %xmm0, %xmm0 72: mulps (%rax), %xmm0 75: shufps $33, %xmm0, %xmm1 79: shufps $36, %xmm1, %xmm0 83: movaps %xmm0, (%rdi) 86: popq %rbp 87: ret define void @fetch_r8g8b8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = shufflevector <3 x i8> %5, <3 x i8> undef, <4 x i32> %7 = bitcast <4 x i8> %6 to i32 %8 = and i32 %7, 16777215 %9 = or i32 %8, bitcast (<4 x i8> to i32) %10 = bitcast i32 %9 to <4 x i8> store <4 x i8> %10, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $0, %xmm0, %eax 17: pextrb $1, %xmm0, %ecx 23: shll $8, %ecx 26: orl %eax, %ecx 28: pinsrw $0, %ecx, %xmm0 33: movzbl 2(%rsi), %eax 37: movabsq $139845047042064, %rcx 47: pinsrw $1, %eax, %xmm0 52: movd %xmm0, %eax 56: andl $16777215, %eax 62: orl (%rcx), %eax 64: movl %eax, (%rdi) 66: popq %rbp 67: ret define void @fetch_r8g8b8a8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: movaps %xmm0, (%rdi) 91: popq %rbp 92: ret define void @fetch_r8g8b8a8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> store <4 x i8> %6, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8A8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movl %eax, (%rdi) 8: popq %rbp 9: ret define void @fetch_x8b8g8r8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8A8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: movd %xmm0, %eax 17: shrl %cl, %eax 19: movd %eax, %xmm1 23: pextrd $1, %xmm0, %eax 29: shrl $8, %eax 32: pinsrd $1, %eax, %xmm1 38: pextrd $2, %xmm0, %eax 44: shrl $16, %eax 47: movabsq $139845047042064, %rcx 57: movaps (%rcx), %xmm2 60: movabsq $139845047042080, %rcx 70: movabsq $139845047042096, %rdx 80: pinsrd $2, %eax, %xmm1 86: pextrd $3, %xmm0, %eax 92: shrl $24, %eax 95: pinsrd $3, %eax, %xmm1 101: pand (%rdx), %xmm1 105: cvtdq2ps %xmm1, %xmm0 108: mulps (%rcx), %xmm0 111: shufps $17, %xmm0, %xmm2 115: shufps $43, %xmm2, %xmm0 119: movaps %xmm0, (%rdi) 122: popq %rbp 123: ret define void @fetch_x8b8g8r8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = lshr i32 %7, 24 %9 = or i32 %8, bitcast (<4 x i8> to i32) %10 = lshr i32 %7, 8 %11 = and i32 %10, 65280 %12 = or i32 %9, %11 %13 = shl i32 %7, 8 %14 = and i32 %13, 16711680 %15 = or i32 %12, %14 %16 = bitcast i32 %15 to <4 x i8> store <4 x i8> %16, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_X8B8G8R8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movl %eax, %ecx 8: shrl $24, %ecx 11: movabsq $139845047042064, %rdx 21: orl (%rdx), %ecx 23: movl %eax, %edx 25: shrl $8, %edx 28: andl $65280, %edx 34: orl %ecx, %edx 36: shll $8, %eax 39: andl $16711680, %eax 45: orl %edx, %eax 47: movl %eax, (%rdi) 49: popq %rbp 50: ret define void @fetch_r8_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> , <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_X8B8G8R8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movaps (%rax), %xmm1 50: movlhps %xmm1, %xmm0 53: shufps $72, %xmm1, %xmm0 57: movaps %xmm0, (%rdi) 60: popq %rbp 61: ret define void @fetch_r8_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> %30 = bitcast <4 x i8> %29 to i32 %31 = and i32 %30, 255 %32 = or i32 %31, bitcast (<4 x i8> to i32) %33 = bitcast i32 %32 to <4 x i8> store <4 x i8> %33, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $255, %eax 9: movd %eax, %xmm0 13: movzbl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movabsq $139845047042080, %rcx 57: movabsq $139845047042096, %rdx 67: movabsq $139845047042112, %rsi 77: movabsq $139845047042128, %r8 87: movabsq $139845047042144, %r9 97: movabsq $139845047042160, %r10 107: xorps %xmm1, %xmm1 110: maxps %xmm1, %xmm0 113: minps (%r10), %xmm0 117: mulps (%r9), %xmm0 121: addps (%r8), %xmm0 125: andps (%rsi), %xmm0 128: pshufd $1, %xmm0, %xmm1 133: pshufd $3, %xmm0, %xmm2 138: movaps %xmm0, %xmm3 141: movhlps %xmm3, %xmm3 144: punpcklwd %xmm2, %xmm3 148: pshufb (%rdx), %xmm3 153: punpcklwd %xmm1, %xmm0 157: pshufb (%rcx), %xmm0 162: por %xmm3, %xmm0 166: movd %xmm0, %ecx 170: movzbl %cl, %ecx 173: orl (%rax), %ecx 175: movl %ecx, (%rdi) 177: popq %rbp 178: ret define void @fetch_r8g8_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $8, %edx 39: pinsrd $1, %edx, %xmm0 45: movabsq $139845047042064, %rdx 55: movaps (%rdx), %xmm1 58: movabsq $139845047042080, %rdx 68: pinsrd $2, %eax, %xmm0 74: shrl %cl, %ecx 76: pinsrd $3, %ecx, %xmm0 82: pand (%rdx), %xmm0 86: cvtdq2ps %xmm0, %xmm0 89: movlhps %xmm1, %xmm0 92: movaps %xmm0, (%rdi) 95: popq %rbp 96: ret define void @fetch_r8g8_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 65535 %33 = or i32 %32, bitcast (<4 x i8> to i32) %34 = bitcast i32 %33 to <4 x i8> store <4 x i8> %34, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movzwl (%rsi), %eax 8: movd %eax, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: pextrd $3, %xmm0, %eax 23: pextrd $1, %xmm0, %ecx 29: shrl $8, %ecx 32: pextrd $2, %xmm0, %edx 38: shrl %cl, %edx 40: pinsrd $1, %ecx, %xmm0 46: movabsq $139845047042064, %rcx 56: movabsq $139845047042080, %rsi 66: movabsq $139845047042096, %r8 76: movabsq $139845047042112, %r9 86: movabsq $139845047042128, %r10 96: movabsq $139845047042144, %r11 106: xorps %xmm1, %xmm1 109: movabsq $139845047042160, %rbx 119: pinsrd $2, %edx, %xmm0 125: movabsq $139845047042176, %rdx 135: shrl %cl, %eax 137: pinsrd $3, %eax, %xmm0 143: pand (%rdx), %xmm0 147: cvtdq2ps %xmm0, %xmm0 150: maxps %xmm1, %xmm0 153: minps (%rbx), %xmm0 156: mulps (%r11), %xmm0 160: addps (%r10), %xmm0 164: andps (%r9), %xmm0 168: pshufd $1, %xmm0, %xmm1 173: pshufd $3, %xmm0, %xmm2 178: movaps %xmm0, %xmm3 181: movhlps %xmm3, %xmm3 184: punpcklwd %xmm2, %xmm3 188: pshufb (%r8), %xmm3 194: punpcklwd %xmm1, %xmm0 198: pshufb (%rsi), %xmm0 203: por %xmm3, %xmm0 207: movd %xmm0, %eax 211: movzwl %ax, %eax 214: orl (%rcx), %eax 216: movl %eax, (%rdi) 218: popq %rbx 219: popq %rbp 220: ret define void @fetch_r8g8b8_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = extractelement <3 x i8> %5, i32 0 %7 = zext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i8> %5, i32 1 %10 = zext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i8> %5, i32 2 %13 = zext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = shufflevector <4 x float> %16, <4 x float> , <4 x i32> store <4 x float> %17, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movd %eax, %xmm1 21: pextrb $0, %xmm0, %eax 27: movd %eax, %xmm0 31: punpckldq %xmm1, %xmm0 35: movzbl 2(%rsi), %eax 39: movd %eax, %xmm1 43: movlhps %xmm1, %xmm0 46: movabsq $139845047042064, %rax 56: movaps (%rax), %xmm1 59: cvtdq2ps %xmm0, %xmm0 62: shufps $33, %xmm0, %xmm1 66: shufps $36, %xmm1, %xmm0 70: movaps %xmm0, (%rdi) 73: popq %rbp 74: ret define void @fetch_r8g8b8_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = shufflevector <3 x i8> %5, <3 x i8> undef, <4 x i32> %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = sub <4 x i8> %10, %9 %12 = bitcast <4 x i8> %11 to i32 %13 = and i32 %12, 16777215 %14 = or i32 %13, bitcast (<4 x i8> to i32) %15 = bitcast i32 %14 to <4 x i8> store <4 x i8> %15, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $0, %xmm0, %eax 17: pextrb $1, %xmm0, %ecx 23: shll $8, %ecx 26: orl %eax, %ecx 28: pinsrw $0, %ecx, %xmm1 33: movzbl 2(%rsi), %eax 37: movabsq $139845047042064, %rcx 47: movdqa (%rcx), %xmm0 51: psllw $5, %xmm0 56: movabsq $139845047042080, %rcx 66: movabsq $139845047042096, %rdx 76: movabsq $139845047042112, %rsi 86: movabsq $139845047042128, %r8 96: pinsrw $1, %eax, %xmm1 101: pminub (%rsi), %xmm1 105: movdqa (%rdx), %xmm2 109: pand %xmm1, %xmm2 113: psllw $4, %xmm2 118: movdqa %xmm1, %xmm3 122: pblendvb %xmm0, %xmm2, %xmm3 127: movdqa (%rcx), %xmm2 131: pand %xmm3, %xmm2 135: psllw $2, %xmm2 140: paddb %xmm0, %xmm0 144: pblendvb %xmm0, %xmm2, %xmm3 149: movdqa %xmm3, %xmm2 153: paddb %xmm2, %xmm2 157: paddb %xmm0, %xmm0 161: pblendvb %xmm0, %xmm2, %xmm3 166: psubb %xmm1, %xmm3 170: movd %xmm3, %eax 174: andl $16777215, %eax 180: orl (%r8), %eax 183: movl %eax, (%rdi) 185: popq %rbp 186: ret define void @fetch_r8g8b8a8_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> store <4 x float> %10, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8_USCALED (unorm8) ... FAILED Packed: ff 00 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 ff 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 ff 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected FAILED Packed: ff ff ff 00 Unpacked (0,0): 00 00 00 ff obtained ff ff ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: pinsrd $3, %eax, %xmm0 68: pand (%rcx), %xmm0 72: cvtdq2ps %xmm0, %xmm0 75: movaps %xmm0, (%rdi) 78: popq %rbp 79: ret define void @fetch_r8g8b8a8_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8A8_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: movabsq $139845047042096, %rsi 82: movabsq $139845047042112, %r8 92: movabsq $139845047042128, %r9 102: xorps %xmm1, %xmm1 105: movabsq $139845047042144, %r10 115: pinsrd $3, %eax, %xmm0 121: movdqa (%r10), %xmm2 126: pand %xmm2, %xmm0 130: cvtdq2ps %xmm0, %xmm0 133: maxps %xmm1, %xmm0 136: minps (%r9), %xmm0 140: mulps (%r8), %xmm0 144: addps (%rsi), %xmm0 147: andps %xmm2, %xmm0 150: pshufd $1, %xmm0, %xmm1 155: pshufd $3, %xmm0, %xmm2 160: movaps %xmm0, %xmm3 163: movhlps %xmm3, %xmm3 166: punpcklwd %xmm2, %xmm3 170: pshufb (%rdx), %xmm3 175: punpcklwd %xmm1, %xmm0 179: pshufb (%rcx), %xmm0 184: por %xmm3, %xmm0 188: movd %xmm0, (%rdi) 192: popq %rbp 193: ret define void @fetch_r8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = sext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = insertelement <4 x i32> %6, i32 %5, i32 1 %8 = insertelement <4 x i32> %7, i32 %5, i32 2 %9 = insertelement <4 x i32> %8, i32 %5, i32 3 %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8A8_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movsbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: movaps (%rax), %xmm1 45: movlhps %xmm1, %xmm0 48: shufps $72, %xmm1, %xmm0 52: movaps %xmm0, (%rdi) 55: popq %rbp 56: ret define void @fetch_r8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> undef, i8 %4, i32 0 %6 = shufflevector <4 x i8> %5, <4 x i8> undef, <4 x i32> zeroinitializer %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = bitcast <4 x i8> %10 to i32 %12 = and i32 %11, 255 %13 = or i32 %12, bitcast (<4 x i8> to i32) %14 = bitcast i32 %13 to <4 x i8> store <4 x i8> %14, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklbw %xmm0, %xmm0 15: punpcklbw %xmm0, %xmm0 19: pshufd $0, %xmm0, %xmm0 24: pxor %xmm1, %xmm1 28: pmaxsb %xmm0, %xmm1 33: movabsq $139845047042064, %rax 43: movdqa (%rax), %xmm0 47: movabsq $139845047042080, %rax 57: movabsq $139845047042096, %rcx 67: movabsq $139845047042112, %rdx 77: movdqa (%rcx), %xmm2 81: pand %xmm1, %xmm2 85: psllw $4, %xmm2 90: psllw $5, %xmm0 95: pblendvb %xmm0, %xmm2, %xmm1 100: movdqa (%rax), %xmm2 104: pand %xmm1, %xmm2 108: psllw $2, %xmm2 113: paddb %xmm0, %xmm0 117: pblendvb %xmm0, %xmm2, %xmm1 122: movdqa %xmm1, %xmm2 126: paddb %xmm2, %xmm2 130: paddb %xmm0, %xmm0 134: pblendvb %xmm0, %xmm2, %xmm1 139: movd %xmm1, %eax 143: movzbl %al, %eax 146: orl (%rdx), %eax 148: movl %eax, (%rdi) 150: popq %rbp 151: ret define void @fetch_r8g8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i8>* %5 = load <2 x i8>* %4, align 1 %6 = extractelement <2 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <2 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = insertelement <4 x i32> %11, i32 0, i32 2 %13 = insertelement <4 x i32> %12, i32 0, i32 3 %14 = sitofp <4 x i32> %13 to <4 x float> %15 = fmul <4 x float> %14, %16 = shufflevector <4 x float> %15, <4 x float> , <4 x i32> store <4 x float> %16, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movsbl %al, %eax 20: movabsq $139845047042064, %rcx 30: movaps (%rcx), %xmm1 33: movd %eax, %xmm2 37: pextrb $0, %xmm0, %eax 43: movsbl %al, %eax 46: movd %eax, %xmm0 50: movabsq $139845047042080, %rax 60: punpckldq %xmm2, %xmm0 64: movq %xmm0, %xmm0 68: cvtdq2ps %xmm0, %xmm0 71: mulps (%rax), %xmm0 74: movlhps %xmm1, %xmm0 77: movaps %xmm0, (%rdi) 80: popq %rbp 81: ret define void @fetch_r8g8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i8>* %5 = load <2 x i8>* %4, align 1 %6 = shufflevector <2 x i8> %5, <2 x i8> undef, <4 x i32> %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = bitcast <4 x i8> %10 to i32 %12 = and i32 %11, 65535 %13 = or i32 %12, bitcast (<4 x i8> to i32) %14 = bitcast i32 %13 to <4 x i8> store <4 x i8> %14, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pxor %xmm1, %xmm1 15: pmaxsb %xmm0, %xmm1 20: movabsq $139845047042064, %rax 30: movdqa (%rax), %xmm2 34: pand %xmm1, %xmm2 38: psllw $4, %xmm2 43: movabsq $139845047042080, %rax 53: movdqa (%rax), %xmm0 57: psllw $5, %xmm0 62: pblendvb %xmm0, %xmm2, %xmm1 67: movabsq $139845047042096, %rax 77: movabsq $139845047042112, %rcx 87: movdqa (%rax), %xmm2 91: pand %xmm1, %xmm2 95: psllw $2, %xmm2 100: paddb %xmm0, %xmm0 104: pblendvb %xmm0, %xmm2, %xmm1 109: movdqa %xmm1, %xmm2 113: paddb %xmm2, %xmm2 117: paddb %xmm0, %xmm0 121: pblendvb %xmm0, %xmm2, %xmm1 126: movd %xmm1, %eax 130: movzwl %ax, %eax 133: orl (%rcx), %eax 135: movl %eax, (%rdi) 137: popq %rbp 138: ret define void @fetch_r8g8b8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = extractelement <3 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i8> %5, i32 2 %13 = sext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = fmul <4 x float> %16, %18 = shufflevector <4 x float> %17, <4 x float> , <4 x i32> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movsbl 2(%rsi), %ecx 21: movsbl %al, %eax 24: movabsq $139845047042064, %rdx 34: movaps (%rdx), %xmm1 37: movd %eax, %xmm2 41: pextrb $0, %xmm0, %eax 47: movsbl %al, %eax 50: movd %eax, %xmm0 54: movabsq $139845047042080, %rax 64: punpckldq %xmm2, %xmm0 68: movd %ecx, %xmm2 72: movlhps %xmm2, %xmm0 75: cvtdq2ps %xmm0, %xmm0 78: mulps (%rax), %xmm0 81: shufps $33, %xmm0, %xmm1 85: shufps $36, %xmm1, %xmm0 89: movaps %xmm0, (%rdi) 92: popq %rbp 93: ret define void @fetch_r8g8b8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = shufflevector <3 x i8> %5, <3 x i8> undef, <4 x i32> %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = bitcast <4 x i8> %10 to i32 %12 = and i32 %11, 16777215 %13 = or i32 %12, bitcast (<4 x i8> to i32) %14 = bitcast i32 %13 to <4 x i8> store <4 x i8> %14, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $0, %xmm0, %eax 17: pextrb $1, %xmm0, %ecx 23: shll $8, %ecx 26: orl %eax, %ecx 28: pinsrw $0, %ecx, %xmm1 33: movzbl 2(%rsi), %eax 37: movabsq $139845047042064, %rcx 47: movdqa (%rcx), %xmm0 51: psllw $5, %xmm0 56: movabsq $139845047042080, %rcx 66: movabsq $139845047042096, %rdx 76: pxor %xmm2, %xmm2 80: movabsq $139845047042112, %rsi 90: pinsrw $1, %eax, %xmm1 95: pmaxsb %xmm2, %xmm1 100: movdqa (%rdx), %xmm2 104: pand %xmm1, %xmm2 108: psllw $4, %xmm2 113: pblendvb %xmm0, %xmm2, %xmm1 118: movdqa (%rcx), %xmm2 122: pand %xmm1, %xmm2 126: psllw $2, %xmm2 131: paddb %xmm0, %xmm0 135: pblendvb %xmm0, %xmm2, %xmm1 140: movdqa %xmm1, %xmm2 144: paddb %xmm2, %xmm2 148: paddb %xmm0, %xmm0 152: pblendvb %xmm0, %xmm2, %xmm1 157: movd %xmm1, %eax 161: andl $16777215, %eax 167: orl (%rsi), %eax 169: movl %eax, (%rdi) 171: popq %rbp 172: ret define void @fetch_r8g8b8a8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i8>* %5 = load <4 x i8>* %4, align 1 %6 = extractelement <4 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i8> %5, i32 2 %13 = sext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i8> %5, i32 3 %16 = sext i8 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> %19 = fmul <4 x float> %18, store <4 x float> %19, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pextrb $1, %xmm0, %eax 14: movsbl %al, %eax 17: pextrb $0, %xmm0, %ecx 23: movsbl %cl, %ecx 26: movd %ecx, %xmm1 30: pinsrd $1, %eax, %xmm1 36: pextrb $2, %xmm0, %eax 42: movsbl %al, %eax 45: movabsq $139845047042064, %rcx 55: pinsrd $2, %eax, %xmm1 61: pextrb $3, %xmm0, %eax 67: movsbl %al, %eax 70: pinsrd $3, %eax, %xmm1 76: cvtdq2ps %xmm1, %xmm0 79: mulps (%rcx), %xmm0 82: movaps %xmm0, (%rdi) 85: popq %rbp 86: ret define void @fetch_r8g8b8a8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i8>* %5 = load <4 x i8>* %4, align 1 %6 = shufflevector <4 x i8> %5, <4 x i8> %5, <16 x i32> %7 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %6, <16 x i8> ) %8 = shufflevector <16 x i8> %7, <16 x i8> %7, <4 x i32> %9 = shl <4 x i8> %8, store <4 x i8> %9, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8A8_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsb %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm2 31: pand %xmm1, %xmm2 35: psllw $4, %xmm2 40: movabsq $139845047042080, %rax 50: movdqa (%rax), %xmm0 54: psllw $5, %xmm0 59: pblendvb %xmm0, %xmm2, %xmm1 64: movabsq $139845047042096, %rax 74: movdqa (%rax), %xmm2 78: pand %xmm1, %xmm2 82: psllw $2, %xmm2 87: paddb %xmm0, %xmm0 91: pblendvb %xmm0, %xmm2, %xmm1 96: movdqa %xmm1, %xmm2 100: paddb %xmm2, %xmm2 104: paddb %xmm0, %xmm0 108: pblendvb %xmm0, %xmm2, %xmm1 113: movd %xmm1, (%rdi) 117: popq %rbp 118: ret define void @fetch_r8_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = sext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = insertelement <4 x i32> %6, i32 %5, i32 1 %8 = insertelement <4 x i32> %7, i32 %5, i32 2 %9 = insertelement <4 x i32> %8, i32 %5, i32 3 %10 = sitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> , <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8A8_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movsbl (%rsi), %eax 20: movd %eax, %xmm1 24: pshufd $0, %xmm1, %xmm1 29: cvtdq2ps %xmm1, %xmm1 32: movlhps %xmm0, %xmm1 35: shufps $72, %xmm0, %xmm1 39: movaps %xmm1, (%rdi) 42: popq %rbp 43: ret define void @fetch_r8_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> undef, i8 %4, i32 0 %6 = shufflevector <4 x i8> %5, <4 x i8> undef, <4 x i32> zeroinitializer %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shufflevector <4 x i8> %9, <4 x i8> %9, <16 x i32> %11 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %10, <16 x i8> ) %12 = shufflevector <16 x i8> %11, <16 x i8> %11, <4 x i32> %13 = shl <4 x i8> %12, %14 = sub <4 x i8> %13, %12 %15 = bitcast <4 x i8> %14 to i32 %16 = and i32 %15, 255 %17 = or i32 %16, bitcast (<4 x i8> to i32) %18 = bitcast i32 %17 to <4 x i8> store <4 x i8> %18, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklbw %xmm0, %xmm0 15: punpcklbw %xmm0, %xmm0 19: pshufd $0, %xmm0, %xmm0 24: movabsq $139845047042064, %rax 34: pxor %xmm1, %xmm1 38: pmaxsb %xmm0, %xmm1 43: movdqa (%rax), %xmm0 47: movabsq $139845047042080, %rax 57: psllw $5, %xmm0 62: movdqa (%rax), %xmm2 66: movabsq $139845047042096, %rax 76: movabsq $139845047042112, %rcx 86: movabsq $139845047042128, %rdx 96: pminsb %xmm1, %xmm2 101: movdqa (%rcx), %xmm1 105: pand %xmm2, %xmm1 109: psllw $4, %xmm1 114: movdqa %xmm2, %xmm3 118: pblendvb %xmm0, %xmm1, %xmm3 123: movdqa (%rax), %xmm1 127: pand %xmm3, %xmm1 131: psllw $2, %xmm1 136: paddb %xmm0, %xmm0 140: pblendvb %xmm0, %xmm1, %xmm3 145: movdqa %xmm3, %xmm1 149: paddb %xmm1, %xmm1 153: paddb %xmm0, %xmm0 157: pblendvb %xmm0, %xmm1, %xmm3 162: psubb %xmm2, %xmm3 166: movd %xmm3, %eax 170: movzbl %al, %eax 173: orl (%rdx), %eax 175: movl %eax, (%rdi) 177: popq %rbp 178: ret define void @fetch_r8g8_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i8>* %5 = load <2 x i8>* %4, align 1 %6 = extractelement <2 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <2 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = insertelement <4 x i32> %11, i32 0, i32 2 %13 = insertelement <4 x i32> %12, i32 0, i32 3 %14 = sitofp <4 x i32> %13 to <4 x float> %15 = shufflevector <4 x float> %14, <4 x float> , <4 x i32> store <4 x float> %15, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8_SSCALED (unorm8) ... FAILED Packed: 7f 00 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movsbl %al, %eax 20: movabsq $139845047042064, %rcx 30: movaps (%rcx), %xmm1 33: movd %eax, %xmm2 37: pextrb $0, %xmm0, %eax 43: movsbl %al, %eax 46: movd %eax, %xmm0 50: punpckldq %xmm2, %xmm0 54: movq %xmm0, %xmm0 58: cvtdq2ps %xmm0, %xmm0 61: movlhps %xmm1, %xmm0 64: movaps %xmm0, (%rdi) 67: popq %rbp 68: ret define void @fetch_r8g8_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i8>* %5 = load <2 x i8>* %4, align 1 %6 = shufflevector <2 x i8> %5, <2 x i8> undef, <4 x i32> %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shufflevector <4 x i8> %9, <4 x i8> %9, <16 x i32> %11 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %10, <16 x i8> ) %12 = shufflevector <16 x i8> %11, <16 x i8> %11, <4 x i32> %13 = shl <4 x i8> %12, %14 = sub <4 x i8> %13, %12 %15 = bitcast <4 x i8> %14 to i32 %16 = and i32 %15, 65535 %17 = or i32 %16, bitcast (<4 x i8> to i32) %18 = bitcast i32 %17 to <4 x i8> store <4 x i8> %18, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pxor %xmm1, %xmm1 15: pmaxsb %xmm0, %xmm1 20: movabsq $139845047042064, %rax 30: movdqa (%rax), %xmm2 34: pminsb %xmm1, %xmm2 39: movabsq $139845047042080, %rax 49: movdqa (%rax), %xmm1 53: pand %xmm2, %xmm1 57: psllw $4, %xmm1 62: movabsq $139845047042096, %rax 72: movdqa (%rax), %xmm0 76: psllw $5, %xmm0 81: movabsq $139845047042112, %rax 91: movabsq $139845047042128, %rcx 101: movdqa %xmm2, %xmm3 105: pblendvb %xmm0, %xmm1, %xmm3 110: movdqa (%rax), %xmm1 114: pand %xmm3, %xmm1 118: psllw $2, %xmm1 123: paddb %xmm0, %xmm0 127: pblendvb %xmm0, %xmm1, %xmm3 132: movdqa %xmm3, %xmm1 136: paddb %xmm1, %xmm1 140: paddb %xmm0, %xmm0 144: pblendvb %xmm0, %xmm1, %xmm3 149: psubb %xmm2, %xmm3 153: movd %xmm3, %eax 157: movzwl %ax, %eax 160: orl (%rcx), %eax 162: movl %eax, (%rdi) 164: popq %rbp 165: ret define void @fetch_r8g8b8_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = extractelement <3 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <3 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <3 x i8> %5, i32 2 %13 = sext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = insertelement <4 x i32> %14, i32 0, i32 3 %16 = sitofp <4 x i32> %15 to <4 x float> %17 = shufflevector <4 x float> %16, <4 x float> , <4 x i32> store <4 x float> %17, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8_SSCALED (unorm8) ... FAILED Packed: 7f 00 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 7f 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $1, %xmm0, %eax 17: movabsq $139845047042064, %rcx 27: movaps (%rcx), %xmm1 30: movsbl 2(%rsi), %ecx 34: movsbl %al, %eax 37: movd %eax, %xmm2 41: pextrb $0, %xmm0, %eax 47: movsbl %al, %eax 50: movd %eax, %xmm0 54: punpckldq %xmm2, %xmm0 58: movd %ecx, %xmm2 62: movlhps %xmm2, %xmm0 65: cvtdq2ps %xmm0, %xmm0 68: shufps $33, %xmm0, %xmm1 72: shufps $36, %xmm1, %xmm0 76: movaps %xmm0, (%rdi) 79: popq %rbp 80: ret define void @fetch_r8g8b8_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i8>* %5 = load <3 x i8>* %4, align 1 %6 = shufflevector <3 x i8> %5, <3 x i8> undef, <4 x i32> %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shufflevector <4 x i8> %9, <4 x i8> %9, <16 x i32> %11 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %10, <16 x i8> ) %12 = shufflevector <16 x i8> %11, <16 x i8> %11, <4 x i32> %13 = shl <4 x i8> %12, %14 = sub <4 x i8> %13, %12 %15 = bitcast <4 x i8> %14 to i32 %16 = and i32 %15, 16777215 %17 = or i32 %16, bitcast (<4 x i8> to i32) %18 = bitcast i32 %17 to <4 x i8> store <4 x i8> %18, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pextrb $0, %xmm0, %eax 17: pextrb $1, %xmm0, %ecx 23: shll $8, %ecx 26: orl %eax, %ecx 28: movabsq $139845047042064, %rax 38: pinsrw $0, %ecx, %xmm1 43: movdqa (%rax), %xmm2 47: movzbl 2(%rsi), %eax 51: movabsq $139845047042080, %rcx 61: movdqa (%rcx), %xmm0 65: psllw $5, %xmm0 70: movabsq $139845047042096, %rcx 80: movabsq $139845047042112, %rdx 90: pxor %xmm3, %xmm3 94: movabsq $139845047042128, %rsi 104: pinsrw $1, %eax, %xmm1 109: pmaxsb %xmm3, %xmm1 114: pminsb %xmm2, %xmm1 119: movdqa (%rdx), %xmm2 123: pand %xmm1, %xmm2 127: psllw $4, %xmm2 132: movdqa %xmm1, %xmm3 136: pblendvb %xmm0, %xmm2, %xmm3 141: movdqa (%rcx), %xmm2 145: pand %xmm3, %xmm2 149: psllw $2, %xmm2 154: paddb %xmm0, %xmm0 158: pblendvb %xmm0, %xmm2, %xmm3 163: movdqa %xmm3, %xmm2 167: paddb %xmm2, %xmm2 171: paddb %xmm0, %xmm0 175: pblendvb %xmm0, %xmm2, %xmm3 180: psubb %xmm1, %xmm3 184: movd %xmm3, %eax 188: andl $16777215, %eax 194: orl (%rsi), %eax 196: movl %eax, (%rdi) 198: popq %rbp 199: ret define void @fetch_r8g8b8a8_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i8>* %5 = load <4 x i8>* %4, align 1 %6 = extractelement <4 x i8> %5, i32 0 %7 = sext i8 %6 to i32 %8 = insertelement <4 x i32> undef, i32 %7, i32 0 %9 = extractelement <4 x i8> %5, i32 1 %10 = sext i8 %9 to i32 %11 = insertelement <4 x i32> %8, i32 %10, i32 1 %12 = extractelement <4 x i8> %5, i32 2 %13 = sext i8 %12 to i32 %14 = insertelement <4 x i32> %11, i32 %13, i32 2 %15 = extractelement <4 x i8> %5, i32 3 %16 = sext i8 %15 to i32 %17 = insertelement <4 x i32> %14, i32 %16, i32 3 %18 = sitofp <4 x i32> %17 to <4 x float> store <4 x float> %18, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8_SSCALED (unorm8) ... FAILED Packed: 7f 00 00 00 Unpacked (0,0): 00 00 00 ff obtained ff 00 00 ff expected FAILED Packed: 00 7f 00 00 Unpacked (0,0): 00 00 00 ff obtained 00 ff 00 ff expected FAILED Packed: 00 00 7f 00 Unpacked (0,0): 00 00 00 ff obtained 00 00 ff ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pextrb $1, %xmm0, %eax 14: movsbl %al, %eax 17: pextrb $0, %xmm0, %ecx 23: movsbl %cl, %ecx 26: movd %ecx, %xmm1 30: pinsrd $1, %eax, %xmm1 36: pextrb $2, %xmm0, %eax 42: movsbl %al, %eax 45: pinsrd $2, %eax, %xmm1 51: pextrb $3, %xmm0, %eax 57: movsbl %al, %eax 60: pinsrd $3, %eax, %xmm1 66: cvtdq2ps %xmm1, %xmm0 69: movaps %xmm0, (%rdi) 72: popq %rbp 73: ret define void @fetch_r8g8b8a8_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i8>* %5 = load <4 x i8>* %4, align 1 %6 = shufflevector <4 x i8> %5, <4 x i8> %5, <16 x i32> %7 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %6, <16 x i8> ) %8 = shufflevector <16 x i8> %7, <16 x i8> %7, <4 x i32> %9 = shufflevector <4 x i8> %8, <4 x i8> %8, <16 x i32> %10 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %9, <16 x i8> ) %11 = shufflevector <16 x i8> %10, <16 x i8> %10, <4 x i32> %12 = shl <4 x i8> %11, %13 = sub <4 x i8> %12, %11 store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8A8_SSCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsb %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm2 31: pminsb %xmm1, %xmm2 36: movabsq $139845047042080, %rax 46: movdqa (%rax), %xmm1 50: pand %xmm2, %xmm1 54: psllw $4, %xmm1 59: movabsq $139845047042096, %rax 69: movdqa (%rax), %xmm0 73: psllw $5, %xmm0 78: movdqa %xmm2, %xmm3 82: pblendvb %xmm0, %xmm1, %xmm3 87: movabsq $139845047042112, %rax 97: movdqa (%rax), %xmm1 101: pand %xmm3, %xmm1 105: psllw $2, %xmm1 110: paddb %xmm0, %xmm0 114: pblendvb %xmm0, %xmm1, %xmm3 119: movdqa %xmm3, %xmm1 123: paddb %xmm1, %xmm1 127: paddb %xmm0, %xmm0 131: pblendvb %xmm0, %xmm1, %xmm3 136: psubb %xmm2, %xmm3 140: movd %xmm3, (%rdi) 144: popq %rbp 145: ret define void @fetch_r32_fixed_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = sitofp <4 x i32> %7 to <4 x float> %9 = fmul <4 x float> %8, %10 = shufflevector <4 x float> %9, <4 x float> , <4 x i32> store <4 x float> %10, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8A8_SSCALED (unorm8) ... FAILED Packed: 7f 00 00 00 Unpacked (0,0): 00 00 00 00 obtained ff 00 00 00 expected FAILED Packed: 00 7f 00 00 Unpacked (0,0): 00 00 00 00 obtained 00 ff 00 00 expected FAILED Packed: 00 00 7f 00 Unpacked (0,0): 00 00 00 00 obtained 00 00 ff 00 expected FAILED Packed: 00 00 00 7f Unpacked (0,0): 00 00 00 00 obtained 00 00 00 ff expected 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: cvtdq2ps %xmm0, %xmm0 16: movabsq $139845047042064, %rax 26: mulps (%rax), %xmm0 29: movabsq $139845047042080, %rax 39: movaps (%rax), %xmm1 42: movlhps %xmm1, %xmm0 45: shufps $72, %xmm1, %xmm0 49: movaps %xmm0, (%rdi) 52: popq %rbp 53: ret define void @fetch_r32_fixed_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %7, <4 x i32> zeroinitializer) %9 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %8, <4 x i32> ) %10 = ashr <4 x i32> %9, %11 = sub <4 x i32> %9, %10 %12 = ashr <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> %26 = bitcast <4 x i8> %25 to i32 %27 = and i32 %26, 255 %28 = or i32 %27, bitcast (<4 x i8> to i32) %29 = bitcast i32 %28 to <4 x i8> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32_FIXED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pxor %xmm1, %xmm1 17: pmaxsd %xmm0, %xmm1 22: movabsq $139845047042064, %rax 32: movdqa (%rax), %xmm0 36: pminsd %xmm1, %xmm0 41: movdqa %xmm0, %xmm1 45: psrad $15, %xmm1 50: psubd %xmm1, %xmm0 54: movabsq $139845047042080, %rax 64: movabsq $139845047042096, %rcx 74: movabsq $139845047042112, %rdx 84: psrad $8, %xmm0 89: pshufd $1, %xmm0, %xmm1 94: pshufd $3, %xmm0, %xmm2 99: movdqa %xmm0, %xmm3 103: movhlps %xmm3, %xmm3 106: punpcklwd %xmm2, %xmm3 110: pshufb (%rdx), %xmm3 115: punpcklwd %xmm1, %xmm0 119: pshufb (%rcx), %xmm0 124: por %xmm3, %xmm0 128: movd %xmm0, %ecx 132: movzbl %cl, %ecx 135: orl (%rax), %ecx 137: movl %ecx, (%rdi) 139: popq %rbp 140: ret define void @fetch_r32g32_fixed_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = fmul <4 x float> %7, %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32_FIXED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: cvtdq2ps %xmm0, %xmm0 11: movabsq $139845047042064, %rax 21: mulps (%rax), %xmm0 24: movabsq $139845047042080, %rax 34: movaps (%rax), %xmm1 37: movlhps %xmm1, %xmm0 40: movaps %xmm0, (%rdi) 43: popq %rbp 44: ret define void @fetch_r32g32_fixed_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i32>* %5 = load <2 x i32>* %4, align 4 %6 = shufflevector <2 x i32> %5, <2 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %7, <4 x i32> ) %9 = ashr <4 x i32> %8, %10 = sub <4 x i32> %8, %9 %11 = ashr <4 x i32> %10, %12 = extractelement <4 x i32> %11, i32 0 %13 = extractelement <4 x i32> %11, i32 1 %14 = extractelement <4 x i32> %11, i32 2 %15 = extractelement <4 x i32> %11, i32 3 %16 = bitcast i32 %12 to <2 x i16> %17 = bitcast i32 %13 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast i32 %14 to <2 x i16> %20 = bitcast i32 %15 to <2 x i16> %21 = shufflevector <2 x i16> %19, <2 x i16> %20, <2 x i32> %22 = bitcast <2 x i16> %18 to <4 x i8> %23 = bitcast <2 x i16> %21 to <4 x i8> %24 = shufflevector <4 x i8> %22, <4 x i8> %23, <4 x i32> %25 = bitcast <4 x i8> %24 to i32 %26 = and i32 %25, 65535 %27 = or i32 %26, bitcast (<4 x i8> to i32) %28 = bitcast i32 %27 to <4 x i8> store <4 x i8> %28, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32_FIXED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm0 31: pminsd %xmm1, %xmm0 36: movdqa %xmm0, %xmm1 40: psrad $15, %xmm1 45: psubd %xmm1, %xmm0 49: psrad $8, %xmm0 54: movabsq $139845047042080, %rax 64: movabsq $139845047042096, %rcx 74: movabsq $139845047042112, %rdx 84: pshufd $1, %xmm0, %xmm1 89: pshufd $3, %xmm0, %xmm2 94: movdqa %xmm0, %xmm3 98: movhlps %xmm3, %xmm3 101: punpcklwd %xmm2, %xmm3 105: pshufb (%rdx), %xmm3 110: punpcklwd %xmm1, %xmm0 114: pshufb (%rcx), %xmm0 119: por %xmm3, %xmm0 123: movd %xmm0, %ecx 127: movzwl %cx, %ecx 130: orl (%rax), %ecx 132: movl %ecx, (%rdi) 134: popq %rbp 135: ret define void @fetch_r32g32b32_fixed_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = sitofp <4 x i32> %6 to <4 x float> %8 = fmul <4 x float> %7, %9 = shufflevector <4 x float> %8, <4 x float> , <4 x i32> store <4 x float> %9, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32_FIXED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: cvtdq2ps %xmm0, %xmm0 30: movabsq $139845047042064, %rax 40: mulps (%rax), %xmm0 43: movabsq $139845047042080, %rax 53: movaps (%rax), %xmm1 56: shufps $33, %xmm0, %xmm1 60: shufps $36, %xmm1, %xmm0 64: movaps %xmm0, (%rdi) 67: popq %rbp 68: ret define void @fetch_r32g32b32_fixed_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i32>* %5 = load <3 x i32>* %4, align 4 %6 = shufflevector <3 x i32> %5, <3 x i32> undef, <4 x i32> %7 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %6, <4 x i32> zeroinitializer) %8 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %7, <4 x i32> ) %9 = ashr <4 x i32> %8, %10 = sub <4 x i32> %8, %9 %11 = ashr <4 x i32> %10, %12 = extractelement <4 x i32> %11, i32 0 %13 = extractelement <4 x i32> %11, i32 1 %14 = extractelement <4 x i32> %11, i32 2 %15 = extractelement <4 x i32> %11, i32 3 %16 = bitcast i32 %12 to <2 x i16> %17 = bitcast i32 %13 to <2 x i16> %18 = shufflevector <2 x i16> %16, <2 x i16> %17, <2 x i32> %19 = bitcast i32 %14 to <2 x i16> %20 = bitcast i32 %15 to <2 x i16> %21 = shufflevector <2 x i16> %19, <2 x i16> %20, <2 x i32> %22 = bitcast <2 x i16> %18 to <4 x i8> %23 = bitcast <2 x i16> %21 to <4 x i8> %24 = shufflevector <4 x i8> %22, <4 x i8> %23, <4 x i32> %25 = bitcast <4 x i8> %24 to i32 %26 = and i32 %25, 16777215 %27 = or i32 %26, bitcast (<4 x i8> to i32) %28 = bitcast i32 %27 to <4 x i8> store <4 x i8> %28, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32_FIXED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrd $1, %xmm0, %eax 14: pinsrd $1, %eax, %xmm0 20: pinsrd $2, 8(%rsi), %xmm0 27: pxor %xmm1, %xmm1 31: pmaxsd %xmm0, %xmm1 36: movabsq $139845047042064, %rax 46: movdqa (%rax), %xmm0 50: pminsd %xmm1, %xmm0 55: movabsq $139845047042080, %rax 65: movabsq $139845047042096, %rcx 75: movabsq $139845047042112, %rdx 85: movdqa %xmm0, %xmm1 89: psrad $15, %xmm1 94: psubd %xmm1, %xmm0 98: psrad $8, %xmm0 103: pshufd $1, %xmm0, %xmm1 108: pshufd $3, %xmm0, %xmm2 113: movdqa %xmm0, %xmm3 117: movhlps %xmm3, %xmm3 120: punpcklwd %xmm2, %xmm3 124: pshufb (%rdx), %xmm3 129: punpcklwd %xmm1, %xmm0 133: pshufb (%rcx), %xmm0 138: por %xmm3, %xmm0 142: movd %xmm0, %ecx 146: andl $16777215, %ecx 152: orl (%rax), %ecx 154: movl %ecx, (%rdi) 156: popq %rbp 157: ret define void @fetch_r32g32b32a32_fixed_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = sitofp <4 x i32> %5 to <4 x float> %7 = fmul <4 x float> %6, store <4 x float> %7, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32_FIXED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movups (%rsi), %xmm0 7: cvtdq2ps %xmm0, %xmm0 10: movabsq $139845047042064, %rax 20: mulps (%rax), %xmm0 23: movaps %xmm0, (%rdi) 26: popq %rbp 27: ret define void @fetch_r32g32b32a32_fixed_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i32>* %5 = load <4 x i32>* %4, align 4 %6 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %5, <4 x i32> zeroinitializer) %7 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %6, <4 x i32> ) %8 = ashr <4 x i32> %7, %9 = sub <4 x i32> %7, %8 %10 = ashr <4 x i32> %9, %11 = extractelement <4 x i32> %10, i32 0 %12 = extractelement <4 x i32> %10, i32 1 %13 = extractelement <4 x i32> %10, i32 2 %14 = extractelement <4 x i32> %10, i32 3 %15 = bitcast i32 %11 to <2 x i16> %16 = bitcast i32 %12 to <2 x i16> %17 = shufflevector <2 x i16> %15, <2 x i16> %16, <2 x i32> %18 = bitcast i32 %13 to <2 x i16> %19 = bitcast i32 %14 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast <2 x i16> %17 to <4 x i8> %22 = bitcast <2 x i16> %20 to <4 x i8> %23 = shufflevector <4 x i8> %21, <4 x i8> %22, <4 x i32> store <4 x i8> %23, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R32G32B32A32_FIXED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movdqu (%rsi), %xmm0 8: pxor %xmm1, %xmm1 12: pmaxsd %xmm0, %xmm1 17: movabsq $139845047042064, %rax 27: movdqa (%rax), %xmm0 31: pminsd %xmm1, %xmm0 36: movdqa %xmm0, %xmm1 40: psrad $15, %xmm1 45: psubd %xmm1, %xmm0 49: psrad $8, %xmm0 54: movabsq $139845047042080, %rax 64: movabsq $139845047042096, %rcx 74: pshufd $1, %xmm0, %xmm1 79: pshufd $3, %xmm0, %xmm2 84: movdqa %xmm0, %xmm3 88: movhlps %xmm3, %xmm3 91: punpcklwd %xmm2, %xmm3 95: pshufb (%rcx), %xmm3 100: punpcklwd %xmm1, %xmm0 104: pshufb (%rax), %xmm0 109: por %xmm3, %xmm0 113: movd %xmm0, (%rdi) 117: popq %rbp 118: ret define void @fetch_r16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = shufflevector <4 x float> %21, <4 x float> , <4 x i32> store <4 x float> %22, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R32G32B32A32_FIXED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: extractps $0, %xmm0, %eax 26: movl $2139095040, %ecx 31: movabsq $139845047042064, %rdx 41: movabsq $139845047042080, %rsi 51: pextrw $1, %xmm0, %r8d 57: movdqa (%rsi), %xmm1 61: movaps (%rdx), %xmm2 64: movd %ecx, %xmm3 68: movabsq $139845047042096, %rcx 78: movabsq $139845047042112, %rdx 88: movabsq $139845047042128, %rsi 98: movzwl %ax, %eax 101: movd %eax, %xmm4 105: pinsrd $1, %r8d, %xmm4 112: pextrw $2, %xmm0, %eax 117: pinsrd $2, %eax, %xmm4 123: pextrw $3, %xmm0, %eax 128: pinsrd $3, %eax, %xmm4 134: movdqa (%rsi), %xmm0 138: pand %xmm4, %xmm0 142: pxor %xmm0, %xmm4 146: movdqa %xmm0, %xmm5 150: pxor %xmm1, %xmm5 154: pxor (%rdx), %xmm1 158: pcmpgtd %xmm1, %xmm5 162: pand %xmm3, %xmm5 166: pslld $16, %xmm4 171: por %xmm5, %xmm4 175: pslld $13, %xmm0 180: mulps (%rcx), %xmm0 183: orps %xmm4, %xmm0 186: movlhps %xmm2, %xmm0 189: shufps $72, %xmm2, %xmm0 193: movaps %xmm0, (%rdi) 196: popq %rbp 197: ret define void @fetch_r16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %21, <4 x float> zeroinitializer) %23 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %22, <4 x float> ) %24 = fmul <4 x float> %23, %25 = fadd <4 x float> %24, %26 = bitcast <4 x float> %25 to <4 x i32> %27 = and <4 x i32> %26, %28 = extractelement <4 x i32> %27, i32 0 %29 = extractelement <4 x i32> %27, i32 1 %30 = extractelement <4 x i32> %27, i32 2 %31 = extractelement <4 x i32> %27, i32 3 %32 = bitcast i32 %28 to <2 x i16> %33 = bitcast i32 %29 to <2 x i16> %34 = shufflevector <2 x i16> %32, <2 x i16> %33, <2 x i32> %35 = bitcast i32 %30 to <2 x i16> %36 = bitcast i32 %31 to <2 x i16> %37 = shufflevector <2 x i16> %35, <2 x i16> %36, <2 x i32> %38 = bitcast <2 x i16> %34 to <4 x i8> %39 = bitcast <2 x i16> %37 to <4 x i8> %40 = shufflevector <4 x i8> %38, <4 x i8> %39, <4 x i32> %41 = bitcast <4 x i8> %40 to i32 %42 = and i32 %41, 255 %43 = or i32 %42, bitcast (<4 x i8> to i32) %44 = bitcast i32 %43 to <4 x i8> store <4 x i8> %44, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: punpcklwd %xmm0, %xmm0 18: pshufd $0, %xmm0, %xmm0 23: extractps $0, %xmm0, %eax 29: movabsq $139845047042064, %rcx 39: pextrw $3, %xmm0, %edx 44: pextrw $1, %xmm0, %esi 49: pextrw $2, %xmm0, %r8d 55: movabsq $139845047042080, %r9 65: movabsq $139845047042096, %r10 75: movabsq $139845047042112, %r11 85: movabsq $139845047042128, %rbx 95: xorps %xmm0, %xmm0 98: movzwl %ax, %eax 101: movabsq $139845047042144, %r14 111: movdqa (%rcx), %xmm1 115: movabsq $139845047042160, %rcx 125: movd %eax, %xmm2 129: movabsq $139845047042176, %rax 139: pinsrd $1, %esi, %xmm2 145: movabsq $139845047042192, %rsi 155: pinsrd $2, %r8d, %xmm2 162: movabsq $139845047042208, %r8 172: pinsrd $3, %edx, %xmm2 178: movabsq $139845047042224, %rdx 188: movdqa (%rdx), %xmm3 192: pand %xmm2, %xmm3 196: pxor %xmm3, %xmm2 200: movdqa %xmm3, %xmm4 204: pxor %xmm1, %xmm4 208: movabsq $139845047042240, %rdx 218: pxor (%rdx), %xmm1 222: pcmpgtd %xmm1, %xmm4 226: pand (%r8), %xmm4 231: pslld $16, %xmm2 236: por %xmm4, %xmm2 240: pslld $13, %xmm3 245: mulps (%rsi), %xmm3 248: orps %xmm2, %xmm3 251: maxps %xmm0, %xmm3 254: minps (%rax), %xmm3 257: mulps (%rcx), %xmm3 260: addps (%r14), %xmm3 264: andps (%rbx), %xmm3 267: pshufd $1, %xmm3, %xmm0 272: pshufd $3, %xmm3, %xmm1 277: movaps %xmm3, %xmm2 280: movhlps %xmm2, %xmm2 283: punpcklwd %xmm1, %xmm2 287: pshufb (%r11), %xmm2 293: punpcklwd %xmm0, %xmm3 297: pshufb (%r10), %xmm3 303: por %xmm2, %xmm3 307: movd %xmm3, %eax 311: movzbl %al, %eax 314: orl (%r9), %eax 317: movl %eax, (%rdi) 319: popq %rbx 320: popq %r14 322: popq %rbp 323: ret define void @fetch_r16g16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = shufflevector <2 x i16> %5, <2 x i16> undef, <4 x i32> %7 = zext <4 x i16> %6 to <4 x i32> %8 = and <4 x i32> %7, %9 = shl <4 x i32> %8, %10 = bitcast <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = icmp ugt <4 x i32> %8, %14 = sext <4 x i1> %13 to <4 x i32> %15 = and <4 x i32> %14, %16 = xor <4 x i32> %7, %8 %17 = shl <4 x i32> %16, %18 = or <4 x i32> %17, %15 %19 = or <4 x i32> %12, %18 %20 = bitcast <4 x i32> %19 to <4 x float> %21 = shufflevector <4 x float> %20, <4 x float> , <4 x i32> store <4 x float> %21, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: movzwl %ax, %eax 18: movd %eax, %xmm1 22: pinsrd $1, %ecx, %xmm1 28: pextrw $2, %xmm0, %eax 33: movabsq $139845047042064, %rcx 43: pinsrd $2, %eax, %xmm1 49: movdqa (%rcx), %xmm2 53: pextrw $3, %xmm0, %eax 58: movabsq $139845047042080, %rcx 68: movaps (%rcx), %xmm0 71: movabsq $139845047042096, %rcx 81: movabsq $139845047042112, %rdx 91: movabsq $139845047042128, %rsi 101: movabsq $139845047042144, %r8 111: pinsrd $3, %eax, %xmm1 117: movdqa (%r8), %xmm3 122: pand %xmm1, %xmm3 126: pxor %xmm3, %xmm1 130: movdqa %xmm3, %xmm4 134: pxor %xmm2, %xmm4 138: pxor (%rsi), %xmm2 142: pcmpgtd %xmm2, %xmm4 146: pand (%rdx), %xmm4 150: pslld $16, %xmm1 155: por %xmm4, %xmm1 159: pslld $13, %xmm3 164: mulps (%rcx), %xmm3 167: orps %xmm1, %xmm3 170: movlhps %xmm0, %xmm3 173: movaps %xmm3, (%rdi) 176: popq %rbp 177: ret define void @fetch_r16g16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <2 x i16>* %5 = load <2 x i16>* %4, align 2 %6 = shufflevector <2 x i16> %5, <2 x i16> undef, <4 x i32> %7 = zext <4 x i16> %6 to <4 x i32> %8 = and <4 x i32> %7, %9 = shl <4 x i32> %8, %10 = bitcast <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = icmp ugt <4 x i32> %8, %14 = sext <4 x i1> %13 to <4 x i32> %15 = and <4 x i32> %14, %16 = xor <4 x i32> %7, %8 %17 = shl <4 x i32> %16, %18 = or <4 x i32> %17, %15 %19 = or <4 x i32> %12, %18 %20 = bitcast <4 x i32> %19 to <4 x float> %21 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %20, <4 x float> zeroinitializer) %22 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %21, <4 x float> ) %23 = fmul <4 x float> %22, %24 = fadd <4 x float> %23, %25 = bitcast <4 x float> %24 to <4 x i32> %26 = and <4 x i32> %25, %27 = extractelement <4 x i32> %26, i32 0 %28 = extractelement <4 x i32> %26, i32 1 %29 = extractelement <4 x i32> %26, i32 2 %30 = extractelement <4 x i32> %26, i32 3 %31 = bitcast i32 %27 to <2 x i16> %32 = bitcast i32 %28 to <2 x i16> %33 = shufflevector <2 x i16> %31, <2 x i16> %32, <2 x i32> %34 = bitcast i32 %29 to <2 x i16> %35 = bitcast i32 %30 to <2 x i16> %36 = shufflevector <2 x i16> %34, <2 x i16> %35, <2 x i32> %37 = bitcast <2 x i16> %33 to <4 x i8> %38 = bitcast <2 x i16> %36 to <4 x i8> %39 = shufflevector <4 x i8> %37, <4 x i8> %38, <4 x i32> %40 = bitcast <4 x i8> %39 to i32 %41 = and i32 %40, 65535 %42 = or i32 %41, bitcast (<4 x i8> to i32) %43 = bitcast i32 %42 to <4 x i8> store <4 x i8> %43, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movl (%rsi), %eax 9: movd %eax, %xmm0 13: pextrw $1, %xmm0, %ecx 18: movzwl %ax, %eax 21: movd %eax, %xmm1 25: pinsrd $1, %ecx, %xmm1 31: pextrw $2, %xmm0, %eax 36: movabsq $139845047042064, %rcx 46: pinsrd $2, %eax, %xmm1 52: movdqa (%rcx), %xmm2 56: pextrw $3, %xmm0, %eax 61: movabsq $139845047042080, %rcx 71: movabsq $139845047042096, %rdx 81: movabsq $139845047042112, %rsi 91: movabsq $139845047042128, %r8 101: movabsq $139845047042144, %r9 111: movabsq $139845047042160, %r10 121: movabsq $139845047042176, %r11 131: xorps %xmm0, %xmm0 134: movabsq $139845047042192, %rbx 144: movabsq $139845047042208, %r14 154: pinsrd $3, %eax, %xmm1 160: movabsq $139845047042224, %rax 170: movdqa (%rax), %xmm3 174: pand %xmm1, %xmm3 178: pxor %xmm3, %xmm1 182: movdqa %xmm3, %xmm4 186: pxor %xmm2, %xmm4 190: movabsq $139845047042240, %rax 200: pxor (%rax), %xmm2 204: pcmpgtd %xmm2, %xmm4 208: pand (%r14), %xmm4 213: pslld $16, %xmm1 218: por %xmm4, %xmm1 222: pslld $13, %xmm3 227: mulps (%rbx), %xmm3 230: orps %xmm1, %xmm3 233: maxps %xmm0, %xmm3 236: minps (%r11), %xmm3 240: mulps (%r10), %xmm3 244: addps (%r9), %xmm3 248: andps (%r8), %xmm3 252: pshufd $1, %xmm3, %xmm0 257: pshufd $3, %xmm3, %xmm1 262: movaps %xmm3, %xmm2 265: movhlps %xmm2, %xmm2 268: punpcklwd %xmm1, %xmm2 272: pshufb (%rsi), %xmm2 277: punpcklwd %xmm0, %xmm3 281: pshufb (%rdx), %xmm3 286: por %xmm2, %xmm3 290: movd %xmm3, %eax 294: movzwl %ax, %eax 297: orl (%rcx), %eax 299: movl %eax, (%rdi) 301: popq %rbx 302: popq %r14 304: popq %rbp 305: ret define void @fetch_r16g16b16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = zext <4 x i16> %6 to <4 x i32> %8 = and <4 x i32> %7, %9 = shl <4 x i32> %8, %10 = bitcast <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = icmp ugt <4 x i32> %8, %14 = sext <4 x i1> %13 to <4 x i32> %15 = and <4 x i32> %14, %16 = xor <4 x i32> %7, %8 %17 = shl <4 x i32> %16, %18 = or <4 x i32> %17, %15 %19 = or <4 x i32> %12, %18 %20 = bitcast <4 x i32> %19 to <4 x float> %21 = shufflevector <4 x float> %20, <4 x float> , <4 x i32> store <4 x float> %21, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %eax, %xmm0 10: pextrw $1, %xmm0, %ecx 15: movzwl %ax, %eax 18: movd %eax, %xmm0 22: pinsrd $1, %ecx, %xmm0 28: movzwl 4(%rsi), %eax 32: movabsq $139845047042064, %rcx 42: movabsq $139845047042080, %rdx 52: pinsrd $2, %eax, %xmm0 58: movdqa (%rdx), %xmm1 62: movzwl %ax, %eax 65: movaps (%rcx), %xmm2 68: movabsq $139845047042096, %rcx 78: movabsq $139845047042112, %rdx 88: movabsq $139845047042128, %rsi 98: movabsq $139845047042144, %r8 108: pinsrd $3, %eax, %xmm0 114: movdqa (%r8), %xmm3 119: pand %xmm0, %xmm3 123: pxor %xmm3, %xmm0 127: movdqa %xmm3, %xmm4 131: pxor %xmm1, %xmm4 135: pxor (%rsi), %xmm1 139: pcmpgtd %xmm1, %xmm4 143: pand (%rdx), %xmm4 147: pslld $16, %xmm0 152: por %xmm4, %xmm0 156: pslld $13, %xmm3 161: mulps (%rcx), %xmm3 164: orps %xmm0, %xmm3 167: shufps $33, %xmm3, %xmm2 171: shufps $36, %xmm2, %xmm3 175: movaps %xmm3, (%rdi) 178: popq %rbp 179: ret define void @fetch_r16g16b16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <3 x i16>* %5 = load <3 x i16>* %4, align 2 %6 = shufflevector <3 x i16> %5, <3 x i16> undef, <4 x i32> %7 = zext <4 x i16> %6 to <4 x i32> %8 = and <4 x i32> %7, %9 = shl <4 x i32> %8, %10 = bitcast <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = icmp ugt <4 x i32> %8, %14 = sext <4 x i1> %13 to <4 x i32> %15 = and <4 x i32> %14, %16 = xor <4 x i32> %7, %8 %17 = shl <4 x i32> %16, %18 = or <4 x i32> %17, %15 %19 = or <4 x i32> %12, %18 %20 = bitcast <4 x i32> %19 to <4 x float> %21 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %20, <4 x float> zeroinitializer) %22 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %21, <4 x float> ) %23 = fmul <4 x float> %22, %24 = fadd <4 x float> %23, %25 = bitcast <4 x float> %24 to <4 x i32> %26 = and <4 x i32> %25, %27 = extractelement <4 x i32> %26, i32 0 %28 = extractelement <4 x i32> %26, i32 1 %29 = extractelement <4 x i32> %26, i32 2 %30 = extractelement <4 x i32> %26, i32 3 %31 = bitcast i32 %27 to <2 x i16> %32 = bitcast i32 %28 to <2 x i16> %33 = shufflevector <2 x i16> %31, <2 x i16> %32, <2 x i32> %34 = bitcast i32 %29 to <2 x i16> %35 = bitcast i32 %30 to <2 x i16> %36 = shufflevector <2 x i16> %34, <2 x i16> %35, <2 x i32> %37 = bitcast <2 x i16> %33 to <4 x i8> %38 = bitcast <2 x i16> %36 to <4 x i8> %39 = shufflevector <4 x i8> %37, <4 x i8> %38, <4 x i32> %40 = bitcast <4 x i8> %39 to i32 %41 = and i32 %40, 16777215 %42 = or i32 %41, bitcast (<4 x i8> to i32) %43 = bitcast i32 %42 to <4 x i8> store <4 x i8> %43, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movl (%rsi), %eax 9: movd %eax, %xmm0 13: pextrw $1, %xmm0, %ecx 18: movzwl %ax, %eax 21: movd %eax, %xmm0 25: pinsrd $1, %ecx, %xmm0 31: movzwl 4(%rsi), %eax 35: movabsq $139845047042064, %rcx 45: pinsrd $2, %eax, %xmm0 51: movdqa (%rcx), %xmm1 55: movzwl %ax, %eax 58: movabsq $139845047042080, %rcx 68: movabsq $139845047042096, %rdx 78: movabsq $139845047042112, %rsi 88: movabsq $139845047042128, %r8 98: movabsq $139845047042144, %r9 108: movabsq $139845047042160, %r10 118: movabsq $139845047042176, %r11 128: xorps %xmm2, %xmm2 131: movabsq $139845047042192, %rbx 141: movabsq $139845047042208, %r14 151: pinsrd $3, %eax, %xmm0 157: movabsq $139845047042224, %rax 167: movdqa (%rax), %xmm3 171: pand %xmm0, %xmm3 175: pxor %xmm3, %xmm0 179: movdqa %xmm3, %xmm4 183: pxor %xmm1, %xmm4 187: movabsq $139845047042240, %rax 197: pxor (%rax), %xmm1 201: pcmpgtd %xmm1, %xmm4 205: pand (%r14), %xmm4 210: pslld $16, %xmm0 215: por %xmm4, %xmm0 219: pslld $13, %xmm3 224: mulps (%rbx), %xmm3 227: orps %xmm0, %xmm3 230: maxps %xmm2, %xmm3 233: minps (%r11), %xmm3 237: mulps (%r10), %xmm3 241: addps (%r9), %xmm3 245: andps (%r8), %xmm3 249: pshufd $1, %xmm3, %xmm0 254: pshufd $3, %xmm3, %xmm1 259: movaps %xmm3, %xmm2 262: movhlps %xmm2, %xmm2 265: punpcklwd %xmm1, %xmm2 269: pshufb (%rsi), %xmm2 274: punpcklwd %xmm0, %xmm3 278: pshufb (%rdx), %xmm3 283: por %xmm2, %xmm3 287: movd %xmm3, %eax 291: andl $16777215, %eax 297: orl (%rcx), %eax 299: movl %eax, (%rdi) 301: popq %rbx 302: popq %r14 304: popq %rbp 305: ret define void @fetch_r16g16b16a16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = zext <4 x i16> %5 to <4 x i32> %7 = and <4 x i32> %6, %8 = shl <4 x i32> %7, %9 = bitcast <4 x i32> %8 to <4 x float> %10 = fmul <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = icmp ugt <4 x i32> %7, %13 = sext <4 x i1> %12 to <4 x i32> %14 = and <4 x i32> %13, %15 = xor <4 x i32> %6, %7 %16 = shl <4 x i32> %15, %17 = or <4 x i32> %16, %14 %18 = or <4 x i32> %11, %17 %19 = bitcast <4 x i32> %18 to <4 x float> store <4 x float> %19, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pextrw $1, %xmm0, %eax 13: movd %xmm0, %ecx 17: movzwl %cx, %ecx 20: movd %ecx, %xmm1 24: movabsq $139845047042064, %rcx 34: pinsrd $1, %eax, %xmm1 40: movdqa (%rcx), %xmm2 44: pextrw $2, %xmm0, %eax 49: movabsq $139845047042080, %rcx 59: movabsq $139845047042096, %rdx 69: movabsq $139845047042112, %rsi 79: movabsq $139845047042128, %r8 89: pinsrd $2, %eax, %xmm1 95: pextrw $3, %xmm0, %eax 100: pinsrd $3, %eax, %xmm1 106: movdqa (%r8), %xmm0 111: pand %xmm1, %xmm0 115: pxor %xmm0, %xmm1 119: movdqa %xmm0, %xmm3 123: pxor %xmm2, %xmm3 127: pxor (%rsi), %xmm2 131: pcmpgtd %xmm2, %xmm3 135: pand (%rdx), %xmm3 139: pslld $16, %xmm1 144: por %xmm3, %xmm1 148: pslld $13, %xmm0 153: mulps (%rcx), %xmm0 156: orps %xmm1, %xmm0 159: movaps %xmm0, (%rdi) 162: popq %rbp 163: ret define void @fetch_r16g16b16a16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to <4 x i16>* %5 = load <4 x i16>* %4, align 2 %6 = zext <4 x i16> %5 to <4 x i32> %7 = and <4 x i32> %6, %8 = shl <4 x i32> %7, %9 = bitcast <4 x i32> %8 to <4 x float> %10 = fmul <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = icmp ugt <4 x i32> %7, %13 = sext <4 x i1> %12 to <4 x i32> %14 = and <4 x i32> %13, %15 = xor <4 x i32> %6, %7 %16 = shl <4 x i32> %15, %17 = or <4 x i32> %16, %14 %18 = or <4 x i32> %11, %17 %19 = bitcast <4 x i32> %18 to <4 x float> %20 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %19, <4 x float> zeroinitializer) %21 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %20, <4 x float> ) %22 = fmul <4 x float> %21, %23 = fadd <4 x float> %22, %24 = bitcast <4 x float> %23 to <4 x i32> %25 = and <4 x i32> %24, %26 = extractelement <4 x i32> %25, i32 0 %27 = extractelement <4 x i32> %25, i32 1 %28 = extractelement <4 x i32> %25, i32 2 %29 = extractelement <4 x i32> %25, i32 3 %30 = bitcast i32 %26 to <2 x i16> %31 = bitcast i32 %27 to <2 x i16> %32 = shufflevector <2 x i16> %30, <2 x i16> %31, <2 x i32> %33 = bitcast i32 %28 to <2 x i16> %34 = bitcast i32 %29 to <2 x i16> %35 = shufflevector <2 x i16> %33, <2 x i16> %34, <2 x i32> %36 = bitcast <2 x i16> %32 to <4 x i8> %37 = bitcast <2 x i16> %35 to <4 x i8> %38 = shufflevector <4 x i8> %36, <4 x i8> %37, <4 x i32> store <4 x i8> %38, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R16G16B16A16_FLOAT (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movq (%rsi), %xmm0 11: pextrw $1, %xmm0, %eax 16: movd %xmm0, %ecx 20: movzwl %cx, %ecx 23: movd %ecx, %xmm1 27: movabsq $139845047042064, %rcx 37: pinsrd $1, %eax, %xmm1 43: movdqa (%rcx), %xmm2 47: pextrw $3, %xmm0, %eax 52: pextrw $2, %xmm0, %ecx 57: movabsq $139845047042080, %rdx 67: movabsq $139845047042096, %rsi 77: movabsq $139845047042112, %r8 87: movabsq $139845047042128, %r9 97: movabsq $139845047042144, %r10 107: movabsq $139845047042160, %r11 117: xorps %xmm0, %xmm0 120: movabsq $139845047042176, %rbx 130: movabsq $139845047042192, %r14 140: pinsrd $2, %ecx, %xmm1 146: movabsq $139845047042208, %rcx 156: pinsrd $3, %eax, %xmm1 162: movabsq $139845047042224, %rax 172: movdqa (%rax), %xmm3 176: pand %xmm1, %xmm3 180: pxor %xmm3, %xmm1 184: movdqa %xmm3, %xmm4 188: pxor %xmm2, %xmm4 192: pxor (%rcx), %xmm2 196: pcmpgtd %xmm2, %xmm4 200: pand (%r14), %xmm4 205: pslld $16, %xmm1 210: por %xmm4, %xmm1 214: pslld $13, %xmm3 219: mulps (%rbx), %xmm3 222: orps %xmm1, %xmm3 225: maxps %xmm0, %xmm3 228: minps (%r11), %xmm3 232: mulps (%r10), %xmm3 236: addps (%r9), %xmm3 240: andps (%r8), %xmm3 244: pshufd $1, %xmm3, %xmm0 249: pshufd $3, %xmm3, %xmm1 254: movaps %xmm3, %xmm2 257: movhlps %xmm2, %xmm2 260: punpcklwd %xmm1, %xmm2 264: pshufb (%rsi), %xmm2 269: punpcklwd %xmm0, %xmm3 273: pshufb (%rdx), %xmm3 278: por %xmm2, %xmm3 282: movd %xmm3, (%rdi) 286: popq %rbx 287: popq %r14 289: popq %rbp 290: ret define void @fetch_l8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4454568 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R16G16B16A16_FLOAT (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4454568, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_l8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4454568 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4454568, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l8a8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4455537 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4455537, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_l8a8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4455537 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L8A8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4455537, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r8g8b8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4456598 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L8A8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4456598, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r8g8b8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4456598 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4456598, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_a8b8g8r8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4458954 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4458954, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_a8b8g8r8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4458954 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_A8B8G8R8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4458954, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_x8b8g8r8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4460148 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_A8B8G8R8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4460148, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_x8b8g8r8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4460148 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_X8B8G8R8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4460148, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_b8g8r8a8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4461332 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_X8B8G8R8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4461332, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_b8g8r8a8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4461332 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B8G8R8A8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4461332, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_b8g8r8x8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4462525 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B8G8R8A8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4462525, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_b8g8r8x8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4462525 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B8G8R8X8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4462525, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_a8r8g8b8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4463710 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B8G8R8X8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4463710, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_a8r8g8b8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4463710 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_A8R8G8B8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4463710, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_x8r8g8b8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4464904 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_A8R8G8B8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4464904, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_x8r8g8b8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4464904 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_X8R8G8B8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4464904, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r8g8b8a8_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4457712 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_X8R8G8B8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4457712, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r8g8b8a8_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4457712 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8A8_SRGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4457712, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_dxt1_rgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4384062 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8A8_SRGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4384062, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt1_rgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4383826 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_DXT1_RGB (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4383826, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt1_rgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4384251 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_DXT1_RGB (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4384251, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt1_rgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4383885 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_DXT1_RGBA (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4383885, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt3_rgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4384451 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_DXT1_RGBA (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4384451, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt3_rgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4383944 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_DXT3_RGBA (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4383944, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt5_rgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4384651 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_DXT3_RGBA (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4384651, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt5_rgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4384003 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_DXT5_RGBA (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4384003, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt1_srgb_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4389709 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_DXT5_RGBA (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4389709, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt1_srgb_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4388989 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4388989, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt1_srgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4389889 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4389889, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt1_srgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4389169 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4389169, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt3_srgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4390069 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4390069, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt3_srgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4389349 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4389349, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_dxt5_srgba_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4390249 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4390249, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_dxt5_srgba_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4389529 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4389529, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_rgtc1_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4693556 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4693556, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_rgtc1_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4692308 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4692308, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_rgtc1_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4694485 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4694485, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_rgtc1_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4693683 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4693683, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_rgtc2_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4696293 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4696293, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_rgtc2_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4694612 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4694612, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_rgtc2_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4697534 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4697534, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_rgtc2_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4696478 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4696478, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_r8g8_b8g8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %u = and i32 %5, 255 %v = and i32 %5, 16711680 %y = shl i32 %8, 8 %9 = and i32 %y, 65280 %10 = or i32 %u, %9 %11 = or i32 %10, %v %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> %14 = extractelement <4 x i8> %13, i32 0 %15 = zext i8 %14 to i32 %16 = insertelement <4 x i32> undef, i32 %15, i32 0 %17 = extractelement <4 x i8> %13, i32 1 %18 = zext i8 %17 to i32 %19 = insertelement <4 x i32> %16, i32 %18, i32 1 %20 = extractelement <4 x i8> %13, i32 2 %21 = zext i8 %20 to i32 %22 = insertelement <4 x i32> %19, i32 %21, i32 2 %23 = extractelement <4 x i8> %13, i32 3 %24 = zext i8 %23 to i32 %25 = insertelement <4 x i32> %22, i32 %24, i32 3 %26 = sitofp <4 x i32> %25 to <4 x float> %27 = fmul <4 x float> %26, store <4 x float> %27, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: leal 8(%rdx), %ecx 12: movl %eax, %edx 14: shrl %cl, %edx 16: shll $8, %edx 19: movzbl %al, %ecx 22: movabsq $139845047042064, %rsi 32: movzwl %dx, %edx 35: orl %ecx, %edx 37: andl $16711680, %eax 43: orl %edx, %eax 45: orl $4278190080, %eax 51: movd %eax, %xmm0 55: pextrb $1, %xmm0, %eax 61: pextrb $0, %xmm0, %ecx 67: movd %ecx, %xmm1 71: pinsrd $1, %eax, %xmm1 77: pextrb $2, %xmm0, %eax 83: pinsrd $2, %eax, %xmm1 89: pextrb $3, %xmm0, %eax 95: pinsrd $3, %eax, %xmm1 101: cvtdq2ps %xmm1, %xmm0 104: mulps (%rsi), %xmm0 107: movaps %xmm0, (%rdi) 110: popq %rbp 111: ret define void @fetch_r8g8_b8g8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %u = and i32 %5, 255 %v = and i32 %5, 16711680 %y = shl i32 %8, 8 %9 = and i32 %y, 65280 %10 = or i32 %u, %9 %11 = or i32 %10, %v %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8_B8G8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: leal 8(%rdx), %ecx 12: movl %eax, %edx 14: shrl %cl, %edx 16: shll $8, %edx 19: movzbl %al, %ecx 22: movzwl %dx, %edx 25: orl %ecx, %edx 27: andl $16711680, %eax 33: orl %edx, %eax 35: orl $4278190080, %eax 41: movl %eax, (%rdi) 43: popq %rbp 44: ret define void @fetch_g8r8_g8b8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %8 = lshr i32 %5, 8 %u = and i32 %8, 255 %y = shl i32 %7, 8 %9 = and i32 %y, 65280 %10 = and i32 %8, 16711680 %11 = or i32 %u, %9 %12 = or i32 %11, %10 %13 = or i32 %12, -16777216 %14 = bitcast i32 %13 to <4 x i8> %15 = extractelement <4 x i8> %14, i32 0 %16 = zext i8 %15 to i32 %17 = insertelement <4 x i32> undef, i32 %16, i32 0 %18 = extractelement <4 x i8> %14, i32 1 %19 = zext i8 %18 to i32 %20 = insertelement <4 x i32> %17, i32 %19, i32 1 %21 = extractelement <4 x i8> %14, i32 2 %22 = zext i8 %21 to i32 %23 = insertelement <4 x i32> %20, i32 %22, i32 2 %24 = extractelement <4 x i8> %14, i32 3 %25 = zext i8 %24 to i32 %26 = insertelement <4 x i32> %23, i32 %25, i32 3 %27 = sitofp <4 x i32> %26 to <4 x float> %28 = fmul <4 x float> %27, store <4 x float> %28, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8_B8G8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: shll $8, %edx 18: movzwl %dx, %ecx 21: shrl $8, %eax 24: movzbl %al, %edx 27: movabsq $139845047042064, %rsi 37: orl %ecx, %edx 39: andl $16711680, %eax 45: orl %edx, %eax 47: orl $4278190080, %eax 53: movd %eax, %xmm0 57: pextrb $1, %xmm0, %eax 63: pextrb $0, %xmm0, %ecx 69: movd %ecx, %xmm1 73: pinsrd $1, %eax, %xmm1 79: pextrb $2, %xmm0, %eax 85: pinsrd $2, %eax, %xmm1 91: pextrb $3, %xmm0, %eax 97: pinsrd $3, %eax, %xmm1 103: cvtdq2ps %xmm1, %xmm0 106: mulps (%rsi), %xmm0 109: movaps %xmm0, (%rdi) 112: popq %rbp 113: ret define void @fetch_g8r8_g8b8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %8 = lshr i32 %5, 8 %u = and i32 %8, 255 %y = shl i32 %7, 8 %9 = and i32 %y, 65280 %10 = and i32 %8, 16711680 %11 = or i32 %u, %9 %12 = or i32 %11, %10 %13 = or i32 %12, -16777216 %14 = bitcast i32 %13 to <4 x i8> store <4 x i8> %14, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_G8R8_G8B8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: shll $8, %edx 18: movzwl %dx, %ecx 21: shrl $8, %eax 24: movzbl %al, %edx 27: orl %ecx, %edx 29: andl $16711680, %eax 35: orl %edx, %eax 37: orl $4278190080, %eax 43: movl %eax, (%rdi) 45: popq %rbp 46: ret define void @fetch_r8sg8sb8ux8u_norm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4466225 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_G8R8_G8B8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4466225, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r8sg8sb8ux8u_norm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4466225 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8SG8SB8UX8U_NORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4466225, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r5sg5sb6u_norm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4469399 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8SG8SB8UX8U_NORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4469399, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r5sg5sb6u_norm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4469399 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R5SG5SB6U_NORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4469399, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_a8b8g8r8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R5SG5SB6U_NORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $8, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $16, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $24, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: pshufd $27, %xmm0, %xmm0 93: movdqa %xmm0, (%rdi) 97: popq %rbp 98: ret define void @fetch_a8b8g8r8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = call i32 @llvm.bswap.i32(i32 %7) %9 = bitcast i32 %8 to <4 x i8> store <4 x i8> %9, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_A8B8G8R8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: bswapl %eax 8: movl %eax, (%rdi) 10: popq %rbp 11: ret define void @fetch_b5g5r5x1_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_A8B8G8R8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: pextrd $1, %xmm0, %ecx 28: shrl $5, %ecx 31: pextrd $3, %xmm0, %edx 37: shrl %cl, %edx 39: pinsrd $1, %ecx, %xmm0 45: movabsq $139845047042064, %rcx 55: movaps (%rcx), %xmm1 58: shrl $10, %eax 61: movabsq $139845047042080, %rcx 71: movabsq $139845047042096, %rsi 81: pinsrd $2, %eax, %xmm0 87: pinsrd $3, %edx, %xmm0 93: pand (%rsi), %xmm0 97: cvtdq2ps %xmm0, %xmm0 100: mulps (%rcx), %xmm0 103: shufps $1, %xmm0, %xmm1 107: shufps $38, %xmm1, %xmm0 111: movaps %xmm0, (%rdi) 114: popq %rbp 115: ret define void @fetch_b5g5r5x1_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = lshr i32 %32, 16 %34 = and i32 %33, 255 %35 = or i32 %34, bitcast (<4 x i8> to i32) %36 = and i32 %32, 65280 %37 = or i32 %35, %36 %38 = shl i32 %32, 16 %39 = and i32 %38, 16711680 %40 = or i32 %37, %39 %41 = bitcast i32 %40 to <4 x i8> store <4 x i8> %41, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B5G5R5X1_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $2, %xmm0, %eax 25: pextrd $1, %xmm0, %ecx 31: shrl $5, %ecx 34: pextrd $3, %xmm0, %edx 40: shrl %cl, %edx 42: pinsrd $1, %ecx, %xmm0 48: shrl $10, %eax 51: movabsq $139845047042064, %rcx 61: movabsq $139845047042080, %rsi 71: movabsq $139845047042096, %r8 81: movabsq $139845047042112, %r9 91: movabsq $139845047042128, %r10 101: movabsq $139845047042144, %r11 111: movabsq $139845047042160, %rbx 121: xorps %xmm1, %xmm1 124: movabsq $139845047042176, %r14 134: pinsrd $2, %eax, %xmm0 140: movabsq $139845047042192, %rax 150: pinsrd $3, %edx, %xmm0 156: pand (%rax), %xmm0 160: cvtdq2ps %xmm0, %xmm0 163: mulps (%r14), %xmm0 167: maxps %xmm1, %xmm0 170: minps (%rbx), %xmm0 173: mulps (%r11), %xmm0 177: addps (%r10), %xmm0 181: andps (%r9), %xmm0 185: pshufd $1, %xmm0, %xmm1 190: pshufd $3, %xmm0, %xmm2 195: movaps %xmm0, %xmm3 198: movhlps %xmm3, %xmm3 201: punpcklwd %xmm2, %xmm3 205: pshufb (%r8), %xmm3 211: punpcklwd %xmm1, %xmm0 215: pshufb (%rsi), %xmm0 220: por %xmm3, %xmm0 224: movd %xmm0, %eax 228: movl %eax, %edx 230: shrl $16, %edx 233: movzbl %dl, %edx 236: orl (%rcx), %edx 238: movl %eax, %ecx 240: andl $65280, %ecx 246: orl %edx, %ecx 248: shll $16, %eax 251: andl $16711680, %eax 257: orl %ecx, %eax 259: movl %eax, (%rdi) 261: popq %rbx 262: popq %r14 264: popq %rbp 265: ret define void @fetch_r10g10b10a2_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> store <4 x float> %10, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B5G5R5X1_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: pinsrd $3, %eax, %xmm0 68: pand (%rcx), %xmm0 72: cvtdq2ps %xmm0, %xmm0 75: movaps %xmm0, (%rdi) 78: popq %rbp 79: ret define void @fetch_r10g10b10a2_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> store <4 x i8> %29, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: movabsq $139845047042096, %rsi 82: movabsq $139845047042112, %r8 92: movabsq $139845047042128, %r9 102: movabsq $139845047042144, %r10 112: xorps %xmm1, %xmm1 115: movabsq $139845047042160, %r11 125: pinsrd $3, %eax, %xmm0 131: pand (%r11), %xmm0 136: cvtdq2ps %xmm0, %xmm0 139: maxps %xmm1, %xmm0 142: minps (%r10), %xmm0 146: mulps (%r9), %xmm0 150: addps (%r8), %xmm0 154: andps (%rsi), %xmm0 157: pshufd $1, %xmm0, %xmm1 162: pshufd $3, %xmm0, %xmm2 167: movaps %xmm0, %xmm3 170: movhlps %xmm3, %xmm3 173: punpcklwd %xmm2, %xmm3 177: pshufb (%rdx), %xmm3 182: punpcklwd %xmm1, %xmm0 186: pshufb (%rcx), %xmm0 191: por %xmm3, %xmm0 195: movd %xmm0, (%rdi) 199: popq %rbp 200: ret define void @fetch_r11g11b10_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4671855 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4671855, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r11g11b10_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4671855 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4671855, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r9g9b9e5_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4671040 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4671040, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r9g9b9e5_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4671040 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4671040, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r1_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4672415 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4672415, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r1_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4672415 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4672415, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r10g10b10x2_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> , <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $1, %xmm0, %eax 19: shrl $10, %eax 22: pextrd $3, %xmm0, %ecx 28: shrl %cl, %ecx 30: pextrd $2, %xmm0, %edx 36: pinsrd $1, %eax, %xmm0 42: shrl $20, %edx 45: pinsrd $2, %edx, %xmm0 51: movabsq $139845047042064, %rax 61: movaps (%rax), %xmm1 64: movabsq $139845047042080, %rax 74: pinsrd $3, %ecx, %xmm0 80: pand (%rax), %xmm0 84: cvtdq2ps %xmm0, %xmm0 87: shufps $33, %xmm0, %xmm1 91: shufps $36, %xmm1, %xmm0 95: movaps %xmm0, (%rdi) 98: popq %rbp 99: ret define void @fetch_r10g10b10x2_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> %30 = bitcast <4 x i8> %29 to i32 %31 = and i32 %30, 16777215 %32 = or i32 %31, bitcast (<4 x i8> to i32) %33 = bitcast i32 %32 to <4 x i8> store <4 x i8> %33, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R10G10B10X2_USCALED (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: pextrd $3, %xmm0, %eax 20: pextrd $2, %xmm0, %ecx 26: pextrd $1, %xmm0, %edx 32: shrl $10, %edx 35: pinsrd $1, %edx, %xmm0 41: shrl $20, %ecx 44: shrl %cl, %eax 46: pinsrd $2, %ecx, %xmm0 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: movabsq $139845047042096, %rsi 82: movabsq $139845047042112, %r8 92: movabsq $139845047042128, %r9 102: movabsq $139845047042144, %r10 112: movabsq $139845047042160, %r11 122: xorps %xmm1, %xmm1 125: movabsq $139845047042176, %rbx 135: pinsrd $3, %eax, %xmm0 141: pand (%rbx), %xmm0 145: cvtdq2ps %xmm0, %xmm0 148: maxps %xmm1, %xmm0 151: minps (%r11), %xmm0 155: mulps (%r10), %xmm0 159: addps (%r9), %xmm0 163: andps (%r8), %xmm0 167: pshufd $1, %xmm0, %xmm1 172: pshufd $3, %xmm0, %xmm2 177: movaps %xmm0, %xmm3 180: movhlps %xmm3, %xmm3 183: punpcklwd %xmm2, %xmm3 187: pshufb (%rsi), %xmm3 192: punpcklwd %xmm1, %xmm0 196: pshufb (%rdx), %xmm0 201: por %xmm3, %xmm0 205: movd %xmm0, %eax 209: andl $16777215, %eax 215: orl (%rcx), %eax 217: movl %eax, (%rdi) 219: popq %rbx 220: popq %rbp 221: ret define void @fetch_r10g10b10x2_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4549466 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R10G10B10X2_USCALED (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4549466, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r10g10b10x2_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4549466 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R10G10B10X2_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4549466, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l4a4_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R10G10B10X2_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $4, %edx 39: pinsrd $1, %edx, %xmm0 45: movabsq $139845047042064, %rdx 55: movabsq $139845047042080, %rsi 65: pinsrd $2, %eax, %xmm0 71: shrl %cl, %ecx 73: pinsrd $3, %ecx, %xmm0 79: pand (%rsi), %xmm0 83: cvtdq2ps %xmm0, %xmm0 86: mulps (%rdx), %xmm0 89: pshufd $64, %xmm0, %xmm0 94: movdqa %xmm0, (%rdi) 98: popq %rbp 99: ret define void @fetch_l4a4_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 255 %33 = shl i32 %31, 8 %34 = and i32 %33, 65280 %35 = or i32 %32, %34 %36 = shl i32 %31, 16 %37 = or i32 %35, %36 %38 = bitcast i32 %37 to <4 x i8> store <4 x i8> %38, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_L4A4_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movzbl (%rsi), %eax 8: movd %eax, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: pextrd $3, %xmm0, %eax 23: pextrd $1, %xmm0, %ecx 29: shrl $4, %ecx 32: pextrd $2, %xmm0, %edx 38: shrl %cl, %edx 40: pinsrd $1, %ecx, %xmm0 46: movabsq $139845047042064, %rcx 56: movabsq $139845047042080, %rsi 66: movabsq $139845047042096, %r8 76: movabsq $139845047042112, %r9 86: movabsq $139845047042128, %r10 96: movabsq $139845047042144, %r11 106: xorps %xmm1, %xmm1 109: movabsq $139845047042160, %rbx 119: pinsrd $2, %edx, %xmm0 125: movabsq $139845047042176, %rdx 135: shrl %cl, %eax 137: pinsrd $3, %eax, %xmm0 143: pand (%rdx), %xmm0 147: cvtdq2ps %xmm0, %xmm0 150: mulps (%rbx), %xmm0 153: maxps %xmm1, %xmm0 156: minps (%r11), %xmm0 160: mulps (%r10), %xmm0 164: addps (%r9), %xmm0 168: andps (%r8), %xmm0 172: pshufd $1, %xmm0, %xmm1 177: pshufd $3, %xmm0, %xmm2 182: movaps %xmm0, %xmm3 185: movhlps %xmm3, %xmm3 188: punpcklwd %xmm2, %xmm3 192: pshufb (%rsi), %xmm3 197: punpcklwd %xmm1, %xmm0 201: pshufb (%rcx), %xmm0 206: por %xmm3, %xmm0 210: movd %xmm0, %eax 214: movzbl %al, %ecx 217: movl %eax, %edx 219: shll $8, %edx 222: movzwl %dx, %edx 225: orl %ecx, %edx 227: shll $16, %eax 230: orl %edx, %eax 232: movl %eax, (%rdi) 234: popq %rbx 235: popq %rbp 236: ret define void @fetch_b10g10r10a2_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_L4A4_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: pinsrd $3, %eax, %xmm0 78: pand (%rdx), %xmm0 82: cvtdq2ps %xmm0, %xmm0 85: mulps (%rcx), %xmm0 88: pshufd $198, %xmm0, %xmm0 93: movdqa %xmm0, (%rdi) 97: popq %rbp 98: ret define void @fetch_b10g10r10a2_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = lshr i32 %31, 16 %33 = and i32 %32, 255 %34 = and i32 %31, -16711936 %35 = or i32 %33, %34 %36 = shl i32 %31, 16 %37 = and i32 %36, 16711680 %38 = or i32 %35, %37 %39 = bitcast i32 %38 to <4 x i8> store <4 x i8> %39, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B10G10R10A2_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: pextrd $3, %xmm0, %eax 20: pextrd $2, %xmm0, %ecx 26: pextrd $1, %xmm0, %edx 32: shrl $10, %edx 35: pinsrd $1, %edx, %xmm0 41: shrl $20, %ecx 44: pinsrd $2, %ecx, %xmm0 50: shrl $30, %eax 53: movabsq $139845047042064, %rcx 63: movabsq $139845047042080, %rdx 73: movabsq $139845047042096, %rsi 83: movabsq $139845047042112, %r8 93: movabsq $139845047042128, %r9 103: movabsq $139845047042144, %r10 113: xorps %xmm1, %xmm1 116: movabsq $139845047042160, %r11 126: movabsq $139845047042176, %rbx 136: pinsrd $3, %eax, %xmm0 142: pand (%rbx), %xmm0 146: cvtdq2ps %xmm0, %xmm0 149: mulps (%r11), %xmm0 153: maxps %xmm1, %xmm0 156: minps (%r10), %xmm0 160: mulps (%r9), %xmm0 164: addps (%r8), %xmm0 168: andps (%rsi), %xmm0 171: pshufd $1, %xmm0, %xmm1 176: pshufd $3, %xmm0, %xmm2 181: movaps %xmm0, %xmm3 184: movhlps %xmm3, %xmm3 187: punpcklwd %xmm2, %xmm3 191: pshufb (%rdx), %xmm3 196: punpcklwd %xmm1, %xmm0 200: pshufb (%rcx), %xmm0 205: por %xmm3, %xmm0 209: movd %xmm0, %eax 213: movl %eax, %ecx 215: andl $4278255360, %ecx 221: movl %eax, %edx 223: shrl $16, %edx 226: movzbl %dl, %edx 229: orl %ecx, %edx 231: shll $16, %eax 234: andl $16711680, %eax 240: orl %edx, %eax 242: movl %eax, (%rdi) 244: popq %rbx 245: popq %rbp 246: ret define void @fetch_r10sg10sb10sa2u_norm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4467807 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B10G10R10A2_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4467807, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r10sg10sb10sa2u_norm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4467807 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R10SG10SB10SA2U_NORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4467807, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r8g8bx_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4673814 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R10SG10SB10SA2U_NORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4673814, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r8g8bx_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4673814 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8Bx_SNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4673814, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r8g8b8x8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8Bx_SNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $2, %xmm0, %eax 19: pextrd $1, %xmm0, %ecx 25: shrl $8, %ecx 28: pextrd $3, %xmm0, %edx 34: shrl %cl, %edx 36: pinsrd $1, %ecx, %xmm0 42: shrl $16, %eax 45: pinsrd $2, %eax, %xmm0 51: movabsq $139845047042064, %rax 61: movaps (%rax), %xmm1 64: movabsq $139845047042080, %rax 74: movabsq $139845047042096, %rcx 84: pinsrd $3, %edx, %xmm0 90: pand (%rcx), %xmm0 94: cvtdq2ps %xmm0, %xmm0 97: mulps (%rax), %xmm0 100: shufps $33, %xmm0, %xmm1 104: shufps $36, %xmm1, %xmm0 108: movaps %xmm0, (%rdi) 111: popq %rbp 112: ret define void @fetch_r8g8b8x8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = bitcast i32 %5 to <4 x i8> %7 = bitcast <4 x i8> %6 to i32 %8 = and i32 %7, 16777215 %9 = or i32 %8, bitcast (<4 x i8> to i32) %10 = bitcast i32 %9 to <4 x i8> store <4 x i8> %10, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_R8G8B8X8_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $16777215, %eax 9: andl (%rsi), %eax 11: movabsq $139845047042064, %rcx 21: orl (%rcx), %eax 23: movl %eax, (%rdi) 25: popq %rbp 26: ret define void @fetch_b4g4r4x4_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_R8G8B8X8_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: pextrd $1, %xmm0, %ecx 28: shrl $4, %ecx 31: pextrd $3, %xmm0, %edx 37: shrl %cl, %edx 39: pinsrd $1, %ecx, %xmm0 45: movabsq $139845047042064, %rcx 55: movaps (%rcx), %xmm1 58: shrl $8, %eax 61: movabsq $139845047042080, %rcx 71: movabsq $139845047042096, %rsi 81: pinsrd $2, %eax, %xmm0 87: pinsrd $3, %edx, %xmm0 93: pand (%rsi), %xmm0 97: cvtdq2ps %xmm0, %xmm0 100: mulps (%rcx), %xmm0 103: shufps $1, %xmm0, %xmm1 107: shufps $38, %xmm1, %xmm0 111: movaps %xmm0, (%rdi) 114: popq %rbp 115: ret define void @fetch_b4g4r4x4_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = lshr i32 %32, 16 %34 = and i32 %33, 255 %35 = or i32 %34, bitcast (<4 x i8> to i32) %36 = and i32 %32, 65280 %37 = or i32 %35, %36 %38 = shl i32 %32, 16 %39 = and i32 %38, 16711680 %40 = or i32 %37, %39 %41 = bitcast i32 %40 to <4 x i8> store <4 x i8> %41, <4 x i8>* %0, align 4 ret void } Testing PIPE_FORMAT_B4G4R4X4_UNORM (float) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $2, %xmm0, %eax 25: pextrd $1, %xmm0, %ecx 31: shrl $4, %ecx 34: pextrd $3, %xmm0, %edx 40: shrl %cl, %edx 42: pinsrd $1, %ecx, %xmm0 48: shrl $8, %eax 51: movabsq $139845047042064, %rcx 61: movabsq $139845047042080, %rsi 71: movabsq $139845047042096, %r8 81: movabsq $139845047042112, %r9 91: movabsq $139845047042128, %r10 101: movabsq $139845047042144, %r11 111: movabsq $139845047042160, %rbx 121: xorps %xmm1, %xmm1 124: movabsq $139845047042176, %r14 134: pinsrd $2, %eax, %xmm0 140: movabsq $139845047042192, %rax 150: pinsrd $3, %edx, %xmm0 156: pand (%rax), %xmm0 160: cvtdq2ps %xmm0, %xmm0 163: mulps (%r14), %xmm0 167: maxps %xmm1, %xmm0 170: minps (%rbx), %xmm0 173: mulps (%r11), %xmm0 177: addps (%r10), %xmm0 181: andps (%r9), %xmm0 185: pshufd $1, %xmm0, %xmm1 190: pshufd $3, %xmm0, %xmm2 195: movaps %xmm0, %xmm3 198: movhlps %xmm3, %xmm3 201: punpcklwd %xmm2, %xmm3 205: pshufb (%r8), %xmm3 211: punpcklwd %xmm1, %xmm0 215: pshufb (%rsi), %xmm0 220: por %xmm3, %xmm0 224: movd %xmm0, %eax 228: movl %eax, %edx 230: shrl $16, %edx 233: movzbl %dl, %edx 236: orl (%rcx), %edx 238: movl %eax, %ecx 240: andl $65280, %ecx 246: orl %edx, %ecx 248: shll $16, %eax 251: andl $16711680, %eax 257: orl %ecx, %eax 259: movl %eax, (%rdi) 261: popq %rbx 262: popq %r14 264: popq %rbp 265: ret define void @fetch_b2g3r3_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } Testing PIPE_FORMAT_B4G4R4X4_UNORM (unorm8) ... 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: pextrd $1, %xmm0, %ecx 28: shrl $2, %ecx 31: pextrd $3, %xmm0, %edx 37: shrl %cl, %edx 39: pinsrd $1, %ecx, %xmm0 45: movabsq $139845047042064, %rcx 55: movaps (%rcx), %xmm1 58: shrl $5, %eax 61: movabsq $139845047042080, %rcx 71: movabsq $139845047042096, %rsi 81: pinsrd $2, %eax, %xmm0 87: pinsrd $3, %edx, %xmm0 93: pand (%rsi), %xmm0 97: cvtdq2ps %xmm0, %xmm0 100: mulps (%rcx), %xmm0 103: shufps $1, %xmm0, %xmm1 107: shufps $38, %xmm1, %xmm0 111: movaps %xmm0, (%rdi) 114: popq %rbp 115: ret define void @fetch_b2g3r3_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = lshr i32 %31, 16 %33 = and i32 %32, 255 %34 = or i32 %33, bitcast (<4 x i8> to i32) %35 = and i32 %31, 65280 %36 = or i32 %34, %35 %37 = shl i32 %31, 16 %38 = and i32 %37, 16711680 %39 = or i32 %36, %38 %40 = bitcast i32 %39 to <4 x i8> store <4 x i8> %40, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzbl (%rsi), %eax 10: movd %eax, %xmm0 14: pshufd $0, %xmm0, %xmm0 19: pextrd $2, %xmm0, %eax 25: pextrd $1, %xmm0, %ecx 31: shrl $2, %ecx 34: pextrd $3, %xmm0, %edx 40: shrl %cl, %edx 42: pinsrd $1, %ecx, %xmm0 48: shrl $5, %eax 51: movabsq $139845047042064, %rcx 61: movabsq $139845047042080, %rsi 71: movabsq $139845047042096, %r8 81: movabsq $139845047042112, %r9 91: movabsq $139845047042128, %r10 101: movabsq $139845047042144, %r11 111: movabsq $139845047042160, %rbx 121: xorps %xmm1, %xmm1 124: movabsq $139845047042176, %r14 134: pinsrd $2, %eax, %xmm0 140: movabsq $139845047042192, %rax 150: pinsrd $3, %edx, %xmm0 156: pand (%rax), %xmm0 160: cvtdq2ps %xmm0, %xmm0 163: mulps (%r14), %xmm0 167: maxps %xmm1, %xmm0 170: minps (%rbx), %xmm0 173: mulps (%r11), %xmm0 177: addps (%r10), %xmm0 181: andps (%r9), %xmm0 185: pshufd $1, %xmm0, %xmm1 190: pshufd $3, %xmm0, %xmm2 195: movaps %xmm0, %xmm3 198: movhlps %xmm3, %xmm3 201: punpcklwd %xmm2, %xmm3 205: pshufb (%r8), %xmm3 211: punpcklwd %xmm1, %xmm0 215: pshufb (%rsi), %xmm0 220: por %xmm3, %xmm0 224: movd %xmm0, %eax 228: movl %eax, %edx 230: shrl $16, %edx 233: movzbl %dl, %edx 236: orl (%rcx), %edx 238: movl %eax, %ecx 240: andl $65280, %ecx 246: orl %edx, %ecx 248: shll $16, %eax 251: andl $16711680, %eax 257: orl %ecx, %eax 259: movl %eax, (%rdi) 261: popq %rbx 262: popq %r14 264: popq %rbp 265: ret define void @fetch_l16a16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $2, %xmm0, %eax 19: shrl %cl, %eax 21: pextrd $3, %xmm0, %ecx 27: pextrd $1, %xmm0, %edx 33: shrl $16, %edx 36: pinsrd $1, %edx, %xmm0 42: pinsrd $2, %eax, %xmm0 48: shrl %cl, %ecx 50: movabsq $139845047042064, %rax 60: movabsq $139845047042080, %rdx 70: pinsrd $3, %ecx, %xmm0 76: pand (%rdx), %xmm0 80: cvtdq2ps %xmm0, %xmm0 83: mulps (%rax), %xmm0 86: pshufd $64, %xmm0, %xmm0 91: movdqa %xmm0, (%rdi) 95: popq %rbp 96: ret define void @fetch_l16a16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 255 %33 = shl i32 %31, 8 %34 = and i32 %33, 65280 %35 = or i32 %32, %34 %36 = shl i32 %31, 16 %37 = or i32 %35, %36 %38 = bitcast i32 %37 to <4 x i8> store <4 x i8> %38, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movd (%rsi), %xmm0 9: pshufd $0, %xmm0, %xmm0 14: pextrd $2, %xmm0, %eax 20: shrl %cl, %eax 22: pextrd $3, %xmm0, %ecx 28: pextrd $1, %xmm0, %edx 34: shrl $16, %edx 37: pinsrd $1, %edx, %xmm0 43: pinsrd $2, %eax, %xmm0 49: shrl %cl, %ecx 51: movabsq $139845047042064, %rax 61: movabsq $139845047042080, %rdx 71: movabsq $139845047042096, %rsi 81: movabsq $139845047042112, %r8 91: movabsq $139845047042128, %r9 101: movabsq $139845047042144, %r10 111: xorps %xmm1, %xmm1 114: movabsq $139845047042160, %r11 124: movabsq $139845047042176, %rbx 134: pinsrd $3, %ecx, %xmm0 140: pand (%rbx), %xmm0 144: cvtdq2ps %xmm0, %xmm0 147: mulps (%r11), %xmm0 151: maxps %xmm1, %xmm0 154: minps (%r10), %xmm0 158: mulps (%r9), %xmm0 162: addps (%r8), %xmm0 166: andps (%rsi), %xmm0 169: pshufd $1, %xmm0, %xmm1 174: pshufd $3, %xmm0, %xmm2 179: movaps %xmm0, %xmm3 182: movhlps %xmm3, %xmm3 185: punpcklwd %xmm2, %xmm3 189: pshufb (%rdx), %xmm3 194: punpcklwd %xmm1, %xmm0 198: pshufb (%rax), %xmm0 203: por %xmm3, %xmm0 207: movd %xmm0, %eax 211: movzbl %al, %ecx 214: movl %eax, %edx 216: shll $8, %edx 219: movzwl %dx, %edx 222: orl %ecx, %edx 224: shll $16, %eax 227: orl %edx, %eax 229: movl %eax, (%rdi) 231: popq %rbx 232: popq %rbp 233: ret define void @fetch_a16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: mulps %xmm0, %xmm1 54: pslldq $12, %xmm1 59: movdqa %xmm1, (%rdi) 63: popq %rbp 64: ret define void @fetch_a16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = bitcast <4 x i8> %31 to i32 %33 = shl i32 %32, 24 %34 = bitcast i32 %33 to <4 x i8> store <4 x i8> %34, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: movabsq $139845047042064, %rax 44: movss (%rax), %xmm0 48: cvtdq2ps %xmm1, %xmm1 51: movabsq $139845047042080, %rax 61: movabsq $139845047042096, %rcx 71: movabsq $139845047042112, %rdx 81: movabsq $139845047042128, %rsi 91: movabsq $139845047042144, %r8 101: movabsq $139845047042160, %r9 111: xorps %xmm2, %xmm2 114: mulps %xmm1, %xmm0 117: maxps %xmm2, %xmm0 120: minps (%r9), %xmm0 124: mulps (%r8), %xmm0 128: addps (%rsi), %xmm0 131: andps (%rdx), %xmm0 134: pshufd $1, %xmm0, %xmm1 139: pshufd $3, %xmm0, %xmm2 144: movaps %xmm0, %xmm3 147: movhlps %xmm3, %xmm3 150: punpcklwd %xmm2, %xmm3 154: pshufb (%rcx), %xmm3 159: punpcklwd %xmm1, %xmm0 163: pshufb (%rax), %xmm0 168: por %xmm3, %xmm0 172: movd %xmm0, %eax 176: shll $24, %eax 179: movl %eax, (%rdi) 181: popq %rbp 182: ret define void @fetch_i16_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: cvtdq2ps %xmm1, %xmm0 37: movabsq $139845047042064, %rax 47: movss (%rax), %xmm1 51: mulps %xmm0, %xmm1 54: pshufd $0, %xmm1, %xmm0 59: movdqa %xmm0, (%rdi) 63: popq %rbp 64: ret define void @fetch_i16_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %12, <4 x float> zeroinitializer) %14 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %13, <4 x float> ) %15 = fmul <4 x float> %14, %16 = fadd <4 x float> %15, %17 = bitcast <4 x float> %16 to <4 x i32> %18 = and <4 x i32> %17, %19 = extractelement <4 x i32> %18, i32 0 %20 = extractelement <4 x i32> %18, i32 1 %21 = extractelement <4 x i32> %18, i32 2 %22 = extractelement <4 x i32> %18, i32 3 %23 = bitcast i32 %19 to <2 x i16> %24 = bitcast i32 %20 to <2 x i16> %25 = shufflevector <2 x i16> %23, <2 x i16> %24, <2 x i32> %26 = bitcast i32 %21 to <2 x i16> %27 = bitcast i32 %22 to <2 x i16> %28 = shufflevector <2 x i16> %26, <2 x i16> %27, <2 x i32> %29 = bitcast <2 x i16> %25 to <4 x i8> %30 = bitcast <2 x i16> %28 to <4 x i8> %31 = shufflevector <4 x i8> %29, <4 x i8> %30, <4 x i32> %32 = and <4 x i8> %31, %33 = bitcast <4 x i8> %32 to i32 %34 = shl i32 %33, 8 %35 = or i32 %33, %34 %36 = shl i32 %35, 16 %37 = or i32 %35, %36 %38 = bitcast i32 %37 to <4 x i8> store <4 x i8> %38, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl $65535, %eax 9: movd %eax, %xmm0 13: movzwl (%rsi), %eax 16: movd %eax, %xmm1 20: pshufd $0, %xmm1, %xmm1 25: psrld $0, %xmm1 30: pand %xmm0, %xmm1 34: movabsq $139845047042064, %rax 44: movss (%rax), %xmm0 48: movl $255, %eax 53: cvtdq2ps %xmm1, %xmm1 56: movd %eax, %xmm2 60: movabsq $139845047042080, %rax 70: movabsq $139845047042096, %rcx 80: movabsq $139845047042112, %rdx 90: movabsq $139845047042128, %rsi 100: movabsq $139845047042144, %r8 110: movabsq $139845047042160, %r9 120: xorps %xmm3, %xmm3 123: mulps %xmm1, %xmm0 126: maxps %xmm3, %xmm0 129: minps (%r9), %xmm0 133: mulps (%r8), %xmm0 137: addps (%rsi), %xmm0 140: andps (%rdx), %xmm0 143: pshufd $1, %xmm0, %xmm1 148: pshufd $3, %xmm0, %xmm3 153: movaps %xmm0, %xmm4 156: movhlps %xmm4, %xmm4 159: punpcklwd %xmm3, %xmm4 163: pshufb (%rcx), %xmm4 168: punpcklwd %xmm1, %xmm0 172: pshufb (%rax), %xmm0 177: por %xmm4, %xmm0 181: pand %xmm2, %xmm0 185: movd %xmm0, %eax 189: movl %eax, %ecx 191: shll $8, %ecx 194: orl %eax, %ecx 196: movl %ecx, %eax 198: shll $16, %eax 201: orl %ecx, %eax 203: movl %eax, (%rdi) 205: popq %rbp 206: ret define void @fetch_latc1_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4674735 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4674735, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_latc1_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4674102 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4674102, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_latc1_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4675472 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4675472, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_latc1_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4674870 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4674870, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_latc2_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4676333 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4676333, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_latc2_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4675607 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4675607, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_latc2_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4677181 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4677181, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_latc2_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca i32, align 4 store i32 0, i32* %4, align 4 %5 = bitcast i32* %4 to i8* call void inttoptr (i64 4676520 to void (i8*, i8*, i32, i32)*)(i8* %5, i8* %1, i32 %2, i32 %3) %6 = load i32* %4, align 4 %7 = bitcast i32 %6 to <4 x i8> store <4 x i8> %7, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: pushq %rax 6: movq %rdi, %rbx 9: movl $0, -12(%rbp) 16: leaq -12(%rbp), %rdi 20: movl $4676520, %eax 25: callq *%rax 27: movl -12(%rbp), %eax 30: movl %eax, (%rbx) 32: addq $8, %rsp 36: popq %rbx 37: popq %rbp 38: ret define void @fetch_a8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4438796 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4438796, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_a8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4438796 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4438796, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = sext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = insertelement <4 x i32> %6, i32 %5, i32 1 %8 = insertelement <4 x i32> %7, i32 %5, i32 2 %9 = insertelement <4 x i32> %8, i32 %5, i32 3 %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movsbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: movaps (%rax), %xmm1 45: shufps $1, %xmm0, %xmm1 49: shufps $32, %xmm1, %xmm0 53: movaps %xmm0, (%rdi) 56: popq %rbp 57: ret define void @fetch_l8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> undef, i8 %4, i32 0 %6 = shufflevector <4 x i8> %5, <4 x i8> undef, <4 x i32> zeroinitializer %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = bitcast <4 x i8> %10 to i32 %12 = and i32 %11, 255 %13 = or i32 %12, bitcast (<4 x i8> to i32) %14 = shl i32 %11, 8 %15 = and i32 %14, 65280 %16 = or i32 %13, %15 %17 = shl i32 %11, 16 %18 = and i32 %17, 16711680 %19 = or i32 %16, %18 %20 = bitcast i32 %19 to <4 x i8> store <4 x i8> %20, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklbw %xmm0, %xmm0 15: punpcklbw %xmm0, %xmm0 19: pshufd $0, %xmm0, %xmm0 24: pxor %xmm1, %xmm1 28: pmaxsb %xmm0, %xmm1 33: movabsq $139845047042064, %rax 43: movdqa (%rax), %xmm0 47: movabsq $139845047042080, %rax 57: movabsq $139845047042096, %rcx 67: movabsq $139845047042112, %rdx 77: movdqa (%rcx), %xmm2 81: pand %xmm1, %xmm2 85: psllw $4, %xmm2 90: psllw $5, %xmm0 95: pblendvb %xmm0, %xmm2, %xmm1 100: movdqa (%rax), %xmm2 104: pand %xmm1, %xmm2 108: psllw $2, %xmm2 113: paddb %xmm0, %xmm0 117: pblendvb %xmm0, %xmm2, %xmm1 122: movdqa %xmm1, %xmm2 126: paddb %xmm2, %xmm2 130: paddb %xmm0, %xmm0 134: pblendvb %xmm0, %xmm2, %xmm1 139: movd %xmm1, %eax 143: movzbl %al, %ecx 146: orl (%rdx), %ecx 148: movl %eax, %edx 150: shll $8, %edx 153: movzwl %dx, %edx 156: orl %ecx, %edx 158: shll $16, %eax 161: andl $16711680, %eax 167: orl %edx, %eax 169: movl %eax, (%rdi) 171: popq %rbp 172: ret define void @fetch_l8a8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4441027 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4441027, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_l8a8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4441027 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4441027, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_i8_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = sext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = insertelement <4 x i32> %6, i32 %5, i32 1 %8 = insertelement <4 x i32> %7, i32 %5, i32 2 %9 = insertelement <4 x i32> %8, i32 %5, i32 3 %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movsbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: pshufd $0, %xmm0, %xmm0 37: movdqa %xmm0, (%rdi) 41: popq %rbp 42: ret define void @fetch_i8_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = insertelement <4 x i8> undef, i8 %4, i32 0 %6 = shufflevector <4 x i8> %5, <4 x i8> undef, <4 x i32> zeroinitializer %7 = shufflevector <4 x i8> %6, <4 x i8> %6, <16 x i32> %8 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %7, <16 x i8> ) %9 = shufflevector <16 x i8> %8, <16 x i8> %8, <4 x i32> %10 = shl <4 x i8> %9, %11 = and <4 x i8> %10, %12 = bitcast <4 x i8> %11 to i32 %13 = shl i32 %12, 8 %14 = or i32 %12, %13 %15 = shl i32 %14, 16 %16 = or i32 %14, %15 %17 = bitcast i32 %16 to <4 x i8> store <4 x i8> %17, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklbw %xmm0, %xmm0 15: punpcklbw %xmm0, %xmm0 19: pshufd $0, %xmm0, %xmm0 24: pxor %xmm1, %xmm1 28: pmaxsb %xmm0, %xmm1 33: movabsq $139845047042064, %rax 43: movdqa (%rax), %xmm0 47: movabsq $139845047042080, %rax 57: movabsq $139845047042096, %rcx 67: movl $255, %edx 72: movd %edx, %xmm2 76: movdqa (%rcx), %xmm3 80: pand %xmm1, %xmm3 84: psllw $4, %xmm3 89: psllw $5, %xmm0 94: pblendvb %xmm0, %xmm3, %xmm1 99: movdqa (%rax), %xmm3 103: pand %xmm1, %xmm3 107: psllw $2, %xmm3 112: paddb %xmm0, %xmm0 116: pblendvb %xmm0, %xmm3, %xmm1 121: movdqa %xmm1, %xmm3 125: paddb %xmm3, %xmm3 129: paddb %xmm0, %xmm0 133: pblendvb %xmm0, %xmm3, %xmm1 138: pand %xmm2, %xmm1 142: movd %xmm1, %eax 146: movl %eax, %ecx 148: shll $8, %ecx 151: orl %eax, %ecx 153: movl %ecx, %eax 155: shll $16, %eax 158: orl %ecx, %eax 160: movl %eax, (%rdi) 162: popq %rbp 163: ret define void @fetch_a16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4443430 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4443430, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_a16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4443430 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4443430, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = sext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = insertelement <4 x i32> %7, i32 %6, i32 1 %9 = insertelement <4 x i32> %8, i32 %6, i32 2 %10 = insertelement <4 x i32> %9, i32 %6, i32 3 %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movswl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: movaps (%rax), %xmm1 45: shufps $1, %xmm0, %xmm1 49: shufps $32, %xmm1, %xmm0 53: movaps %xmm0, (%rdi) 56: popq %rbp 57: ret define void @fetch_l16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = shufflevector <4 x i16> %7, <4 x i16> %7, <8 x i32> %9 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %8, <8 x i16> ) %10 = shufflevector <8 x i16> %9, <8 x i16> %9, <4 x i32> %11 = ashr <4 x i16> %10, %12 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %13 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = bitcast <2 x i16> %13 to <4 x i8> %16 = shufflevector <4 x i8> %14, <4 x i8> %15, <4 x i32> %17 = bitcast <4 x i8> %16 to i32 %18 = and i32 %17, 255 %19 = or i32 %18, bitcast (<4 x i8> to i32) %20 = shl i32 %17, 8 %21 = and i32 %20, 65280 %22 = or i32 %19, %21 %23 = shl i32 %17, 16 %24 = and i32 %23, 16711680 %25 = or i32 %22, %24 %26 = bitcast i32 %25 to <4 x i8> store <4 x i8> %26, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: pshufd $68, %xmm0, %xmm0 25: pxor %xmm1, %xmm1 29: pmaxsw %xmm0, %xmm1 33: movabsq $139845047042064, %rax 43: movabsq $139845047042080, %rcx 53: movabsq $139845047042096, %rdx 63: psraw $7, %xmm1 68: pshufd $1, %xmm1, %xmm0 73: pshufb (%rdx), %xmm1 78: pshufb (%rcx), %xmm0 83: por %xmm1, %xmm0 87: movd %xmm0, %ecx 91: movzbl %cl, %edx 94: orl (%rax), %edx 96: movl %ecx, %eax 98: shll $8, %eax 101: movzwl %ax, %eax 104: orl %edx, %eax 106: shll $16, %ecx 109: andl $16711680, %ecx 115: orl %eax, %ecx 117: movl %ecx, (%rdi) 119: popq %rbp 120: ret define void @fetch_l16a16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4445601 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4445601, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_l16a16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4445601 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4445601, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_i16_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = sext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = insertelement <4 x i32> %7, i32 %6, i32 1 %9 = insertelement <4 x i32> %8, i32 %6, i32 2 %10 = insertelement <4 x i32> %9, i32 %6, i32 3 %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movswl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: cvtdq2ps %xmm0, %xmm0 19: movabsq $139845047042064, %rax 29: mulps (%rax), %xmm0 32: pshufd $0, %xmm0, %xmm0 37: movdqa %xmm0, (%rdi) 41: popq %rbp 42: ret define void @fetch_i16_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = shufflevector <4 x i16> %7, <4 x i16> %7, <8 x i32> %9 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %8, <8 x i16> ) %10 = shufflevector <8 x i16> %9, <8 x i16> %9, <4 x i32> %11 = ashr <4 x i16> %10, %12 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %13 = shufflevector <4 x i16> %11, <4 x i16> %11, <2 x i32> %14 = bitcast <2 x i16> %12 to <4 x i8> %15 = bitcast <2 x i16> %13 to <4 x i8> %16 = shufflevector <4 x i8> %14, <4 x i8> %15, <4 x i32> %17 = and <4 x i8> %16, %18 = bitcast <4 x i8> %17 to i32 %19 = shl i32 %18, 8 %20 = or i32 %18, %19 %21 = shl i32 %20, 16 %22 = or i32 %20, %21 %23 = bitcast i32 %22 to <4 x i8> store <4 x i8> %23, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: pshufd $68, %xmm0, %xmm0 25: pxor %xmm1, %xmm1 29: pmaxsw %xmm0, %xmm1 33: movl $255, %eax 38: movd %eax, %xmm0 42: movabsq $139845047042064, %rax 52: movabsq $139845047042080, %rcx 62: psraw $7, %xmm1 67: pshufd $1, %xmm1, %xmm2 72: pshufb (%rcx), %xmm1 77: pshufb (%rax), %xmm2 82: por %xmm1, %xmm2 86: pand %xmm0, %xmm2 90: movd %xmm2, %eax 94: movl %eax, %ecx 96: shll $8, %ecx 99: orl %eax, %ecx 101: movl %ecx, %eax 103: shll $16, %eax 106: orl %ecx, %eax 108: movl %eax, (%rdi) 110: popq %rbp 111: ret define void @fetch_a16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4447731 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4447731, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_a16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4447731 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4447731, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = shufflevector <4 x float> %21, <4 x float> , <4 x i32> store <4 x float> %22, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: extractps $0, %xmm0, %eax 26: movl $2139095040, %ecx 31: movabsq $139845047042064, %rdx 41: movabsq $139845047042080, %rsi 51: pextrw $1, %xmm0, %r8d 57: movdqa (%rsi), %xmm1 61: movaps (%rdx), %xmm2 64: movd %ecx, %xmm3 68: movabsq $139845047042096, %rcx 78: movabsq $139845047042112, %rdx 88: movabsq $139845047042128, %rsi 98: movzwl %ax, %eax 101: movd %eax, %xmm4 105: pinsrd $1, %r8d, %xmm4 112: pextrw $2, %xmm0, %eax 117: pinsrd $2, %eax, %xmm4 123: pextrw $3, %xmm0, %eax 128: pinsrd $3, %eax, %xmm4 134: movdqa (%rsi), %xmm0 138: pand %xmm4, %xmm0 142: pxor %xmm0, %xmm4 146: movdqa %xmm0, %xmm5 150: pxor %xmm1, %xmm5 154: pxor (%rdx), %xmm1 158: pcmpgtd %xmm1, %xmm5 162: pand %xmm3, %xmm5 166: pslld $16, %xmm4 171: por %xmm5, %xmm4 175: pslld $13, %xmm0 180: mulps (%rcx), %xmm0 183: orps %xmm4, %xmm0 186: shufps $1, %xmm0, %xmm2 190: shufps $32, %xmm2, %xmm0 194: movaps %xmm0, (%rdi) 197: popq %rbp 198: ret define void @fetch_l16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %21, <4 x float> zeroinitializer) %23 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %22, <4 x float> ) %24 = fmul <4 x float> %23, %25 = fadd <4 x float> %24, %26 = bitcast <4 x float> %25 to <4 x i32> %27 = and <4 x i32> %26, %28 = extractelement <4 x i32> %27, i32 0 %29 = extractelement <4 x i32> %27, i32 1 %30 = extractelement <4 x i32> %27, i32 2 %31 = extractelement <4 x i32> %27, i32 3 %32 = bitcast i32 %28 to <2 x i16> %33 = bitcast i32 %29 to <2 x i16> %34 = shufflevector <2 x i16> %32, <2 x i16> %33, <2 x i32> %35 = bitcast i32 %30 to <2 x i16> %36 = bitcast i32 %31 to <2 x i16> %37 = shufflevector <2 x i16> %35, <2 x i16> %36, <2 x i32> %38 = bitcast <2 x i16> %34 to <4 x i8> %39 = bitcast <2 x i16> %37 to <4 x i8> %40 = shufflevector <4 x i8> %38, <4 x i8> %39, <4 x i32> %41 = bitcast <4 x i8> %40 to i32 %42 = and i32 %41, 255 %43 = or i32 %42, bitcast (<4 x i8> to i32) %44 = shl i32 %41, 8 %45 = and i32 %44, 65280 %46 = or i32 %43, %45 %47 = shl i32 %41, 16 %48 = and i32 %47, 16711680 %49 = or i32 %46, %48 %50 = bitcast i32 %49 to <4 x i8> store <4 x i8> %50, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: punpcklwd %xmm0, %xmm0 18: pshufd $0, %xmm0, %xmm0 23: extractps $0, %xmm0, %eax 29: movabsq $139845047042064, %rcx 39: pextrw $3, %xmm0, %edx 44: pextrw $2, %xmm0, %esi 49: pextrw $1, %xmm0, %r8d 55: movabsq $139845047042080, %r9 65: movabsq $139845047042096, %r10 75: movabsq $139845047042112, %r11 85: movabsq $139845047042128, %rbx 95: xorps %xmm0, %xmm0 98: movzwl %ax, %eax 101: movabsq $139845047042144, %r14 111: movdqa (%rcx), %xmm1 115: movabsq $139845047042160, %rcx 125: movd %eax, %xmm2 129: movabsq $139845047042176, %rax 139: pinsrd $1, %r8d, %xmm2 146: movabsq $139845047042192, %r8 156: pinsrd $2, %esi, %xmm2 162: movabsq $139845047042208, %rsi 172: pinsrd $3, %edx, %xmm2 178: movabsq $139845047042224, %rdx 188: movdqa (%rdx), %xmm3 192: pand %xmm2, %xmm3 196: pxor %xmm3, %xmm2 200: movdqa %xmm3, %xmm4 204: pxor %xmm1, %xmm4 208: movabsq $139845047042240, %rdx 218: pxor (%rdx), %xmm1 222: pcmpgtd %xmm1, %xmm4 226: pand (%rsi), %xmm4 230: pslld $16, %xmm2 235: por %xmm4, %xmm2 239: pslld $13, %xmm3 244: mulps (%r8), %xmm3 248: orps %xmm2, %xmm3 251: maxps %xmm0, %xmm3 254: minps (%rax), %xmm3 257: mulps (%rcx), %xmm3 260: addps (%r14), %xmm3 264: andps (%rbx), %xmm3 267: pshufd $1, %xmm3, %xmm0 272: pshufd $3, %xmm3, %xmm1 277: movaps %xmm3, %xmm2 280: movhlps %xmm2, %xmm2 283: punpcklwd %xmm1, %xmm2 287: pshufb (%r11), %xmm2 293: punpcklwd %xmm0, %xmm3 297: pshufb (%r10), %xmm3 303: por %xmm2, %xmm3 307: movd %xmm3, %eax 311: movzbl %al, %ecx 314: orl (%r9), %ecx 317: movl %eax, %edx 319: shll $8, %edx 322: movzwl %dx, %edx 325: orl %ecx, %edx 327: shll $16, %eax 330: andl $16711680, %eax 336: orl %edx, %eax 338: movl %eax, (%rdi) 340: popq %rbx 341: popq %r14 343: popq %rbp 344: ret define void @fetch_l16a16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4449543 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4449543, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_l16a16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4449543 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4449543, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_i16_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = shufflevector <4 x float> %21, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %22, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: punpcklwd %xmm0, %xmm0 15: pshufd $0, %xmm0, %xmm0 20: extractps $0, %xmm0, %eax 26: pextrw $1, %xmm0, %ecx 31: movabsq $139845047042064, %rdx 41: movl $2139095040, %esi 46: movd %esi, %xmm1 50: movdqa (%rdx), %xmm2 54: movabsq $139845047042080, %rdx 64: movabsq $139845047042096, %rsi 74: movabsq $139845047042112, %r8 84: movzwl %ax, %eax 87: movd %eax, %xmm3 91: pinsrd $1, %ecx, %xmm3 97: pextrw $2, %xmm0, %eax 102: pinsrd $2, %eax, %xmm3 108: pextrw $3, %xmm0, %eax 113: pinsrd $3, %eax, %xmm3 119: movdqa (%r8), %xmm0 124: pand %xmm3, %xmm0 128: pxor %xmm0, %xmm3 132: movdqa %xmm0, %xmm4 136: pxor %xmm2, %xmm4 140: pxor (%rsi), %xmm2 144: pcmpgtd %xmm2, %xmm4 148: pand %xmm1, %xmm4 152: pslld $16, %xmm3 157: por %xmm4, %xmm3 161: pslld $13, %xmm0 166: mulps (%rdx), %xmm0 169: orps %xmm3, %xmm0 172: pshufd $0, %xmm0, %xmm0 177: movdqa %xmm0, (%rdi) 181: popq %rbp 182: ret define void @fetch_i16_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = insertelement <4 x i16> undef, i16 %5, i32 0 %7 = shufflevector <4 x i16> %6, <4 x i16> undef, <4 x i32> zeroinitializer %8 = zext <4 x i16> %7 to <4 x i32> %9 = and <4 x i32> %8, %10 = shl <4 x i32> %9, %11 = bitcast <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = bitcast <4 x float> %12 to <4 x i32> %14 = icmp ugt <4 x i32> %9, %15 = sext <4 x i1> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = xor <4 x i32> %8, %9 %18 = shl <4 x i32> %17, %19 = or <4 x i32> %18, %16 %20 = or <4 x i32> %13, %19 %21 = bitcast <4 x i32> %20 to <4 x float> %22 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %21, <4 x float> zeroinitializer) %23 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %22, <4 x float> ) %24 = fmul <4 x float> %23, %25 = fadd <4 x float> %24, %26 = bitcast <4 x float> %25 to <4 x i32> %27 = and <4 x i32> %26, %28 = extractelement <4 x i32> %27, i32 0 %29 = extractelement <4 x i32> %27, i32 1 %30 = extractelement <4 x i32> %27, i32 2 %31 = extractelement <4 x i32> %27, i32 3 %32 = bitcast i32 %28 to <2 x i16> %33 = bitcast i32 %29 to <2 x i16> %34 = shufflevector <2 x i16> %32, <2 x i16> %33, <2 x i32> %35 = bitcast i32 %30 to <2 x i16> %36 = bitcast i32 %31 to <2 x i16> %37 = shufflevector <2 x i16> %35, <2 x i16> %36, <2 x i32> %38 = bitcast <2 x i16> %34 to <4 x i8> %39 = bitcast <2 x i16> %37 to <4 x i8> %40 = shufflevector <4 x i8> %38, <4 x i8> %39, <4 x i32> %41 = and <4 x i8> %40, %42 = bitcast <4 x i8> %41 to i32 %43 = shl i32 %42, 8 %44 = or i32 %42, %43 %45 = shl i32 %44, 16 %46 = or i32 %44, %45 %47 = bitcast i32 %46 to <4 x i8> store <4 x i8> %47, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %r14 6: pushq %rbx 7: movzwl (%rsi), %eax 10: movd %eax, %xmm0 14: punpcklwd %xmm0, %xmm0 18: pshufd $0, %xmm0, %xmm0 23: extractps $0, %xmm0, %eax 29: movl $255, %ecx 34: pextrw $1, %xmm0, %edx 39: movabsq $139845047042064, %rsi 49: pextrw $3, %xmm0, %r8d 55: pextrw $2, %xmm0, %r9d 61: movd %ecx, %xmm0 65: movabsq $139845047042080, %rcx 75: movabsq $139845047042096, %r10 85: movabsq $139845047042112, %r11 95: movabsq $139845047042128, %rbx 105: xorps %xmm1, %xmm1 108: movzwl %ax, %eax 111: movabsq $139845047042144, %r14 121: movdqa (%rsi), %xmm2 125: movabsq $139845047042160, %rsi 135: movd %eax, %xmm3 139: movabsq $139845047042176, %rax 149: pinsrd $1, %edx, %xmm3 155: movabsq $139845047042192, %rdx 165: pinsrd $2, %r9d, %xmm3 172: movabsq $139845047042208, %r9 182: pinsrd $3, %r8d, %xmm3 189: movabsq $139845047042224, %r8 199: movdqa (%r8), %xmm4 204: pand %xmm3, %xmm4 208: pxor %xmm4, %xmm3 212: movdqa %xmm4, %xmm5 216: pxor %xmm2, %xmm5 220: pxor (%r9), %xmm2 225: pcmpgtd %xmm2, %xmm5 229: pand (%rdx), %xmm5 233: pslld $16, %xmm3 238: por %xmm5, %xmm3 242: pslld $13, %xmm4 247: mulps (%rax), %xmm4 250: orps %xmm3, %xmm4 253: maxps %xmm1, %xmm4 256: minps (%rsi), %xmm4 259: mulps (%r14), %xmm4 263: addps (%rbx), %xmm4 266: andps (%r11), %xmm4 270: pshufd $1, %xmm4, %xmm1 275: pshufd $3, %xmm4, %xmm2 280: movaps %xmm4, %xmm3 283: movhlps %xmm3, %xmm3 286: punpcklwd %xmm2, %xmm3 290: pshufb (%r10), %xmm3 296: punpcklwd %xmm1, %xmm4 300: pshufb (%rcx), %xmm4 305: por %xmm3, %xmm4 309: pand %xmm0, %xmm4 313: movd %xmm4, %eax 317: movl %eax, %ecx 319: shll $8, %ecx 322: orl %eax, %ecx 324: movl %ecx, %eax 326: shll $16, %eax 329: orl %ecx, %eax 331: movl %eax, (%rdi) 333: popq %rbx 334: popq %r14 336: popq %rbp 337: ret define void @fetch_a32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = zext i32 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %rax, %xmm0 11: pslldq $12, %xmm0 16: movdqa %xmm0, (%rdi) 20: popq %rbp 21: ret define void @fetch_a32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4451440 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4451440, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_l32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = zext i32 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> , <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movabsq $139845047042064, %rax 14: movaps (%rax), %xmm0 17: movl (%rsi), %eax 19: movd %rax, %xmm1 24: shufps $1, %xmm1, %xmm0 28: shufps $32, %xmm0, %xmm1 32: movaps %xmm1, (%rdi) 35: popq %rbp 36: ret define void @fetch_l32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to float* %5 = load float* %4, align 4 %6 = insertelement <4 x float> undef, float %5, i32 0 %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer %8 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> zeroinitializer) %9 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %8, <4 x float> ) %10 = fmul <4 x float> %9, %11 = fadd <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = and <4 x i32> %12, %14 = extractelement <4 x i32> %13, i32 0 %15 = extractelement <4 x i32> %13, i32 1 %16 = extractelement <4 x i32> %13, i32 2 %17 = extractelement <4 x i32> %13, i32 3 %18 = bitcast i32 %14 to <2 x i16> %19 = bitcast i32 %15 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast i32 %16 to <2 x i16> %22 = bitcast i32 %17 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast <2 x i16> %20 to <4 x i8> %25 = bitcast <2 x i16> %23 to <4 x i8> %26 = shufflevector <4 x i8> %24, <4 x i8> %25, <4 x i32> %27 = bitcast <4 x i8> %26 to i32 %28 = and i32 %27, 255 %29 = or i32 %28, bitcast (<4 x i8> to i32) %30 = shl i32 %27, 8 %31 = and i32 %30, 65280 %32 = or i32 %29, %31 %33 = shl i32 %27, 16 %34 = and i32 %33, 16711680 %35 = or i32 %32, %34 %36 = bitcast i32 %35 to <4 x i8> store <4 x i8> %36, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movss (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: xorps %xmm1, %xmm1 16: maxps %xmm1, %xmm0 19: movabsq $139845047042064, %rax 29: minps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: mulps (%rax), %xmm0 45: movabsq $139845047042096, %rax 55: movabsq $139845047042112, %rcx 65: movabsq $139845047042128, %rdx 75: movabsq $139845047042144, %rsi 85: movabsq $139845047042160, %r8 95: addps (%rax), %xmm0 98: andps (%r8), %xmm0 102: pshufd $1, %xmm0, %xmm1 107: pshufd $3, %xmm0, %xmm2 112: movaps %xmm0, %xmm3 115: movhlps %xmm3, %xmm3 118: punpcklwd %xmm2, %xmm3 122: pshufb (%rsi), %xmm3 127: punpcklwd %xmm1, %xmm0 131: pshufb (%rdx), %xmm0 136: por %xmm3, %xmm0 140: movd %xmm0, %eax 144: movzbl %al, %edx 147: orl (%rcx), %edx 149: movl %eax, %ecx 151: shll $8, %ecx 154: movzwl %cx, %ecx 157: orl %edx, %ecx 159: shll $16, %eax 162: andl $16711680, %eax 168: orl %ecx, %eax 170: movl %eax, (%rdi) 172: popq %rbp 173: ret define void @fetch_l32a32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i64* %5 = load i64* %4, align 8 %6 = zext i64 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movq (%rsi), %xmm0 8: pshufd $64, %xmm0, %xmm0 13: movdqa %xmm0, (%rdi) 17: popq %rbp 18: ret define void @fetch_l32a32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4452949 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4452949, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_i32_float_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = zext i32 %5 to i128 %7 = bitcast i128 %6 to <4 x float> %8 = shufflevector <4 x float> %7, <4 x float> undef, <4 x i32> zeroinitializer store <4 x float> %8, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: movd %rax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: movdqa %xmm0, (%rdi) 20: popq %rbp 21: ret define void @fetch_i32_float_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to float* %5 = load float* %4, align 4 %6 = insertelement <4 x float> undef, float %5, i32 0 %7 = shufflevector <4 x float> %6, <4 x float> undef, <4 x i32> zeroinitializer %8 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %7, <4 x float> zeroinitializer) %9 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %8, <4 x float> ) %10 = fmul <4 x float> %9, %11 = fadd <4 x float> %10, %12 = bitcast <4 x float> %11 to <4 x i32> %13 = and <4 x i32> %12, %14 = extractelement <4 x i32> %13, i32 0 %15 = extractelement <4 x i32> %13, i32 1 %16 = extractelement <4 x i32> %13, i32 2 %17 = extractelement <4 x i32> %13, i32 3 %18 = bitcast i32 %14 to <2 x i16> %19 = bitcast i32 %15 to <2 x i16> %20 = shufflevector <2 x i16> %18, <2 x i16> %19, <2 x i32> %21 = bitcast i32 %16 to <2 x i16> %22 = bitcast i32 %17 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast <2 x i16> %20 to <4 x i8> %25 = bitcast <2 x i16> %23 to <4 x i8> %26 = shufflevector <4 x i8> %24, <4 x i8> %25, <4 x i32> %27 = and <4 x i8> %26, %28 = bitcast <4 x i8> %27 to i32 %29 = shl i32 %28, 8 %30 = or i32 %28, %29 %31 = shl i32 %30, 16 %32 = or i32 %30, %31 %33 = bitcast i32 %32 to <4 x i8> store <4 x i8> %33, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movss (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: xorps %xmm1, %xmm1 16: maxps %xmm1, %xmm0 19: movabsq $139845047042064, %rax 29: minps (%rax), %xmm0 32: movabsq $139845047042080, %rax 42: mulps (%rax), %xmm0 45: movabsq $139845047042096, %rax 55: movl $255, %ecx 60: movd %ecx, %xmm1 64: movabsq $139845047042112, %rcx 74: movabsq $139845047042128, %rdx 84: movabsq $139845047042144, %rsi 94: addps (%rax), %xmm0 97: andps (%rsi), %xmm0 100: pshufd $1, %xmm0, %xmm2 105: pshufd $3, %xmm0, %xmm3 110: movaps %xmm0, %xmm4 113: movhlps %xmm4, %xmm4 116: punpcklwd %xmm3, %xmm4 120: pshufb (%rdx), %xmm4 125: punpcklwd %xmm2, %xmm0 129: pshufb (%rcx), %xmm0 134: por %xmm4, %xmm0 138: pand %xmm1, %xmm0 142: movd %xmm0, %eax 146: movl %eax, %ecx 148: shll $8, %ecx 151: orl %eax, %ecx 153: movl %ecx, %eax 155: shll $16, %eax 158: orl %ecx, %eax 160: movl %eax, (%rdi) 162: popq %rbp 163: ret define void @fetch_yv12_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4723822 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4723822, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_yv12_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4723822 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4723822, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_yv16_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4723954 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4723954, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_yv16_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4723954 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4723954, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_iyuv_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724086 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724086, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_iyuv_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724086 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724086, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_nv12_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724218 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724218, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_nv12_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724218 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724218, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_nv21_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724350 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724350, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_nv21_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4724350 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4724350, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r4a4_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $4, %edx 39: pinsrd $1, %edx, %xmm0 45: xorps %xmm1, %xmm1 48: movabsq $139845047042064, %rdx 58: movabsq $139845047042080, %rsi 68: pinsrd $2, %eax, %xmm0 74: shrl %cl, %ecx 76: pinsrd $3, %ecx, %xmm0 82: pand (%rsi), %xmm0 86: cvtdq2ps %xmm0, %xmm0 89: mulps (%rdx), %xmm0 92: shufps $1, %xmm1, %xmm0 96: pshufd $120, %xmm0, %xmm0 101: movdqa %xmm0, (%rdi) 105: popq %rbp 106: ret define void @fetch_r4a4_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = lshr i32 %31, 8 %33 = and i32 %32, 255 %34 = shl i32 %31, 24 %35 = or i32 %33, %34 %36 = bitcast i32 %35 to <4 x i8> store <4 x i8> %36, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movzbl (%rsi), %eax 8: movd %eax, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: pextrd $3, %xmm0, %eax 23: pextrd $1, %xmm0, %ecx 29: shrl $4, %ecx 32: pextrd $2, %xmm0, %edx 38: shrl %cl, %edx 40: pinsrd $1, %ecx, %xmm0 46: movabsq $139845047042064, %rcx 56: movabsq $139845047042080, %rsi 66: movabsq $139845047042096, %r8 76: movabsq $139845047042112, %r9 86: movabsq $139845047042128, %r10 96: movabsq $139845047042144, %r11 106: xorps %xmm1, %xmm1 109: movabsq $139845047042160, %rbx 119: pinsrd $2, %edx, %xmm0 125: movabsq $139845047042176, %rdx 135: shrl %cl, %eax 137: pinsrd $3, %eax, %xmm0 143: pand (%rdx), %xmm0 147: cvtdq2ps %xmm0, %xmm0 150: mulps (%rbx), %xmm0 153: maxps %xmm1, %xmm0 156: minps (%r11), %xmm0 160: mulps (%r10), %xmm0 164: addps (%r9), %xmm0 168: andps (%r8), %xmm0 172: pshufd $1, %xmm0, %xmm1 177: pshufd $3, %xmm0, %xmm2 182: movaps %xmm0, %xmm3 185: movhlps %xmm3, %xmm3 188: punpcklwd %xmm2, %xmm3 192: pshufb (%rsi), %xmm3 197: punpcklwd %xmm1, %xmm0 201: pshufb (%rcx), %xmm0 206: por %xmm3, %xmm0 210: movd %xmm0, %eax 214: roll $24, %eax 217: andl $4278190335, %eax 223: movl %eax, (%rdi) 225: popq %rbx 226: popq %rbp 227: ret define void @fetch_a4r4_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = shufflevector <4 x float> %11, <4 x float> , <4 x i32> store <4 x float> %12, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzbl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $4, %edx 39: pinsrd $1, %edx, %xmm0 45: xorps %xmm1, %xmm1 48: movabsq $139845047042064, %rdx 58: movabsq $139845047042080, %rsi 68: pinsrd $2, %eax, %xmm0 74: shrl %cl, %ecx 76: pinsrd $3, %ecx, %xmm0 82: pand (%rsi), %xmm0 86: cvtdq2ps %xmm0, %xmm0 89: mulps (%rdx), %xmm0 92: shufps $4, %xmm1, %xmm0 96: pshufd $120, %xmm0, %xmm0 101: movdqa %xmm0, (%rdi) 105: popq %rbp 106: ret define void @fetch_a4r4_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = load i8* %1, align 1 %5 = zext i8 %4 to i32 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = fmul <4 x float> %10, %12 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %11, <4 x float> zeroinitializer) %13 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %12, <4 x float> ) %14 = fmul <4 x float> %13, %15 = fadd <4 x float> %14, %16 = bitcast <4 x float> %15 to <4 x i32> %17 = and <4 x i32> %16, %18 = extractelement <4 x i32> %17, i32 0 %19 = extractelement <4 x i32> %17, i32 1 %20 = extractelement <4 x i32> %17, i32 2 %21 = extractelement <4 x i32> %17, i32 3 %22 = bitcast i32 %18 to <2 x i16> %23 = bitcast i32 %19 to <2 x i16> %24 = shufflevector <2 x i16> %22, <2 x i16> %23, <2 x i32> %25 = bitcast i32 %20 to <2 x i16> %26 = bitcast i32 %21 to <2 x i16> %27 = shufflevector <2 x i16> %25, <2 x i16> %26, <2 x i32> %28 = bitcast <2 x i16> %24 to <4 x i8> %29 = bitcast <2 x i16> %27 to <4 x i8> %30 = shufflevector <4 x i8> %28, <4 x i8> %29, <4 x i32> %31 = bitcast <4 x i8> %30 to i32 %32 = and i32 %31, 255 %33 = shl i32 %31, 16 %34 = and i32 %33, -16777216 %35 = or i32 %32, %34 %36 = bitcast i32 %35 to <4 x i8> store <4 x i8> %36, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: movzbl (%rsi), %eax 8: movd %eax, %xmm0 12: pshufd $0, %xmm0, %xmm0 17: pextrd $3, %xmm0, %eax 23: pextrd $1, %xmm0, %ecx 29: shrl $4, %ecx 32: pextrd $2, %xmm0, %edx 38: shrl %cl, %edx 40: pinsrd $1, %ecx, %xmm0 46: movabsq $139845047042064, %rcx 56: movabsq $139845047042080, %rsi 66: movabsq $139845047042096, %r8 76: movabsq $139845047042112, %r9 86: movabsq $139845047042128, %r10 96: movabsq $139845047042144, %r11 106: xorps %xmm1, %xmm1 109: movabsq $139845047042160, %rbx 119: pinsrd $2, %edx, %xmm0 125: movabsq $139845047042176, %rdx 135: shrl %cl, %eax 137: pinsrd $3, %eax, %xmm0 143: pand (%rdx), %xmm0 147: cvtdq2ps %xmm0, %xmm0 150: mulps (%rbx), %xmm0 153: maxps %xmm1, %xmm0 156: minps (%r11), %xmm0 160: mulps (%r10), %xmm0 164: addps (%r9), %xmm0 168: andps (%r8), %xmm0 172: pshufd $1, %xmm0, %xmm1 177: pshufd $3, %xmm0, %xmm2 182: movaps %xmm0, %xmm3 185: movhlps %xmm3, %xmm3 188: punpcklwd %xmm2, %xmm3 192: pshufb (%rsi), %xmm3 197: punpcklwd %xmm1, %xmm0 201: pshufb (%rcx), %xmm0 206: por %xmm3, %xmm0 210: movd %xmm0, %eax 214: movl %eax, %ecx 216: shll $16, %ecx 219: andl $4278190080, %ecx 225: movzbl %al, %eax 228: orl %ecx, %eax 230: movl %eax, (%rdi) 232: popq %rbx 233: popq %rbp 234: ret define void @fetch_r8a8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $8, %edx 39: pinsrd $1, %edx, %xmm0 45: xorps %xmm1, %xmm1 48: movabsq $139845047042064, %rdx 58: movabsq $139845047042080, %rsi 68: pinsrd $2, %eax, %xmm0 74: shrl %cl, %ecx 76: pinsrd $3, %ecx, %xmm0 82: pand (%rsi), %xmm0 86: cvtdq2ps %xmm0, %xmm0 89: mulps (%rdx), %xmm0 92: shufps $4, %xmm1, %xmm0 96: pshufd $120, %xmm0, %xmm0 101: movdqa %xmm0, (%rdi) 105: popq %rbp 106: ret define void @fetch_r8a8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = bitcast i32 %6 to <4 x i8> %8 = bitcast <4 x i8> %7 to i32 %9 = and i32 %8, 255 %10 = shl i32 %8, 16 %11 = and i32 %10, -16777216 %12 = or i32 %9, %11 %13 = bitcast i32 %12 to <4 x i8> store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movl %eax, %ecx 9: shll $16, %ecx 12: orl %eax, %ecx 14: andl $4278190335, %ecx 20: movl %ecx, (%rdi) 22: popq %rbp 23: ret define void @fetch_a8r8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = insertelement <4 x i32> undef, i32 %6, i32 0 %8 = shufflevector <4 x i32> %7, <4 x i32> undef, <4 x i32> zeroinitializer %9 = lshr <4 x i32> %8, %10 = and <4 x i32> %9, %11 = sitofp <4 x i32> %10 to <4 x float> %12 = fmul <4 x float> %11, %13 = shufflevector <4 x float> %12, <4 x float> , <4 x i32> store <4 x float> %13, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: movd %eax, %xmm0 11: pshufd $0, %xmm0, %xmm0 16: pextrd $2, %xmm0, %eax 22: shrl %cl, %eax 24: pextrd $3, %xmm0, %ecx 30: pextrd $1, %xmm0, %edx 36: shrl $8, %edx 39: pinsrd $1, %edx, %xmm0 45: xorps %xmm1, %xmm1 48: movabsq $139845047042064, %rdx 58: movabsq $139845047042080, %rsi 68: pinsrd $2, %eax, %xmm0 74: shrl %cl, %ecx 76: pinsrd $3, %ecx, %xmm0 82: pand (%rsi), %xmm0 86: cvtdq2ps %xmm0, %xmm0 89: mulps (%rdx), %xmm0 92: shufps $1, %xmm1, %xmm0 96: pshufd $120, %xmm0, %xmm0 101: movdqa %xmm0, (%rdi) 105: popq %rbp 106: ret define void @fetch_a8r8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i16* %5 = load i16* %4, align 2 %6 = zext i16 %5 to i32 %7 = bitcast i32 %6 to <4 x i8> %8 = bitcast <4 x i8> %7 to i32 %9 = lshr i32 %8, 8 %10 = and i32 %9, 255 %11 = shl i32 %8, 24 %12 = or i32 %10, %11 %13 = bitcast i32 %12 to <4 x i8> store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movzwl (%rsi), %eax 7: roll $24, %eax 10: andl $4278190335, %eax 16: movl %eax, (%rdi) 18: popq %rbp 19: ret define void @fetch_r10g10b10a2_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4557004 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4557004, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r10g10b10a2_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4557004 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4557004, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r10g10b10a2_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4558615 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4558615, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_r10g10b10a2_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4558615 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4558615, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_b10g10r10a2_uscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = shufflevector <4 x float> %10, <4 x float> undef, <4 x i32> store <4 x float> %11, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: pinsrd $3, %eax, %xmm0 68: pand (%rcx), %xmm0 72: cvtdq2ps %xmm0, %xmm0 75: pshufd $198, %xmm0, %xmm0 80: movdqa %xmm0, (%rdi) 84: popq %rbp 85: ret define void @fetch_b10g10r10a2_uscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = insertelement <4 x i32> undef, i32 %5, i32 0 %7 = shufflevector <4 x i32> %6, <4 x i32> undef, <4 x i32> zeroinitializer %8 = lshr <4 x i32> %7, %9 = and <4 x i32> %8, %10 = sitofp <4 x i32> %9 to <4 x float> %11 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %10, <4 x float> zeroinitializer) %12 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %11, <4 x float> ) %13 = fmul <4 x float> %12, %14 = fadd <4 x float> %13, %15 = bitcast <4 x float> %14 to <4 x i32> %16 = and <4 x i32> %15, %17 = extractelement <4 x i32> %16, i32 0 %18 = extractelement <4 x i32> %16, i32 1 %19 = extractelement <4 x i32> %16, i32 2 %20 = extractelement <4 x i32> %16, i32 3 %21 = bitcast i32 %17 to <2 x i16> %22 = bitcast i32 %18 to <2 x i16> %23 = shufflevector <2 x i16> %21, <2 x i16> %22, <2 x i32> %24 = bitcast i32 %19 to <2 x i16> %25 = bitcast i32 %20 to <2 x i16> %26 = shufflevector <2 x i16> %24, <2 x i16> %25, <2 x i32> %27 = bitcast <2 x i16> %23 to <4 x i8> %28 = bitcast <2 x i16> %26 to <4 x i8> %29 = shufflevector <4 x i8> %27, <4 x i8> %28, <4 x i32> %30 = bitcast <4 x i8> %29 to i32 %31 = lshr i32 %30, 16 %32 = and i32 %31, 255 %33 = and i32 %30, -16711936 %34 = or i32 %32, %33 %35 = shl i32 %30, 16 %36 = and i32 %35, 16711680 %37 = or i32 %34, %36 %38 = bitcast i32 %37 to <4 x i8> store <4 x i8> %38, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movd (%rsi), %xmm0 8: pshufd $0, %xmm0, %xmm0 13: pextrd $3, %xmm0, %eax 19: pextrd $2, %xmm0, %ecx 25: pextrd $1, %xmm0, %edx 31: shrl $10, %edx 34: pinsrd $1, %edx, %xmm0 40: shrl $20, %ecx 43: pinsrd $2, %ecx, %xmm0 49: shrl $30, %eax 52: movabsq $139845047042064, %rcx 62: movabsq $139845047042080, %rdx 72: movabsq $139845047042096, %rsi 82: movabsq $139845047042112, %r8 92: movabsq $139845047042128, %r9 102: movabsq $139845047042144, %r10 112: xorps %xmm1, %xmm1 115: movabsq $139845047042160, %r11 125: pinsrd $3, %eax, %xmm0 131: pand (%r11), %xmm0 136: cvtdq2ps %xmm0, %xmm0 139: maxps %xmm1, %xmm0 142: minps (%r10), %xmm0 146: mulps (%r9), %xmm0 150: addps (%r8), %xmm0 154: andps (%rsi), %xmm0 157: pshufd $1, %xmm0, %xmm1 162: pshufd $3, %xmm0, %xmm2 167: movaps %xmm0, %xmm3 170: movhlps %xmm3, %xmm3 173: punpcklwd %xmm2, %xmm3 177: pshufb (%rdx), %xmm3 182: punpcklwd %xmm1, %xmm0 186: pshufb (%rcx), %xmm0 191: por %xmm3, %xmm0 195: movd %xmm0, %eax 199: movl %eax, %ecx 201: andl $4278255360, %ecx 207: movl %eax, %edx 209: shrl $16, %edx 212: movzbl %dl, %edx 215: orl %ecx, %edx 217: shll $16, %eax 220: andl $16711680, %eax 226: orl %edx, %eax 228: movl %eax, (%rdi) 230: popq %rbp 231: ret define void @fetch_b10g10r10a2_sscaled_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4561886 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4561886, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_b10g10r10a2_sscaled_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4561886 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4561886, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_b10g10r10a2_snorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4563497 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4563497, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_b10g10r10a2_snorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4563497 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4563497, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_etc1_rgb8_float(<4 x float>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4714407 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 store <4 x float> %6, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4714407, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm0 34: movaps %xmm0, (%rbx) 37: addq $24, %rsp 41: popq %rbx 42: popq %rbp 43: ret define void @fetch_etc1_rgb8_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = alloca <4 x float>, align 16 store <4 x float> zeroinitializer, <4 x float>* %4, align 16 %5 = getelementptr inbounds <4 x float>* %4, i64 0, i64 0 call void inttoptr (i64 4714407 to void (float*, i8*, i32, i32)*)(float* %5, i8* %1, i32 %2, i32 %3) %6 = load <4 x float>* %4, align 16 %7 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %6, <4 x float> zeroinitializer) %8 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %7, <4 x float> ) %9 = fmul <4 x float> %8, %10 = fadd <4 x float> %9, %11 = bitcast <4 x float> %10 to <4 x i32> %12 = and <4 x i32> %11, %13 = extractelement <4 x i32> %12, i32 0 %14 = extractelement <4 x i32> %12, i32 1 %15 = extractelement <4 x i32> %12, i32 2 %16 = extractelement <4 x i32> %12, i32 3 %17 = bitcast i32 %13 to <2 x i16> %18 = bitcast i32 %14 to <2 x i16> %19 = shufflevector <2 x i16> %17, <2 x i16> %18, <2 x i32> %20 = bitcast i32 %15 to <2 x i16> %21 = bitcast i32 %16 to <2 x i16> %22 = shufflevector <2 x i16> %20, <2 x i16> %21, <2 x i32> %23 = bitcast <2 x i16> %19 to <4 x i8> %24 = bitcast <2 x i16> %22 to <4 x i8> %25 = shufflevector <4 x i8> %23, <4 x i8> %24, <4 x i32> store <4 x i8> %25, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: pushq %rbx 5: subq $24, %rsp 9: movq %rdi, %rbx 12: xorps %xmm0, %xmm0 15: movaps %xmm0, -32(%rbp) 19: leaq -32(%rbp), %rdi 23: movl $4714407, %eax 28: callq *%rax 30: movaps -32(%rbp), %xmm1 34: xorps %xmm0, %xmm0 37: maxps %xmm0, %xmm1 40: movabsq $139845047042064, %rax 50: minps (%rax), %xmm1 53: movabsq $139845047042080, %rax 63: movabsq $139845047042096, %rcx 73: movabsq $139845047042112, %rdx 83: movabsq $139845047042128, %rsi 93: movabsq $139845047042144, %rdi 103: mulps (%rdi), %xmm1 106: addps (%rsi), %xmm1 109: andps (%rdx), %xmm1 112: pshufd $1, %xmm1, %xmm0 117: pshufd $3, %xmm1, %xmm2 122: movaps %xmm1, %xmm3 125: movhlps %xmm3, %xmm3 128: punpcklwd %xmm2, %xmm3 132: pshufb (%rcx), %xmm3 137: punpcklwd %xmm0, %xmm1 141: pshufb (%rax), %xmm1 146: por %xmm3, %xmm1 150: movd %xmm1, (%rbx) 154: addq $24, %rsp 158: popq %rbx 159: popq %rbp 160: ret define void @fetch_r8g8_r8b8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %y = and i32 %7, 255 %u = and i32 %5, 65280 %8 = lshr i32 %5, 8 %9 = and i32 %8, 16711680 %10 = or i32 %y, %u %11 = or i32 %10, %9 %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> %14 = extractelement <4 x i8> %13, i32 0 %15 = zext i8 %14 to i32 %16 = insertelement <4 x i32> undef, i32 %15, i32 0 %17 = extractelement <4 x i8> %13, i32 1 %18 = zext i8 %17 to i32 %19 = insertelement <4 x i32> %16, i32 %18, i32 1 %20 = extractelement <4 x i8> %13, i32 2 %21 = zext i8 %20 to i32 %22 = insertelement <4 x i32> %19, i32 %21, i32 2 %23 = extractelement <4 x i8> %13, i32 3 %24 = zext i8 %23 to i32 %25 = insertelement <4 x i32> %22, i32 %24, i32 3 %26 = sitofp <4 x i32> %25 to <4 x float> %27 = fmul <4 x float> %26, store <4 x float> %27, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: movzbl %dl, %ecx 18: movl %eax, %edx 20: andl $65280, %edx 26: orl %ecx, %edx 28: shrl $8, %eax 31: andl $16711680, %eax 37: movabsq $139845047042064, %rcx 47: orl %edx, %eax 49: orl $4278190080, %eax 55: movd %eax, %xmm0 59: pextrb $1, %xmm0, %eax 65: pextrb $0, %xmm0, %edx 71: movd %edx, %xmm1 75: pinsrd $1, %eax, %xmm1 81: pextrb $2, %xmm0, %eax 87: pinsrd $2, %eax, %xmm1 93: pextrb $3, %xmm0, %eax 99: pinsrd $3, %eax, %xmm1 105: cvtdq2ps %xmm1, %xmm0 108: mulps (%rcx), %xmm0 111: movaps %xmm0, (%rdi) 114: popq %rbp 115: ret define void @fetch_r8g8_r8b8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = lshr i32 %5, %6 %y = and i32 %7, 255 %u = and i32 %5, 65280 %8 = lshr i32 %5, 8 %9 = and i32 %8, 16711680 %10 = or i32 %y, %u %11 = or i32 %10, %9 %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: movb %dl, %cl 11: movl %eax, %edx 13: shrl %cl, %edx 15: movzbl %dl, %ecx 18: movl %eax, %edx 20: andl $65280, %edx 26: orl %ecx, %edx 28: shrl $8, %eax 31: andl $16711680, %eax 37: orl %edx, %eax 39: orl $4278190080, %eax 45: movl %eax, (%rdi) 47: popq %rbp 48: ret define void @fetch_g8r8_b8r8_unorm_float(<4 x float>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %y = and i32 %8, 255 %v = and i32 %5, 16711680 %u = shl i32 %5, 8 %9 = and i32 %u, 65280 %10 = or i32 %y, %9 %11 = or i32 %10, %v %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> %14 = extractelement <4 x i8> %13, i32 0 %15 = zext i8 %14 to i32 %16 = insertelement <4 x i32> undef, i32 %15, i32 0 %17 = extractelement <4 x i8> %13, i32 1 %18 = zext i8 %17 to i32 %19 = insertelement <4 x i32> %16, i32 %18, i32 1 %20 = extractelement <4 x i8> %13, i32 2 %21 = zext i8 %20 to i32 %22 = insertelement <4 x i32> %19, i32 %21, i32 2 %23 = extractelement <4 x i8> %13, i32 3 %24 = zext i8 %23 to i32 %25 = insertelement <4 x i32> %22, i32 %24, i32 3 %26 = sitofp <4 x i32> %25 to <4 x float> %27 = fmul <4 x float> %26, store <4 x float> %27, <4 x float>* %0, align 16 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: leal 8(%rdx), %ecx 12: movl %eax, %edx 14: shrl %cl, %edx 16: movzbl %dl, %ecx 19: movl %eax, %edx 21: shll $8, %edx 24: movzwl %dx, %edx 27: movabsq $139845047042064, %rsi 37: orl %ecx, %edx 39: andl $16711680, %eax 45: orl %edx, %eax 47: orl $4278190080, %eax 53: movd %eax, %xmm0 57: pextrb $1, %xmm0, %eax 63: pextrb $0, %xmm0, %ecx 69: movd %ecx, %xmm1 73: pinsrd $1, %eax, %xmm1 79: pextrb $2, %xmm0, %eax 85: pinsrd $2, %eax, %xmm1 91: pextrb $3, %xmm0, %eax 97: pinsrd $3, %eax, %xmm1 103: cvtdq2ps %xmm1, %xmm0 106: mulps (%rsi), %xmm0 109: movaps %xmm0, (%rdi) 112: popq %rbp 113: ret define void @fetch_g8r8_b8r8_unorm_unorm8(<4 x i8>*, i8*, i32, i32) { entry: %4 = bitcast i8* %1 to i32* %5 = load i32* %4, align 4 %6 = shl i32 %2, 4 %7 = or i32 %6, 8 %8 = lshr i32 %5, %7 %y = and i32 %8, 255 %v = and i32 %5, 16711680 %u = shl i32 %5, 8 %9 = and i32 %u, 65280 %10 = or i32 %y, %9 %11 = or i32 %10, %v %12 = or i32 %11, -16777216 %13 = bitcast i32 %12 to <4 x i8> store <4 x i8> %13, <4 x i8>* %0, align 4 ret void } 0: pushq %rbp 1: movq %rsp, %rbp 4: movl (%rsi), %eax 6: shll $4, %edx 9: leal 8(%rdx), %ecx 12: movl %eax, %edx 14: shrl %cl, %edx 16: movzbl %dl, %ecx 19: movl %eax, %edx 21: shll $8, %edx 24: movzwl %dx, %edx 27: orl %ecx, %edx 29: andl $16711680, %eax 35: orl %edx, %eax 37: orl $4278190080, %eax 43: movl %eax, (%rdi) 45: popq %rbp 46: ret