diff --git a/src/gallium/auxiliary/gallivm/lp_bld_gather.c b/src/gallium/auxiliary/gallivm/lp_bld_gather.c index ccd0376..c2e777c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_gather.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_gather.c @@ -419,40 +419,58 @@ lp_build_gather(struct gallivm_state *gallivm, * cast to 2x32f type, so the fetch is always int and on top of that * we avoid the vec pad and use scalar zext due the above mentioned * issue). * Note this is optimized for x86 sse2 and up backend. Could be tweaked * for other archs if necessary... */ if (((src_width % 32) == 0) && ((src_width % dst_type.width) == 0) && (dst_type.length > 1)) { /* use vector fetch (if dst_type is vector) */ vec_fetch = TRUE; if (dst_type.floating) { fetch_type = lp_type_float_vec(dst_type.width, src_width); } else { fetch_type = lp_type_int_vec(dst_type.width, src_width); } /* intentionally not using lp_build_vec_type here */ src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type), fetch_type.length); fetch_dst_type = fetch_type; fetch_dst_type.length = dst_type.length; + } else if (((src_width % 16) == 0) && (dst_type.length > 1)) { + vec_fetch = TRUE; + + if (dst_type.floating) { + fetch_type = lp_type_float_vec(16, src_width); + } else { + fetch_type = lp_type_int_vec(16, src_width); + } + + src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type), + fetch_type.length); + if (dst_type.floating) { + fetch_dst_type = lp_type_float_vec(16, dst_type.width * dst_type.length); + } else { + fetch_dst_type = lp_type_int_vec(16, dst_type.width * dst_type.length); + } + fetch_dst_type.length = dst_type.length * 2; + } else { /* use scalar fetch */ vec_fetch = FALSE; if (dst_type.floating && ((src_width == 32) || (src_width == 64))) { fetch_type = lp_type_float(src_width); } else { fetch_type = lp_type_int(src_width); } src_type = lp_build_vec_type(gallivm, fetch_type); fetch_dst_type = fetch_type; fetch_dst_type.width = dst_type.width * dst_type.length; } if (length == 1) { /* Scalar */ res = lp_build_gather_elem_vec(gallivm, length, src_width, src_type, fetch_dst_type, aligned, base_ptr, offsets, 0, vector_justify); return LLVMBuildBitCast(gallivm->builder, res,