diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 894d0d2..c903695 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -345,13 +345,9 @@ int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first) } #endif -static int is_const(int sel) +static bool is_const(const struct r600_bc_alu_src *src) { - if (sel > 255 && sel < 512) - return 1; - if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL) - return 1; - return 0; + return src->cb; } static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) @@ -362,9 +358,9 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu) alu->bank_swizzle = alu->bank_swizzle_force; return 0; } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); + swizzle_key = (is_const(&alu->src[0]) ? 4 : 0) + + (is_const(&alu->src[1]) ? 2 : 0) + + (is_const(&alu->src[2]) ? 1 : 0); alu->bank_swizzle = bank_swizzle_scl[swizzle_key]; return 0; @@ -378,9 +374,9 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu) alu->bank_swizzle = alu->bank_swizzle_force; return 0; } - swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + - (is_const(alu->src[1].sel) ? 2 : 0 ) + - (is_const(alu->src[2].sel) ? 1 : 0 ); + swizzle_key = (is_const(&alu->src[0]) ? 4 : 0) + + (is_const(&alu->src[1]) ? 2 : 0) + + (is_const(&alu->src[2]) ? 1 : 0); alu->bank_swizzle = bank_swizzle_vec[swizzle_key]; return 0; @@ -410,6 +406,115 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al return 0; } +static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type) +{ + unsigned int free_lines = 0; + unsigned int cache_line[3]; + unsigned int count = 0; + unsigned int i, j; + int r; + + for (i = 0; i < 3; ++i) { + bool found = false; + unsigned int line; + + if (!alu->src[i].cb) + continue; + + line = (alu->src[i].sel / 32) * 2; + + for (j = 0; j < count; ++j) { + if (cache_line[j] == line) { + found = true; + break; + } + } + + if (!found) + cache_line[count++] = line; + } + + assert(count < 3); + if (count >= 3) /* This should never happen, really. */ + return -ENOMEM; + + if (!bc->cf_last->kcache0_mode) + ++free_lines; + if (!bc->cf_last->kcache1_mode) + ++free_lines; + + j = count; + for (i = 0; i < count; ++i) { + if (cache_line[i] == bc->cf_last->kcache0_addr + && bc->cf_last->kcache0_mode == V_SQ_CF_KCACHE_LOCK_2) { + --j; + continue; + } + if (cache_line[i] == bc->cf_last->kcache1_addr + && bc->cf_last->kcache1_mode == V_SQ_CF_KCACHE_LOCK_2) { + --j; + continue; + } + } + + if (j > free_lines) { + if ((r = r600_bc_add_cf(bc))) { + return r; + } + bc->cf_last->inst = (type << 3); + } + + for (i = 0; i < count; ++i) { + if (cache_line[i] == bc->cf_last->kcache0_addr + && bc->cf_last->kcache0_mode == V_SQ_CF_KCACHE_LOCK_2) { + --j; + continue; + } + if (cache_line[i] == bc->cf_last->kcache1_addr + && bc->cf_last->kcache1_mode == V_SQ_CF_KCACHE_LOCK_2) { + --j; + continue; + } + + if (!bc->cf_last->kcache0_mode) { + bc->cf_last->kcache0_bank = 0; + bc->cf_last->kcache0_addr = cache_line[i]; + bc->cf_last->kcache0_mode = V_SQ_CF_KCACHE_LOCK_2; + continue; + } + + if (!bc->cf_last->kcache1_mode) { + bc->cf_last->kcache1_bank = 0; + bc->cf_last->kcache1_addr = cache_line[i]; + bc->cf_last->kcache1_mode = V_SQ_CF_KCACHE_LOCK_2; + continue; + } + } + + for (i = 0; i < 3; ++i) { + unsigned int line; + + if (!alu->src[i].cb) + continue; + + line = (alu->src[i].sel / 32) * 2; + + if (line == bc->cf_last->kcache0_addr) { + alu->src[i].sel = 128 + (alu->src[i].sel - (line * 16)); + alu->src[i].cb = false; + continue; + } + + if (line == bc->cf_last->kcache1_addr) { + alu->src[i].sel = 160 + (alu->src[i].sel - (line * 16)); + alu->src[i].cb = false; + continue; + } + } + + return 0; +} + int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); @@ -431,6 +536,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } bc->cf_last->inst = (type << 3); } + + if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) { + free(nalu); + return r; + } + if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; LIST_INITHEAD(&nalu->bs_list); @@ -439,20 +550,20 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int } /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) * worst case */ - if (alu->last && (bc->cf_last->ndw >> 1) >= 120) { + if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) { bc->force_add_cf = 1; } /* number of gpr == the last gpr used in any alu */ for (i = 0; i < 3; i++) { - if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) { - bc->ngpr = alu->src[i].sel + 1; + if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) { + bc->ngpr = nalu->src[i].sel + 1; } /* compute how many literal are needed * either 2 or 4 literals */ - if (alu->src[i].sel == 253) { - if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) { - nalu->nliteral = (alu->src[i].chan + 2) & 0x6; + if (nalu->src[i].sel == 253) { + if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) { + nalu->nliteral = (nalu->src[i].chan + 2) & 0x6; } } } @@ -462,33 +573,16 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int nalu->nliteral = lalu->nliteral; } } - if (alu->dst.sel >= bc->ngpr) { - bc->ngpr = alu->dst.sel + 1; + if (nalu->dst.sel >= bc->ngpr) { + bc->ngpr = nalu->dst.sel + 1; } LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu); /* each alu use 2 dwords */ bc->cf_last->ndw += 2; bc->ndw += 2; - /* The following configuration provides 64 128-bit constants. - * Each cacheline holds 16 128-bit constants and each - * kcache can lock 2 cachelines and there are 2 kcaches per - * ALU clause for a max of 64 constants. - * For supporting more than 64 constants, the code needs - * to be broken down into multiple ALU clauses. - */ - /* select the constant buffer (0-15) for each kcache */ - bc->cf_last->kcache0_bank = 0; - bc->cf_last->kcache1_bank = 0; - /* lock 2 cachelines per kcache; 4 total */ - bc->cf_last->kcache0_mode = V_SQ_CF_KCACHE_LOCK_2; - bc->cf_last->kcache1_mode = V_SQ_CF_KCACHE_LOCK_2; - /* set the cacheline offsets for each kcache */ - bc->cf_last->kcache0_addr = 0; - bc->cf_last->kcache1_addr = 2; - /* process cur ALU instructions for bank swizzle */ - if (alu->last) { + if (nalu->last) { check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); bc->cf_last->curr_bs_head = NULL; } diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index b147f0f..cdab33a 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -37,6 +37,7 @@ struct r600_bc_alu_src { unsigned neg; unsigned abs; unsigned rel; + bool cb; }; struct r600_bc_alu_dst { diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index bb5038c..4679a25 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -747,7 +747,10 @@ static int tgsi_src(struct r600_shader_ctx *ctx, r600_src->rel = V_SQ_REL_RELATIVE; r600_src->neg = tgsi_src->Register.Negate; r600_src->abs = tgsi_src->Register.Absolute; - r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; + if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) + r600_src->cb = true; + else + r600_src->sel += ctx->file_offset[tgsi_src->Register.File]; return 0; } @@ -810,6 +813,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s alu.src[0].sel = r600_src[i].sel; alu.src[0].chan = k; alu.src[0].rel = r600_src[i].rel; + alu.src[0].cb = r600_src[i].cb; alu.dst.sel = treg; alu.dst.chan = k; alu.dst.write = 1; @@ -820,7 +824,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s return r; } r600_src[i].sel = treg; - r600_src[i].rel =0; + r600_src[i].rel = 0; + r600_src[i].cb = false; j--; } }