diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 894d0d2..c903695 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -345,13 +345,26 @@ int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
 }
 #endif
 
-static int is_const(int sel)
+/*
+ * Tell whether an ALU source operand is a constant.
+ *
+ * r600_bc_alloc_kcache_lines() clears ->cb once a source has been rewritten
+ * to a kcache selector, so the flag alone would miss every source that was
+ * already rewritten; the selector ranges are therefore checked as well.
+ */
+static bool is_const(const struct r600_bc_alu_src *src)
 {
-	if (sel > 255 && sel < 512)
-		return 1;
-	if (sel >= V_SQ_ALU_SRC_0 && sel <= V_SQ_ALU_SRC_LITERAL)
-		return 1;
-	return 0;
+	/* Constant buffer source not yet mapped to a kcache line. */
+	if (src->cb)
+		return true;
+	/* kcache selectors: 128-159 for set 0, 160-191 for set 1. */
+	if (src->sel >= 128 && src->sel < 192)
+		return true;
+	/* Selectors 256-511 (presumably the constant file). */
+	if (src->sel > 255 && src->sel < 512)
+		return true;
+	/* V_SQ_ALU_SRC_0 .. V_SQ_ALU_SRC_LITERAL selectors. */
+	return src->sel >= V_SQ_ALU_SRC_0 && src->sel <= V_SQ_ALU_SRC_LITERAL;
 }
 
 static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
@@ -362,9 +358,9 @@ static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
 		alu->bank_swizzle = alu->bank_swizzle_force;
 		return 0;
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
+	swizzle_key = (is_const(&alu->src[0]) ? 4 : 0) +
+		(is_const(&alu->src[1]) ? 2 : 0) +
+		(is_const(&alu->src[2]) ? 1 : 0);
 
 	alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
 	return 0;
@@ -378,9 +374,9 @@ static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
 		alu->bank_swizzle = alu->bank_swizzle_force;
 		return 0;
 	}
-	swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-		(is_const(alu->src[1].sel) ? 2 : 0 ) + 
-		(is_const(alu->src[2].sel) ? 1 : 0 );
+	swizzle_key = (is_const(&alu->src[0]) ? 4 : 0) +
+		(is_const(&alu->src[1]) ? 2 : 0) +
+		(is_const(&alu->src[2]) ? 1 : 0);
 
 	alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
 	return 0;
@@ -410,6 +406,115 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al
 	return 0;
 }
 
+/* Reserve kcache lines for the constant-buffer sources of one ALU instruction.
+ *
+ * Each source with ->cb set needs its cache line pair locked in one of the two
+ * kcache sets of the current clause (bc->cf_last); a new clause of the given
+ * type is started when too few sets are free. Matching sources are rewritten
+ * to kcache selectors (128+ for set 0, 160+ for set 1) with ->cb cleared.
+ *
+ * Returns 0 on success, the error from r600_bc_add_cf(), or -ENOMEM when three
+ * distinct line pairs are required (which also trips the assert).
+ *
+ * NOTE(review): a clause started here while an instruction group is still open
+ * (previous ALU without ->last) would split that group; confirm with callers.
+ */
+static int r600_bc_alloc_kcache_lines(struct r600_bc *bc, struct r600_bc_alu *alu, int type)
+{
+	unsigned int free_lines = 0;
+	unsigned int cache_line[3];
+	unsigned int count = 0;
+	unsigned int needed;
+	unsigned int i, j;
+	int r;
+
+	/* Collect the distinct cache line pairs referenced by the sources. */
+	for (i = 0; i < 3; ++i) {
+		unsigned int line;
+
+		if (!alu->src[i].cb)
+			continue;
+
+		line = (alu->src[i].sel / 32) * 2;
+		for (j = 0; j < count; ++j) {
+			if (cache_line[j] == line)
+				break;
+		}
+
+		if (j == count)
+			cache_line[count++] = line;
+	}
+
+	assert(count < 3);
+	if (count >= 3) /* This should never happen, really. */
+		return -ENOMEM;
+
+	if (!bc->cf_last->kcache0_mode)
+		++free_lines;
+	if (!bc->cf_last->kcache1_mode)
+		++free_lines;
+
+	/* Lines locked by earlier instructions need no new set. They stay in
+	 * cache_line[] since a new clause may have to lock them again. */
+	needed = count;
+	for (i = 0; i < count; ++i) {
+		if ((bc->cf_last->kcache0_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& bc->cf_last->kcache0_addr == cache_line[i])
+			|| (bc->cf_last->kcache1_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& bc->cf_last->kcache1_addr == cache_line[i]))
+			--needed;
+	}
+
+	/* Start a new ALU clause if the current one cannot hold them. */
+	if (needed > free_lines) {
+		r = r600_bc_add_cf(bc);
+		if (r)
+			return r;
+		bc->cf_last->inst = (type << 3);
+	}
+
+	/* Lock every required line pair that is not locked yet. */
+	for (i = 0; i < count; ++i) {
+		if ((bc->cf_last->kcache0_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& bc->cf_last->kcache0_addr == cache_line[i])
+			|| (bc->cf_last->kcache1_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& bc->cf_last->kcache1_addr == cache_line[i]))
+			continue;
+
+		if (!bc->cf_last->kcache0_mode) {
+			bc->cf_last->kcache0_bank = 0;
+			bc->cf_last->kcache0_addr = cache_line[i];
+			bc->cf_last->kcache0_mode = V_SQ_CF_KCACHE_LOCK_2;
+		} else if (!bc->cf_last->kcache1_mode) {
+			bc->cf_last->kcache1_bank = 0;
+			bc->cf_last->kcache1_addr = cache_line[i];
+			bc->cf_last->kcache1_mode = V_SQ_CF_KCACHE_LOCK_2;
+		}
+	}
+
+	/* Rewrite the sources to address the locked kcache sets. */
+	for (i = 0; i < 3; ++i) {
+		unsigned int line;
+
+		if (!alu->src[i].cb)
+			continue;
+
+		line = (alu->src[i].sel / 32) * 2;
+
+		if (bc->cf_last->kcache0_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& line == bc->cf_last->kcache0_addr) {
+			alu->src[i].sel = 128 + (alu->src[i].sel - (line * 16));
+			alu->src[i].cb = false;
+		} else if (bc->cf_last->kcache1_mode == V_SQ_CF_KCACHE_LOCK_2
+			&& line == bc->cf_last->kcache1_addr) {
+			alu->src[i].sel = 160 + (alu->src[i].sel - (line * 16));
+			alu->src[i].cb = false;
+		}
+	}
+
+	return 0;
+}
+
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
 	struct r600_bc_alu *nalu = r600_bc_alu();
@@ -431,6 +536,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 		}
 		bc->cf_last->inst = (type << 3);
 	}
+
+	if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+		free(nalu);
+		return r;
+	}
+
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
 		LIST_INITHEAD(&nalu->bs_list);
@@ -439,20 +550,20 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 	}
 	/* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
 	 * worst case */
-	if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
+	if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) {
 		bc->force_add_cf = 1;
 	}
 	/* number of gpr == the last gpr used in any alu */
 	for (i = 0; i < 3; i++) {
-		if (alu->src[i].sel >= bc->ngpr && alu->src[i].sel < 128) {
-			bc->ngpr = alu->src[i].sel + 1;
+		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
+			bc->ngpr = nalu->src[i].sel + 1;
 		}
 		/* compute how many literal are needed
 		 * either 2 or 4 literals
 		 */
-		if (alu->src[i].sel == 253) {
-			if (((alu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
-				nalu->nliteral = (alu->src[i].chan + 2) & 0x6;
+		if (nalu->src[i].sel == 253) {
+			if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
+				nalu->nliteral = (nalu->src[i].chan + 2) & 0x6;
 			}
 		}
 	}
@@ -462,33 +573,16 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 			nalu->nliteral = lalu->nliteral;
 		}
 	}
-	if (alu->dst.sel >= bc->ngpr) {
-		bc->ngpr = alu->dst.sel + 1;
+	if (nalu->dst.sel >= bc->ngpr) {
+		bc->ngpr = nalu->dst.sel + 1;
 	}
 	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
 	/* each alu use 2 dwords */
 	bc->cf_last->ndw += 2;
 	bc->ndw += 2;
 
-	/* The following configuration provides 64 128-bit constants.
-	 * Each cacheline holds 16 128-bit constants and each
-	 * kcache can lock 2 cachelines and there are 2 kcaches per
-	 * ALU clause for a max of 64 constants.
-	 * For supporting more than 64 constants, the code needs
-	 * to be broken down into multiple ALU clauses.
-	 */
-	/* select the constant buffer (0-15) for each kcache */
-	bc->cf_last->kcache0_bank = 0;
-	bc->cf_last->kcache1_bank = 0;
-	/* lock 2 cachelines per kcache; 4 total */
-	bc->cf_last->kcache0_mode = V_SQ_CF_KCACHE_LOCK_2;
-	bc->cf_last->kcache1_mode = V_SQ_CF_KCACHE_LOCK_2;
-	/* set the cacheline offsets for each kcache */
-	bc->cf_last->kcache0_addr = 0;
-	bc->cf_last->kcache1_addr = 2;
-
 	/* process cur ALU instructions for bank swizzle */
-	if (alu->last) {
+	if (nalu->last) {
 		check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
 		bc->cf_last->curr_bs_head = NULL;
 	}
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index b147f0f..cdab33a 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -37,6 +37,7 @@ struct r600_bc_alu_src {
 	unsigned			neg;
 	unsigned			abs;
 	unsigned			rel;
+	bool				cb;
 };
 
 struct r600_bc_alu_dst {
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index bb5038c..4679a25 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -747,7 +747,10 @@ static int tgsi_src(struct r600_shader_ctx *ctx,
 		r600_src->rel = V_SQ_REL_RELATIVE;
 	r600_src->neg = tgsi_src->Register.Negate;
 	r600_src->abs = tgsi_src->Register.Absolute;
-	r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
+	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT)
+		r600_src->cb = true;
+	else
+		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
 	return 0;
 }
 
@@ -810,6 +813,7 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
 				alu.src[0].sel = r600_src[i].sel;
 				alu.src[0].chan = k;
 				alu.src[0].rel = r600_src[i].rel;
+				alu.src[0].cb = r600_src[i].cb;
 				alu.dst.sel = treg;
 				alu.dst.chan = k;
 				alu.dst.write = 1;
@@ -820,7 +824,8 @@ static int tgsi_split_constant(struct r600_shader_ctx *ctx, struct r600_bc_alu_s
 					return r;
 			}
 			r600_src[i].sel = treg;
-			r600_src[i].rel =0;
+			r600_src[i].rel = 0;
+			r600_src[i].cb = false;
 			j--;
 		}
 	}