From 4f329a2a60a624446cc33813b5d371dc42b19f5a Mon Sep 17 00:00:00 2001 From: Maxqia Date: Sat, 18 Jul 2015 00:50:25 -0700 Subject: [PATCH] radeonsi: nuke anything related to TC This reverts commits 02ba733, 11b7636, 18a30c9, ca9c5b2, a1bbccf, 7c9ec6c, 7713d59, and 218b157. --- src/gallium/drivers/r600/r600_pipe.c | 6 -- src/gallium/drivers/radeon/r600_buffer_common.c | 48 ++----------- src/gallium/drivers/radeon/r600_pipe_common.c | 1 - src/gallium/drivers/radeon/r600_pipe_common.h | 16 ----- src/gallium/drivers/radeonsi/si_descriptors.c | 93 ++++++------------------- src/gallium/drivers/radeonsi/si_pipe.c | 6 -- src/gallium/drivers/radeonsi/si_pipe.h | 13 ++-- src/gallium/drivers/radeonsi/si_state.c | 18 +++-- src/gallium/drivers/radeonsi/si_state_draw.c | 12 ++-- src/gallium/drivers/radeonsi/sid.h | 4 +- 10 files changed, 46 insertions(+), 171 deletions(-) diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index e845928..ad2124f 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -270,12 +270,6 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_POLYGON_OFFSET_CLAMP: return 1; - case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - return rscreen->b.info.drm_major == 2 && rscreen->b.info.drm_minor >= 43; - - case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: - return !R600_BIG_ENDIAN && rscreen->b.info.has_userptr; - case PIPE_CAP_COMPUTE: return rscreen->b.chip_class > R700; diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index fc5f6c2..b7306d7 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -185,7 +185,6 @@ bool r600_init_resource(struct r600_common_screen *rscreen, pb_reference(&old_buf, NULL); util_range_set_empty(&res->valid_buffer_range); - res->TC_L2_dirty = false; if (rscreen->debug_flags & DBG_VM && res->b.b.target == PIPE_BUFFER) { fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Buffer %u bytes\n", @@ -385,10 +384,11 @@ static const struct u_resource_vtbl r600_buffer_vtbl = NULL /* transfer_inline_write */ }; -static struct r600_resource * -r600_alloc_buffer_struct(struct pipe_screen *screen, - const struct pipe_resource *templ) +struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, + const struct pipe_resource *templ, + unsigned alignment) { + struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; struct r600_resource *rbuffer; rbuffer = MALLOC_STRUCT(r600_resource); @@ -398,17 +398,7 @@ r600_alloc_buffer_struct(struct pipe_screen *screen, rbuffer->b.b.screen = screen; rbuffer->b.vtbl = &r600_buffer_vtbl; rbuffer->buf = NULL; - rbuffer->TC_L2_dirty = false; util_range_init(&rbuffer->valid_buffer_range); - return rbuffer; -} - -struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, - const struct pipe_resource *templ, - unsigned alignment) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); if (!r600_init_resource(rscreen, rbuffer, templ->width0, alignment, TRUE)) { FREE(rbuffer); @@ -416,33 +406,3 @@ struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, } return &rbuffer->b.b; } - -struct pipe_resource * -r600_buffer_from_user_memory(struct pipe_screen *screen, - const struct pipe_resource *templ, - void *user_memory) -{ - struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; - struct radeon_winsys *ws = rscreen->ws; - struct r600_resource *rbuffer = r600_alloc_buffer_struct(screen, templ); - - rbuffer->domains = RADEON_DOMAIN_GTT; - util_range_add(&rbuffer->valid_buffer_range, 0, templ->width0); - - /* Convert a user pointer to a buffer. */ - rbuffer->buf = ws->buffer_from_ptr(ws, user_memory, templ->width0); - if (!rbuffer->buf) { - FREE(rbuffer); - return NULL; - } - - rbuffer->cs_buf = ws->buffer_get_cs_handle(rbuffer->buf); - - if (rscreen->info.r600_virtual_address) - rbuffer->gpu_address = - ws->buffer_get_virtual_address(rbuffer->cs_buf); - else - rbuffer->gpu_address = 0; - - return &rbuffer->b.b; -} diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index bcbf0b9..471d09e 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -867,7 +867,6 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen, rscreen->b.fence_finish = r600_fence_finish; rscreen->b.fence_reference = r600_fence_reference; rscreen->b.resource_destroy = u_resource_destroy_vtbl; - rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; if (rscreen->info.has_uvd) { rscreen->b.get_video_param = rvid_get_video_param; diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index a471426..ae10b2d 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -152,18 +152,6 @@ struct r600_resource { * the unsynchronized map flag and expect the driver to figure it out. */ struct util_range valid_buffer_range; - - /* For buffers only. This indicates that a write operation has been - * performed by TC L2, but the cache hasn't been flushed. - * Any hw block which doesn't use or bypasses TC L2 should check this - * flag and flush the cache before using the buffer. - * - * For example, TC L2 must be flushed if a buffer which has been - * modified by a shader store instruction is about to be used as - * an index buffer. The reason is that VGT DMA index fetching doesn't - * use TC L2. - */ - bool TC_L2_dirty; }; struct r600_transfer { @@ -458,10 +446,6 @@ bool r600_init_resource(struct r600_common_screen *rscreen, struct pipe_resource *r600_buffer_create(struct pipe_screen *screen, const struct pipe_resource *templ, unsigned alignment); -struct pipe_resource * -r600_buffer_from_user_memory(struct pipe_screen *screen, - const struct pipe_resource *templ, - void *user_memory); /* r600_common_pipe.c */ void r600_draw_rectangle(struct blitter_context *blitter, diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index bbfd36d..71a6645 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -72,7 +72,6 @@ static uint32_t null_descriptor[8] = { * packet. It's for preventing a read-after-write (RAW) hazard between two * CP DMA packets. */ #define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */ -#define CIK_CP_DMA_USE_L2 (1 << 2) /* Emit a CP DMA packet to do a copy from one buffer to another. * The size must fit in bits [20:0]. @@ -84,15 +83,13 @@ static void si_emit_cp_dma_copy_buffer(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; - uint32_t sel = flags & CIK_CP_DMA_USE_L2 ? - PKT3_CP_DMA_SRC_SEL(3) | PKT3_CP_DMA_DST_SEL(3) : 0; assert(size); assert((size & ((1<<21)-1)) == size); if (sctx->b.chip_class >= CIK) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */ + radeon_emit(cs, sync_flag); /* CP_SYNC [31] */ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ @@ -116,14 +113,13 @@ static void si_emit_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? PKT3_CP_DMA_CP_SYNC : 0; uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? PKT3_CP_DMA_CMD_RAW_WAIT : 0; - uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? PKT3_CP_DMA_DST_SEL(3) : 0; assert(size); assert((size & ((1<<21)-1)) == size); if (sctx->b.chip_class >= CIK) { radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); - radeon_emit(cs, sync_flag | dst_sel | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ + radeon_emit(cs, sync_flag | PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */ radeon_emit(cs, clear_value); /* DATA [31:0] */ radeon_emit(cs, 0); radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */ @@ -167,7 +163,7 @@ static void si_init_descriptors(struct si_context *sctx, * only once at context initialization. */ si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address, desc->buffer->b.b.width0, 0, - R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); + R600_CP_DMA_SYNC); } static void si_release_descriptors(struct si_descriptors *desc) @@ -189,18 +185,8 @@ static void si_update_descriptors(struct si_context *sctx, desc->atom.num_dw += 4; /* second pointer update */ desc->atom.dirty = true; - - /* TODO: Investigate if these flushes can be removed after - * adding CE support. */ - /* The descriptors are read with the K cache. */ sctx->b.flags |= SI_CONTEXT_INV_KCACHE; - - /* Since SI uses uncached CP DMA to update descriptors, - * we have to flush TC L2, which is used to fetch constants - * along with KCACHE. */ - if (sctx->b.chip_class == SI) - sctx->b.flags |= SI_CONTEXT_INV_TC_L2; } else { desc->atom.dirty = false; } @@ -249,10 +235,11 @@ static void si_emit_descriptors(struct si_context *sctx, va_base = desc->buffer->gpu_address; /* Copy the descriptors to a new context slot. */ + /* XXX Consider using TC or L2 for this copy on CIK. */ si_emit_cp_dma_copy_buffer(sctx, va_base + new_context_id * desc->context_size, va_base + desc->current_context_id * desc->context_size, - desc->context_size, R600_CP_DMA_SYNC | CIK_CP_DMA_USE_L2); + desc->context_size, R600_CP_DMA_SYNC); va_base += new_context_id * desc->context_size; @@ -279,9 +266,7 @@ static void si_emit_descriptors(struct si_context *sctx, packet_size = 2 + desc->element_dw_size; radeon_emit(cs, PKT3(PKT3_WRITE_DATA, packet_size, 0)); - radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(sctx->b.chip_class == SI ? - PKT3_WRITE_DATA_DST_SEL_MEM_SYNC : - PKT3_WRITE_DATA_DST_SEL_TC_L2) | + radeon_emit(cs, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_TC_OR_L2) | PKT3_WRITE_DATA_WR_CONFIRM | PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME)); radeon_emit(cs, va & 0xFFFFFFFFUL); @@ -476,6 +461,8 @@ static void si_set_sampler_views(struct pipe_context *ctx, } } + sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2; si_update_descriptors(sctx, &samplers->views.desc); } @@ -700,6 +687,8 @@ void si_update_vertex_buffers(struct si_context *sctx) * on performance (confirmed by testing). New descriptors are always * uploaded to a fresh new buffer, so I don't think flushing the const * cache is needed. */ + sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | + SI_CONTEXT_INV_TC_L2; } @@ -883,36 +872,6 @@ static void si_set_streamout_targets(struct pipe_context *ctx, unsigned old_num_targets = sctx->b.streamout.num_targets; unsigned i, bufidx; - /* We are going to unbind the buffers. Mark which caches need to be flushed. */ - if (sctx->b.streamout.num_targets && sctx->b.streamout.begin_emitted) { - /* Since streamout uses vector writes which go through TC L2 - * and most other clients can use TC L2 as well, we don't need - * to flush it. - * - * The only case which requires flushing it is VGT DMA index - * fetching, which is a rare case. Thus, flag the TC L2 - * dirtiness in the resource and handle it when index fetching - * is used. - */ - for (i = 0; i < sctx->b.streamout.num_targets; i++) - if (sctx->b.streamout.targets[i]) - r600_resource(sctx->b.streamout.targets[i]->b.buffer)->TC_L2_dirty = true; - - /* Invalidate the scalar cache in case a streamout buffer is - * going to be used as a constant buffer. - * - * Invalidate TC L1, because streamout bypasses it (done by - * setting GLC=1 in the store instruction), but it can contain - * outdated data of streamout buffers. - * - * VS_PARTIAL_FLUSH is required if the buffers are going to be - * used as an input immediately. - */ - sctx->b.flags |= SI_CONTEXT_INV_KCACHE | - SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_VS_PARTIAL_FLUSH; - } - /* Streamout buffers must be bound in 2 places: * 1) in VGT by setting the VGT_STRMOUT registers * 2) as shader resources @@ -1130,7 +1089,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, bool is_framebuffer) { struct si_context *sctx = (struct si_context*)ctx; - unsigned flush_flags, tc_l2_flag; + unsigned flush_flags; if (!size) return; @@ -1155,22 +1114,19 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, uint64_t va = r600_resource(dst)->gpu_address + offset; /* Flush the caches where the resource is bound. */ - if (is_framebuffer) { + if (is_framebuffer) flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; - tc_l2_flag = 0; - } else { + else flush_flags = SI_CONTEXT_INV_TC_L1 | - (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) | + SI_CONTEXT_INV_TC_L2 | SI_CONTEXT_INV_KCACHE; - tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2; - } sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags; while (size) { unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); - unsigned dma_flags = tc_l2_flag; + unsigned dma_flags = 0; si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE); @@ -1201,9 +1157,6 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, /* Flush the caches again in case the 3D engine has been prefetching * the resource. */ sctx->b.flags |= flush_flags; - - if (tc_l2_flag) - r600_resource(dst)->TC_L2_dirty = true; } void si_copy_buffer(struct si_context *sctx, @@ -1211,7 +1164,7 @@ void si_copy_buffer(struct si_context *sctx, uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer) { - unsigned flush_flags, tc_l2_flag; + unsigned flush_flags; if (!size) return; @@ -1226,21 +1179,18 @@ void si_copy_buffer(struct si_context *sctx, src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resource is bound. */ - if (is_framebuffer) { + if (is_framebuffer) flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; - tc_l2_flag = 0; - } else { + else flush_flags = SI_CONTEXT_INV_TC_L1 | - (sctx->b.chip_class == SI ? SI_CONTEXT_INV_TC_L2 : 0) | + SI_CONTEXT_INV_TC_L2 | SI_CONTEXT_INV_KCACHE; - tc_l2_flag = sctx->b.chip_class == SI ? 0 : CIK_CP_DMA_USE_L2; - } sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | flush_flags; while (size) { - unsigned sync_flags = tc_l2_flag; + unsigned sync_flags = 0; unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT); si_need_cs_space(sctx, 7 + (sctx->b.flags ? sctx->cache_flush.num_dw : 0), FALSE); @@ -1272,9 +1222,6 @@ void si_copy_buffer(struct si_context *sctx, /* Flush the caches again in case the 3D engine has been prefetching * the resource. */ sctx->b.flags |= flush_flags; - - if (tc_l2_flag) - r600_resource(dst)->TC_L2_dirty = true; } /* INIT/DEINIT */ diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 2b6a6ff..d9c1618 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -255,12 +255,6 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param) case PIPE_CAP_TGSI_TEXCOORD: return 1; - case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: - return !SI_BIG_ENDIAN && sscreen->b.info.has_userptr; - - case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: - return sscreen->b.info.drm_major == 2 && sscreen->b.info.drm_minor >= 43; - case PIPE_CAP_TEXTURE_MULTISAMPLE: /* 2D tiling on CIK is supported since DRM 2.35.0 */ return sscreen->b.chip_class < CIK || diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index 67cb035..3e4e731 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -67,14 +67,13 @@ #define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6) #define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 7) /* Engine synchronization. */ -#define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8) -#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9) -#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10) -#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 11) -#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 12) +#define SI_CONTEXT_PS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 8) +#define SI_CONTEXT_CS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9) +#define SI_CONTEXT_VGT_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 10) +#define SI_CONTEXT_VGT_STREAMOUT_SYNC (R600_CONTEXT_PRIVATE_FLAG << 11) /* Compute only. */ -#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 13) /* TODO: merge with TC? */ -#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 14) +#define SI_CONTEXT_FLUSH_WITH_INV_L2 (R600_CONTEXT_PRIVATE_FLAG << 12) /* TODO: merge with TC? */ +#define SI_CONTEXT_FLAG_COMPUTE (R600_CONTEXT_PRIVATE_FLAG << 13) #define SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER (SI_CONTEXT_FLUSH_AND_INV_CB | \ SI_CONTEXT_FLUSH_AND_INV_CB_META | \ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 0dd08a2..bb971fb 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2018,16 +2018,20 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, unsigned old_nr_samples = sctx->framebuffer.nr_samples; int i; + if (sctx->framebuffer.state.nr_cbufs) { + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB | + SI_CONTEXT_FLUSH_AND_INV_CB_META; + } + if (sctx->framebuffer.state.zsbuf) { + sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB | + SI_CONTEXT_FLUSH_AND_INV_DB_META; + } + /* Only flush TC when changing the framebuffer state, because * the only client not using TC that can change textures is - * the framebuffer. - * - * Flush all CB and DB caches here because all buffers can be used - * for write by both TC (with shader image stores) and CB/DB. - */ + * the framebuffer. */ sctx->b.flags |= SI_CONTEXT_INV_TC_L1 | - SI_CONTEXT_INV_TC_L2 | - SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER; + SI_CONTEXT_INV_TC_L2; util_copy_framebuffer_state(&sctx->framebuffer.state, state); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e85ed15..b628cec 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -408,9 +408,9 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato if (sctx->flags & SI_CONTEXT_INV_KCACHE) cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_TC_L1) + if (sctx->flags & (SI_CONTEXT_INV_TC_L1 | R600_CONTEXT_STREAMOUT_FLUSH)) cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_TC_L2) + if (sctx->flags & (SI_CONTEXT_INV_TC_L2 | R600_CONTEXT_STREAMOUT_FLUSH)) cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) { @@ -452,7 +452,8 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); - } else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { + } else if (sctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) { + /* Needed if streamout buffers are going to be used as a source. */ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0) | compute); radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); } @@ -589,11 +590,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } } - if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) { - sctx->b.flags |= SI_CONTEXT_INV_TC_L2; - r600_resource(ib.buffer)->TC_L2_dirty = false; - } - /* Check flush flags. */ if (sctx->b.flags) sctx->atoms.s.cache_flush->dirty = true; diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 35d5ee2..2087842 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -98,7 +98,7 @@ #define PKT3_WRITE_DATA_DST_SEL(x) ((x) << 8) #define PKT3_WRITE_DATA_DST_SEL_REG 0 #define PKT3_WRITE_DATA_DST_SEL_MEM_SYNC 1 -#define PKT3_WRITE_DATA_DST_SEL_TC_L2 2 +#define PKT3_WRITE_DATA_DST_SEL_TC_OR_L2 2 #define PKT3_WRITE_DATA_DST_SEL_GDS 3 #define PKT3_WRITE_DATA_DST_SEL_RESERVED_4 4 #define PKT3_WRITE_DATA_DST_SEL_MEM_ASYNC 5 @@ -164,12 +164,10 @@ /* 0 - SRC_ADDR * 1 - GDS (program SAS to 1 as well) * 2 - DATA - * 3 - SRC_ADDR using TC L2 (DMA_DATA only) */ #define PKT3_CP_DMA_DST_SEL(x) ((x) << 20) /* 0 - DST_ADDR * 1 - GDS (program DAS to 1 as well) - * 3 - DST_ADDR using TC L2 (DMA_DATA only) */ /* COMMAND */ #define PKT3_CP_DMA_CMD_SRC_SWAP(x) ((x) << 23) -- 2.4.6