diff --git i/src/amd/vulkan/radv_cmd_buffer.c w/src/amd/vulkan/radv_cmd_buffer.c index ccf762ead46..a31b42c95cd 100644 --- i/src/amd/vulkan/radv_cmd_buffer.c +++ w/src/amd/vulkan/radv_cmd_buffer.c @@ -1067,7 +1067,7 @@ static void radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, struct radv_image *image, VkImageLayout layout, - bool requires_cond_write) + bool requires_cond_exec) { uint32_t db_z_info = ds->db_z_info; uint32_t db_z_info_reg; @@ -1091,38 +1091,21 @@ radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, } /* When we don't know the last fast clear value we need to emit a - * conditional packet, otherwise we can update DB_Z_INFO directly. + * conditional packet that will eventually skip the following + * SET_CONTEXT_REG packet. */ - if (requires_cond_write) { - radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0)); - - const uint32_t write_space = 0 << 8; /* register */ - const uint32_t poll_space = 1 << 4; /* memory */ - const uint32_t function = 3 << 0; /* equal to the reference */ - const uint32_t options = write_space | poll_space | function; - radeon_emit(cmd_buffer->cs, options); - - /* poll address - location of the depth clear value */ + if (requires_cond_exec) { uint64_t va = radv_buffer_get_va(image->bo); - va += image->offset + image->clear_value_offset; - - /* In presence of stencil format, we have to adjust the base - * address because the first value is the stencil clear value. - */ - if (vk_format_is_stencil(image->vk_format)) - va += 4; + va += image->offset + image->tc_compat_zrange_offset; + radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); radeon_emit(cmd_buffer->cs, va); radeon_emit(cmd_buffer->cs, va >> 32); - - radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */ - radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */ - radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */ - radeon_emit(cmd_buffer->cs, 0u); /* write address high */ - radeon_emit(cmd_buffer->cs, db_z_info); - } else { - radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); + radeon_emit(cmd_buffer->cs, 0); + radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ } + + radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); } static void @@ -1269,6 +1252,25 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radeon_emit(cs, fui(ds_clear_value.depth)); } +static void +radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, + struct radv_image *image, + VkClearDepthStencilValue ds_clear_value) +{ + struct radeon_cmdbuf *cs = cmd_buffer->cs; + uint64_t va = radv_buffer_get_va(image->bo); + va += image->offset + image->tc_compat_zrange_offset; + uint32_t cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; + + radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); + radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) | + S_370_WR_CONFIRM(1) | + S_370_ENGINE_SEL(V_370_PFP)); + radeon_emit(cs, va); + radeon_emit(cs, va >> 32); + radeon_emit(cs, cond_val); +} + /** * Update the clear depth/stencil values for this image. */ @@ -1282,6 +1284,12 @@ radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, radv_set_ds_clear_metadata(cmd_buffer, image, ds_clear_value, aspects); + if (radv_image_is_tc_compat_htile(image) && + (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { + radv_update_tc_compat_zrange_metadata(cmd_buffer, image, + ds_clear_value); + } + radv_update_bound_fast_clear_ds(cmd_buffer, image, ds_clear_value, aspects); } diff --git i/src/amd/vulkan/radv_image.c w/src/amd/vulkan/radv_image.c index f447166d80c..9fcbaa3184c 100644 --- i/src/amd/vulkan/radv_image.c +++ w/src/amd/vulkan/radv_image.c @@ -870,6 +870,14 @@ radv_image_alloc_htile(struct radv_image *image) /* + 8 for storing the clear values */ image->clear_value_offset = image->htile_offset + image->surface.htile_size; image->size = image->clear_value_offset + 8; + if (radv_image_is_tc_compat_htile(image)) { + /* Metadata for the TC-compatible HTILE hardware bug which + * have to be fixed by updating ZRANGE_PRECISION when doing + * fast depth clears to 0.0f. + */ + image->tc_compat_zrange_offset = image->clear_value_offset + 8; + image->size = image->clear_value_offset + 16; + } image->alignment = align64(image->alignment, image->surface.htile_alignment); } @@ -1014,8 +1022,8 @@ radv_image_create(VkDevice _device, /* Otherwise, try to enable HTILE for depth surfaces. */ if (radv_image_can_enable_htile(image) && !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) { - radv_image_alloc_htile(image); image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE; + radv_image_alloc_htile(image); } else { image->surface.htile_size = 0; } diff --git i/src/amd/vulkan/radv_private.h w/src/amd/vulkan/radv_private.h index ac756f2c247..1ef5b215db7 100644 --- i/src/amd/vulkan/radv_private.h +++ w/src/amd/vulkan/radv_private.h @@ -1503,6 +1503,8 @@ struct radv_image { uint64_t clear_value_offset; uint64_t fce_pred_offset; + uint64_t tc_compat_zrange_offset; + /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */ VkDeviceMemory owned_memory; };