From e76a04e1e3b526e5a92fc4d898a30ece68085925 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 9 Sep 2014 15:18:57 -0400 Subject: [PATCH] R600/SI: radeonsi: Program RASTER_CONFIG for harvested GPUs v3 v2: - Write RASTER_CONFIG for all SEs. v3: - Set GRBM_GFX_INDEX.INSTANCE_BROADCAST_WRITES bit. - Set GRBM_GFX_INDEX.SH_BROADCAST_WRITES bit when done setting PA_SC_RASTER_CONFIG. - Get num_se and num_sh_per_se from kernel. https://bugs.freedesktop.org/show_bug.cgi?id=60879 CC: "10.3 10.2" --- src/gallium/drivers/radeonsi/si_state.c | 94 +++++++++++++++++++++-- src/gallium/drivers/radeonsi/sid.h | 8 +- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 3 + src/gallium/winsys/radeon/drm/radeon_winsys.h | 1 + 4 files changed, 100 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 1d6ae86..32b93d3 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2989,6 +2989,17 @@ void si_init_state_functions(struct si_context *sctx) sctx->b.b.draw_vbo = si_draw_vbo; } +static unsigned pkr_mask_to_map(unsigned mask) +{ + switch(mask & 0x3) { + case 0x0: + case 0x1: return 0x0; + case 0x2: return 0x3; + case 0x3: return 0x2; + } + return 0; +} + void si_init_config(struct si_context *sctx) { struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); @@ -3060,24 +3071,97 @@ void si_init_config(struct si_context *sctx) break; } } else { + unsigned raster_config = 0; + unsigned sh_per_se = MAX2(sctx->screen->b.info.max_sh_per_se, 1); + unsigned num_se = MAX2(sctx->screen->b.info.max_se, 1); + unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask; + unsigned i; switch (sctx->screen->b.family) { case CHIP_TAHITI: case CHIP_PITCAIRN: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x2a00126a); + raster_config = 0x2a00126a; break; case CHIP_VERDE: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x0000124a); + raster_config = 0x0000124a; 
break; case CHIP_OLAND: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000082); + raster_config = 0x00000082; break; case CHIP_HAINAN: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); + raster_config = 0x00000000; break; default: - si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000000); + raster_config = 0x00000000; break; } + + if (rb_mask) { + /* XXX: I can't figure out what the *_XSEL and *_YSEL + * fields are for, so I'm leaving them as their default + * values. */ + unsigned pkr_mask = (sh_per_se | 0x1); + unsigned se0_pkr0 = rb_mask & pkr_mask; + unsigned se0_pkr1 = (rb_mask >>= sh_per_se) & pkr_mask; + unsigned se1_pkr0 = (rb_mask >>= sh_per_se) & pkr_mask; + unsigned se1_pkr1 = (rb_mask >>= sh_per_se) & pkr_mask; + unsigned se_map = 0; + unsigned se0_pkr_map = 0; + unsigned se1_pkr_map = 0; + unsigned se0_pkr0_rb_map = 0; + unsigned se0_pkr1_rb_map = 0; + unsigned se1_pkr0_rb_map = 0; + unsigned se1_pkr1_rb_map = 0; + if (!se0_pkr0 && !se0_pkr1) { + /* se0 disabled */ + se_map |= 0x1; + } + if (se1_pkr0 || se1_pkr1) { + /* se1 enabled */ + se_map |= 0x2; + } + if (!se0_pkr0) { + /* se0 pkr0 disabled */ + se0_pkr_map |= 0x1; + } + if (se0_pkr1) { + /* se0 pkr1 enabled */ + se0_pkr_map |= 0x2; + } + if (!se1_pkr0) { + /* se1 pkr0 disabled */ + se1_pkr_map |= 0x1; + } + if (se1_pkr1) { + /* se1 pkr1 enabled */ + se1_pkr_map |= 0x2; + } + + se0_pkr0_rb_map = pkr_mask_to_map(se0_pkr0); + se0_pkr1_rb_map = pkr_mask_to_map(se0_pkr1); + se1_pkr0_rb_map = pkr_mask_to_map(se1_pkr0); + se1_pkr1_rb_map = pkr_mask_to_map(se1_pkr1); + + assert(!se0_pkr0 || !se1_pkr0 || (se0_pkr0_rb_map == se1_pkr0_rb_map)); + assert(!se0_pkr1 || !se1_pkr1 || (se0_pkr1_rb_map == se1_pkr1_rb_map)); + raster_config &= C_028350_RB_MAP_PKR0; + raster_config |= S_028350_RB_MAP_PKR0(se0_pkr0_rb_map); + raster_config &= C_028350_RB_MAP_PKR1; + raster_config |= S_028350_RB_MAP_PKR1(se0_pkr1_rb_map); + raster_config &= C_028350_PKR_MAP; + raster_config |= 
S_028350_PKR_MAP(se0_pkr_map); + raster_config &= C_028350_SE_MAP; + raster_config |= S_028350_SE_MAP(se_map); + + } + for (i = 0; i < num_se; i++) { + si_pm4_set_reg(pm4, GRBM_GFX_INDEX, + SE_INDEX(i) | + SH_BROADCAST_WRITES | + INSTANCE_BROADCAST_WRITES); + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); + } + si_pm4_set_reg(pm4, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES | SH_BROADCAST_WRITES); } si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h index 3241725..11406a9 100644 --- a/src/gallium/drivers/radeonsi/sid.h +++ b/src/gallium/drivers/radeonsi/sid.h @@ -204,7 +204,13 @@ * 6. COMMAND [29:22] | BYTE_COUNT [20:0] */ - +#define GRBM_GFX_INDEX 0x802C +#define INSTANCE_INDEX(x) ((x) << 0) +#define SH_INDEX(x) ((x) << 8) +#define SE_INDEX(x) ((x) << 16) +#define SH_BROADCAST_WRITES (1 << 29) +#define INSTANCE_BROADCAST_WRITES (1 << 30) +#define SE_BROADCAST_WRITES (1 << 31) #define R_0084FC_CP_STRMOUT_CNTL 0x0084FC #define S_0084FC_OFFSET_UPDATE_DONE(x) (((x) & 0x1) << 0) #define R_0085F0_CP_COHER_CNTL 0x0085F0 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 3b695f9..41d23d9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -327,6 +327,9 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.max_sclk); ws->info.max_sclk /= 1000; + radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, + &ws->info.si_backend_enabled_mask); + ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); /* Generation-specific queries. 
*/ diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 69bf6ed..8a39c3c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -230,6 +230,7 @@ struct radeon_info { boolean si_tile_mode_array_valid; uint32_t si_tile_mode_array[32]; + uint32_t si_backend_enabled_mask; boolean cik_macrotile_mode_array_valid; uint32_t cik_macrotile_mode_array[16]; -- 1.8.1.5