From 4d556c19595befc3db93cda46ff7ff373f067194 Mon Sep 17 00:00:00 2001 From: Peter Zubaj Date: Sun, 7 Oct 2007 19:25:19 +0200 Subject: [PATCH] R300 texture tiling. --- src/mesa/drivers/dri/r300/r300_cmdbuf.c | 58 +++++ src/mesa/drivers/dri/r300/r300_cmdbuf.h | 12 + src/mesa/drivers/dri/r300/r300_context.h | 17 ++- src/mesa/drivers/dri/r300/r300_reg.h | 1 + src/mesa/drivers/dri/r300/r300_state.c | 4 + src/mesa/drivers/dri/r300/r300_texmem.c | 237 ++++++++------------- src/mesa/drivers/dri/r300/r300_texstate.c | 338 ++++++++++++++++++----------- 7 files changed, 397 insertions(+), 270 deletions(-) diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c index 9eca41f..2ed993d 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c @@ -573,6 +573,64 @@ void r300EmitBlit(r300ContextPtr rmesa, cmd[7].u = (w << 16) | h; } +void r300EmitBlit2D(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_dst_tile, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint scl, GLint scr, + GLint sct, GLint scb, + GLint dstx, GLint dsty, + GLuint w, GLuint h) +{ + drm_r300_cmd_header_t *cmd; + + if (RADEON_DEBUG & DEBUG_IOCTL) + fprintf(stderr, + "%s sc %x/%x %d,%d - %d,%d dst: %x/%x %d,%d sz: %dx%d\n", + __FUNCTION__, src_pitch, src_offset, scl, scr, sct, scb, + dst_pitch, dst_offset, dstx, dsty, w, h); + +/* assert((src_pitch & 63) == 0); + assert((dst_pitch & 63) == 0); + assert((src_offset & 1023) == 0); + assert((dst_offset & 1023) == 0); + assert(w < (1 << 16)); + assert(h < (1 << 16));*/ + GLuint blit_format; + + switch (color_fmt) { + case 1: + blit_format = RADEON_GMC_DST_8BPP_CI; + break; + case 2: + blit_format = RADEON_GMC_DST_16BPP; + break; + case 4: + blit_format = RADEON_GMC_DST_32BPP; + break; + default: + blit_format = RADEON_GMC_DST_8BPP_CI; + break; + } + + cmd = (drm_r300_cmd_header_t *) r300AllocCmdBuf(rmesa, 11, __FUNCTION__); + + cmd[0].u = 0; + 
cmd[0].header.cmd_type = R300_CMD_BLIT; + cmd[1].u = blit_format; + cmd[2].u = src_dst_tile; + cmd[3].u = src_pitch; + cmd[4].u = src_offset; + cmd[5].u = dst_pitch; + cmd[6].u = dst_offset; + cmd[7].u = (scl << 16) | scr; + cmd[8].u = (sct << 16) | scb; + cmd[9].u = (dsty << 16) | dstx; + cmd[10].u = (h << 16) | w; +} void r300EmitWait(r300ContextPtr rmesa, GLuint flags) { drm_r300_cmd_header_t *cmd; diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h index acb6e38..45299bb 100644 --- a/src/mesa/drivers/dri/r300/r300_cmdbuf.h +++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h @@ -107,6 +107,18 @@ extern void r300EmitBlit(r300ContextPtr rmesa, GLuint dst_offset, GLint srcx, GLint srcy, GLint dstx, GLint dsty, GLuint w, GLuint h); + +extern void r300EmitBlit2D(r300ContextPtr rmesa, + GLuint color_fmt, + GLuint src_dst_tile, + GLuint src_pitch, + GLuint src_offset, + GLuint dst_pitch, + GLuint dst_offset, + GLint scl, GLint scr, + GLint sct, GLint scb, + GLint dstx, GLint dsty, + GLuint w, GLuint h); extern void r300EmitWait(r300ContextPtr rmesa, GLuint flags); extern void r300EmitLOAD_VBPNTR(r300ContextPtr rmesa, int start); diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h index be69097..e368cf8 100644 --- a/src/mesa/drivers/dri/r300/r300_context.h +++ b/src/mesa/drivers/dri/r300/r300_context.h @@ -167,6 +167,20 @@ struct r300_dma { }; /* Texture related */ +typedef struct drm_r300_tex_image_t { + unsigned int size; + unsigned int dst_offset; /* dst ofset for blitter - relative to texture start */ + unsigned int dst_pitch; /* dst pitch for blitter */ + unsigned int src_pitch; /* src pitch for blitter */ + unsigned int src_dst_tile; /* value of register 1704 and 1700 - tile bits */ + + unsigned int clip_x, clip_y; /* destination clip x, y */ + unsigned int clip_width, clip_height; /* destination clip width and height */ + + unsigned int x, y; + unsigned int width, height; + const 
void __user *data; +} drm_r300_tex_image_t; typedef struct r300_tex_obj r300TexObj, *r300TexObjPtr; @@ -185,7 +199,8 @@ struct r300_tex_obj { brought into the texunit. */ - drm_radeon_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + drm_r300_tex_image_t image[6][RADEON_MAX_TEXTURE_LEVELS]; + GLuint transfer_size; /* used to set blit format */ /* Six, for the cube faces */ GLboolean image_override; /* Image overridden by GLX_EXT_tfp */ diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index 1baa74c..ac9994b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -950,6 +950,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) # define R300_TXO_MACRO_TILE (1 << 2) # define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_UNKNOWN_TILE (1 << 4) # define R300_TXO_OFFSET_MASK 0xffffffe0 # define R300_TXO_OFFSET_SHIFT 5 /* END: Guess from R200 */ diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c index 088216c..4d46b85 100644 --- a/src/mesa/drivers/dri/r300/r300_state.c +++ b/src/mesa/drivers/dri/r300/r300_state.c @@ -1311,6 +1311,10 @@ static void r300SetupTextures(GLcontext * ctx) if (t->offset & R300_TXO_MICRO_TILE) { WARN_ONCE("micro tiling enabled!\n"); } + + if (t->offset & R300_TXO_UNKNOWN_TILE) { + WARN_ONCE("unknown tiling enabled!\n"); + } r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 + hw_tmu] = 0x0; diff --git a/src/mesa/drivers/dri/r300/r300_texmem.c b/src/mesa/drivers/dri/r300/r300_texmem.c index 723601a..2f39351 100644 --- a/src/mesa/drivers/dri/r300/r300_texmem.c +++ b/src/mesa/drivers/dri/r300/r300_texmem.c @@ -81,6 +81,7 @@ void r300DestroyTexObj(r300ContextPtr rmesa, r300TexObjPtr t) * Texture image conversions */ +/* this is not used */ static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, r300TexObjPtr t, struct gl_texture_image *texImage, @@ -89,7 +90,7 @@ static void 
r300UploadGARTClientSubImage(r300ContextPtr rmesa, GLint width, GLint height) { const struct gl_texture_format *texFormat = texImage->TexFormat; - GLuint srcPitch, dstPitch; +/* GLuint srcPitch, dstPitch; */ int blit_format; int srcOffset; @@ -98,28 +99,28 @@ static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, * I.e. x==0, y==0, width=texWidth, height=texWidth. If this is ever * changed, the src pitch will have to change. */ - switch (texFormat->TexelBytes) { + switch (t->transfer_size) { case 1: blit_format = R300_CP_COLOR_FORMAT_CI8; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; +/* srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes;*/ break; case 2: blit_format = R300_CP_COLOR_FORMAT_RGB565; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; +/* srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes;*/ break; case 4: blit_format = R300_CP_COLOR_FORMAT_ARGB8888; - srcPitch = t->image[0][0].width * texFormat->TexelBytes; - dstPitch = t->image[0][0].width * texFormat->TexelBytes; +/* srcPitch = t->image[0][0].width * texFormat->TexelBytes; + dstPitch = t->image[0][0].width * texFormat->TexelBytes;*/ break; - case 8: +/* case 8: case 16: blit_format = R300_CP_COLOR_FORMAT_CI8; srcPitch = t->image[0][0].width * texFormat->TexelBytes; dstPitch = t->image[0][0].width * texFormat->TexelBytes; - break; + break;*/ default: return; } @@ -140,7 +141,7 @@ static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, r300EmitWait(rmesa, R300_WAIT_3D); - r300EmitBlit(rmesa, blit_format, + /*r300EmitBlit(rmesa, blit_format, srcPitch, srcOffset, dstPitch, @@ -148,11 +149,13 @@ static void r300UploadGARTClientSubImage(r300ContextPtr rmesa, x, y, t->image[0][hwlevel].x + x, - t->image[0][hwlevel].y + y, width, 
height); + t->image[0][hwlevel].y + y, width, height);*/ r300EmitWait(rmesa, R300_WAIT_2D); } +#if 0 +/* not used for now */ static void r300UploadRectSubImage(r300ContextPtr rmesa, r300TexObjPtr t, struct gl_texture_image *texImage, @@ -161,7 +164,7 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, const struct gl_texture_format *texFormat = texImage->TexFormat; int blit_format, dstPitch, done; - switch (texFormat->TexelBytes) { + switch (t->transfer_size) { case 1: blit_format = R300_CP_COLOR_FORMAT_CI8; break; @@ -171,10 +174,10 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, case 4: blit_format = R300_CP_COLOR_FORMAT_ARGB8888; break; - case 8: +/* case 8: case 16: blit_format = R300_CP_COLOR_FORMAT_CI8; - break; + break;*/ default: return; } @@ -215,11 +218,11 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, */ GLuint srcPitch; srcPitch = texImage->RowStride * texFormat->TexelBytes; - r300EmitBlit(rmesa, + /*r300EmitBlit(rmesa, blit_format, srcPitch, r300GartOffsetFromVirtual(rmesa, texImage->Data), - dstPitch, t->bufAddr, 0, 0, 0, 0, width, height); + dstPitch, t->bufAddr, 0, 0, 0, 0, width, height);*/ } else { /* Data not in GART memory, or bad pitch. */ @@ -262,11 +265,11 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, /* Blit to framebuffer */ - r300EmitBlit(rmesa, + /*r300EmitBlit(rmesa, blit_format, dstPitch, GET_START(&region), dstPitch | (t->tile_bits >> 16), - t->bufAddr, 0, 0, 0, done, width, lines); + t->bufAddr, 0, 0, 0, done, width, lines);*/ r300EmitWait(rmesa, R300_WAIT_2D); #ifdef USER_BUFFERS @@ -279,6 +282,8 @@ static void r300UploadRectSubImage(r300ContextPtr rmesa, } } +#endif + /** * Upload the texture image associated with texture \a t at the specified * level at the address relative to \a start. 
@@ -289,18 +294,14 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, GLuint face) { struct gl_texture_image *texImage = NULL; - GLuint offset; GLint imageWidth, imageHeight; - GLint ret; - drm_radeon_texture_t tex; - drm_radeon_tex_image_t tmp; const int level = hwlevel + t->base.firstLevel; - + if (RADEON_DEBUG & DEBUG_TEXTURE) { fprintf(stderr, - "%s( %p, %p ) level/width/height/face = %d/%d/%d/%u\n", + "%s( %p, %p ) level/x/y/width/height/face = %d/%d/%d/%d/%d/%u\n", __FUNCTION__, (void *)t, (void *)t->base.tObj, level, - width, height, face); + x, y, width, height, face); } ASSERT(face < 6); @@ -326,15 +327,8 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, return; } - if (t->base.tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - assert(level == 0); - assert(hwlevel == 0); - if (RADEON_DEBUG & DEBUG_TEXTURE) - fprintf(stderr, "%s: image data is rectangular\n", - __FUNCTION__); - r300UploadRectSubImage(rmesa, t, texImage, x, y, width, height); - return; - } else if (texImage->IsClientData) { + /* use normal blit for rect textures */ + if (texImage->IsClientData) { if (RADEON_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "%s: image data is in GART client storage\n", @@ -348,10 +342,10 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, imageWidth = texImage->Width; imageHeight = texImage->Height; - - offset = t->bufAddr + t->base.totalSize / 6 * face; - + if (RADEON_DEBUG & (DEBUG_TEXTURE | DEBUG_IOCTL)) { + GLuint offset = t->bufAddr; + GLint imageX = 0; GLint imageY = 0; GLint blitX = t->image[face][hwlevel].x; @@ -366,119 +360,68 @@ static void r300UploadSubImage(r300ContextPtr rmesa, r300TexObjPtr t, (GLuint) offset, hwlevel, level); } - t->image[face][hwlevel].data = texImage->Data; - - /* Init the DRM_RADEON_TEXTURE command / drm_radeon_texture_t struct. - * NOTE: we're always use a 1KB-wide blit and I8 texture format. 
- * We used to use 1, 2 and 4-byte texels and used to use the texture - * width to dictate the blit width - but that won't work for compressed - * textures. (Brian) - * NOTE: can't do that with texture tiling. (sroland) - */ - tex.offset = offset; - tex.image = &tmp; - /* copy (x,y,width,height,data) */ - memcpy(&tmp, &t->image[face][hwlevel], sizeof(tmp)); - - if (texImage->TexFormat->TexelBytes > 4) { - const int log2TexelBytes = - (3 + (texImage->TexFormat->TexelBytes >> 4)); - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.height = imageHeight; - tex.width = imageWidth << log2TexelBytes; - tex.offset += (tmp.x << log2TexelBytes) & ~1023; - tmp.x = tmp.x % (1024 >> log2TexelBytes); - tmp.width = tmp.width << log2TexelBytes; - } else if (texImage->TexFormat->TexelBytes) { - /* use multi-byte upload scheme */ - tex.height = imageHeight; - tex.width = imageWidth; - switch (texImage->TexFormat->TexelBytes) { - case 1: - tex.format = RADEON_TXFORMAT_I8; - break; - case 2: - tex.format = RADEON_TXFORMAT_AI88; - break; - case 4: - tex.format = RADEON_TXFORMAT_ARGB8888; - break; - } - tex.pitch = - MAX2((texImage->Width * texImage->TexFormat->TexelBytes) / - 64, 1); - tex.offset += tmp.x & ~1023; - tmp.x = tmp.x % 1024; - - if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* need something like "tiled coordinates" ? 
*/ - tmp.y = tmp.x / (tex.pitch * 128) * 2; - tmp.x = - tmp.x % (tex.pitch * 128) / 2 / - texImage->TexFormat->TexelBytes; - tex.pitch |= RADEON_DST_TILE_MICRO >> 22; - } else { - tmp.x = tmp.x >> (texImage->TexFormat->TexelBytes >> 1); - } -#if 1 - if ((t->tile_bits & R300_TXO_MACRO_TILE) && - (texImage->Width * texImage->TexFormat->TexelBytes >= 256) - && ((!(t->tile_bits & R300_TXO_MICRO_TILE) - && (texImage->Height >= 8)) - || (texImage->Height >= 16))) { - /* weird: R200 disables macro tiling if mip width is smaller than 256 bytes, - OR if height is smaller than 8 automatically, but if micro tiling is active - the limit is height 16 instead ? */ - tex.pitch |= RADEON_DST_TILE_MACRO >> 22; - } -#endif - } else { - /* In case of for instance 8x8 texture (2x2 dxt blocks), - padding after the first two blocks is needed (only - with dxt1 since 2 dxt3/dxt5 blocks already use 32 Byte). */ - /* set tex.height to 1/4 since 1 "macropixel" (dxt-block) - has 4 real pixels. Needed so the kernel module reads - the right amount of data. 
*/ - tex.format = RADEON_TXFORMAT_I8; /* any 1-byte texel format */ - tex.pitch = (R300_BLIT_WIDTH_BYTES / 64); - tex.height = (imageHeight + 3) / 4; - tex.width = (imageWidth + 3) / 4; - if ((t->format & R300_TX_FORMAT_DXT1) == R300_TX_FORMAT_DXT1) { - tex.width *= 8; + int done; + + for (done = 0; done < height;) { + + int lines = + MIN2(t->image[face][hwlevel].height - done, RADEON_BUFFER_SIZE / t->image[face][hwlevel].dst_pitch); + int src_pitch; + int dst_pitch; + char *tex; + + dst_pitch = t->image[face][hwlevel].dst_pitch; + src_pitch = t->image[face][hwlevel].src_pitch; + + tex = (char *)texImage->Data + done * src_pitch; + + struct r300_dma_region region; + + memset(&region, 0, sizeof(region)); + r300AllocDmaRegion(rmesa, &region, lines * dst_pitch, + 1024); + + /* Copy texdata to dma: + */ + + if (src_pitch == dst_pitch) { + memcpy(region.address + region.start, tex, + lines * dst_pitch); } else { - tex.width *= 16; - } - } + char *buf = region.address + region.start; + int i; - LOCK_HARDWARE(&rmesa->radeon); - do { - ret = - drmCommandWriteRead(rmesa->radeon.dri.fd, - DRM_RADEON_TEXTURE, &tex, - sizeof(drm_radeon_texture_t)); - if (ret) { - if (RADEON_DEBUG & DEBUG_IOCTL) - fprintf(stderr, - "DRM_RADEON_TEXTURE: again!\n"); - usleep(1); + for (i = 0; i < lines; i++) { + memcpy(buf, tex, src_pitch); + buf += dst_pitch; + tex += src_pitch; + } } - } while (ret == -EAGAIN); + + r300EmitWait(rmesa, R300_WAIT_3D); - UNLOCK_HARDWARE(&rmesa->radeon); - - if (ret) { - fprintf(stderr, "DRM_RADEON_TEXTURE: return = %d\n", ret); - fprintf(stderr, " offset=0x%08x\n", offset); - fprintf(stderr, " image width=%d height=%d\n", - imageWidth, imageHeight); - fprintf(stderr, " blit width=%d height=%d data=%p\n", - t->image[face][hwlevel].width, - t->image[face][hwlevel].height, - t->image[face][hwlevel].data); - _mesa_exit(-1); + /* Blit to framebuffer + */ + r300EmitBlit2D(rmesa, + t->transfer_size, + t->image[face][hwlevel].src_dst_tile, + dst_pitch, + GET_START(&region), + 
dst_pitch, + t->bufAddr + t->image[face][hwlevel].dst_offset, + t->image[face][hwlevel].clip_x, t->image[face][hwlevel].clip_x + t->image[face][hwlevel].clip_width, + t->image[face][hwlevel].clip_y, t->image[face][hwlevel].clip_y + t->image[face][hwlevel].clip_height, + t->image[face][hwlevel].x, t->image[face][hwlevel].y + done, + t->image[face][hwlevel].width, lines); + + r300EmitWait(rmesa, R300_WAIT_2D); + +#ifdef USER_BUFFERS + r300_mem_use(rmesa, region.buf->id); +#endif + r300ReleaseDmaRegion(rmesa, &region, __FUNCTION__); + + done += lines; } } diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c index 1d2909f..f5aae08 100644 --- a/src/mesa/drivers/dri/r300/r300_texstate.c +++ b/src/mesa/drivers/dri/r300/r300_texstate.c @@ -137,11 +137,21 @@ static void r300SetTexImages(r300ContextPtr rmesa, r300TexObjPtr t = (r300TexObjPtr) tObj->DriverData; const struct gl_texture_image *baseImage = tObj->Image[0][tObj->BaseLevel]; - GLint curOffset, blitWidth; + GLint curOffset; + GLint width_coef, height_coef, transfer_size, min_pitch; GLint i, texelBytes; GLint numLevels; GLint log2Width, log2Height, log2Depth; - + + int tile_idx, face_cnt;; + + const GLint micro_width_size[] = { 8, 4, 4, 2, 2 }; + const GLint micro_height_size[] = { 4, 4, 2, 2, 1 }; + + const GLint macro_width_size[] = { 64, 32, 32, 16, 16 }; + const GLint macro_height_size[] = { 32, 32, 16, 16, 8 }; + + /* Set the hardware texture format */ if (!t->image_override @@ -164,134 +174,224 @@ static void r300SetTexImages(r300ContextPtr rmesa, log2Depth = tObj->Image[0][t->base.firstLevel]->DepthLog2; numLevels = t->base.lastLevel - t->base.firstLevel + 1; - + assert(numLevels <= RADEON_MAX_TEXTURE_LEVELS); /* Calculate mipmap offsets and dimensions for blitting (uploading) * The idea is that we lay out the mipmap levels within a block of - * memory organized as a rectangle of width BLIT_WIDTH_BYTES. + * memory organized as a rectangle. 
*/ curOffset = 0; - blitWidth = R300_BLIT_WIDTH_BYTES; t->tile_bits = 0; + + /* micro tiling will be always on + * if texture is less than minimal micro tile it will be aligned to this size + * + * macro tiling will be turned on only for textures with size which is valid for + * macro tiling + */ - /* figure out if this texture is suitable for tiling. */ -#if 0 /* Disabled for now */ - if (texelBytes) { - if ((tObj->Target != GL_TEXTURE_RECTANGLE_NV) && - /* texrect might be able to use micro tiling too in theory? */ - (baseImage->Height > 1)) { - - /* allow 32 (bytes) x 1 mip (which will use two times the space - the non-tiled version would use) max if base texture is large enough */ - if ((numLevels == 1) || - (((baseImage->Width * texelBytes / - baseImage->Height) <= 32) - && (baseImage->Width * texelBytes > 64)) - || - ((baseImage->Width * texelBytes / - baseImage->Height) <= 16)) { - t->tile_bits |= R300_TXO_MICRO_TILE; + /* 2D blit transfer size */ + transfer_size = 1; + /* height coefficient for compressed textures */ + height_coef = 1; + /* width coefficient - for DXT3/5 and 8 byte textures */ + width_coef = 1; + /* minimum hardware pitch */ + min_pitch = 8; + + assert(texelBytes >= 0 && texelBytes <= 16); + + switch (texelBytes) { + case 1: + tile_idx = 0; + break; + case 2: + transfer_size = 2; + tile_idx = 1; + break; + case 4: + transfer_size = 4; + tile_idx = 2; + min_pitch = 16; + break; + case 8: + transfer_size = 4; + width_coef = 2; + tile_idx = 3; + min_pitch = 16; + break; + case 16: + transfer_size = 4; + width_coef = 4; + tile_idx = 4; + min_pitch = 32; + break; + case 0: + default: + if ((tObj->Image[0][t->base.firstLevel]->IsCompressed)) { + height_coef = 4; + transfer_size = 2; + if ((t->format & R300_TX_FORMAT_DXT1) != R300_TX_FORMAT_DXT1) { + width_coef = 2; + tile_idx = 4; + min_pitch = 32; + } else { + min_pitch = 16; + tile_idx = 3; + } + } - } - - if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { - /* we can set macro tiling even for small 
textures, they will be untiled anyway */ - t->tile_bits |= R300_TXO_MACRO_TILE; - } + break; } -#endif - + + assert(tile_idx >= 0); + + if (tObj->Target == GL_TEXTURE_CUBE_MAP) + face_cnt = 6; + else + face_cnt = 1; + + t->transfer_size = transfer_size; + t->tile_bits = 0; + for (i = 0; i < numLevels; i++) { const struct gl_texture_image *texImage; GLuint size; - + texImage = tObj->Image[0][i + t->base.firstLevel]; - if (!texImage) - break; - - /* find image size in bytes */ - if (texImage->IsCompressed) { - if ((t->format & R300_TX_FORMAT_DXT1) == - R300_TX_FORMAT_DXT1) { - // fprintf(stderr,"DXT 1 %d %08X\n", texImage->Width, t->format); - if ((texImage->Width + 3) < 8) /* width one block */ - size = texImage->CompressedSize * 4; - else if ((texImage->Width + 3) < 16) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } else { - /* DXT3/5, 16 bytes per block */ - WARN_ONCE - ("DXT 3/5 suffers from multitexturing problems!\n"); - // fprintf(stderr,"DXT 3/5 %d\n", texImage->Width); - if ((texImage->Width + 3) < 8) - size = texImage->CompressedSize * 2; - else - size = texImage->CompressedSize; - } - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - size = - ((texImage->Width * texelBytes + - 63) & ~63) * texImage->Height; - blitWidth = 64 / texelBytes; - } else if (t->tile_bits & R300_TXO_MICRO_TILE) { - /* tile pattern is 16 bytes x2. 
mipmaps stay 32 byte aligned, - though the actual offset may be different (if texture is less than - 32 bytes width) to the untiled case */ - int w = (texImage->Width * texelBytes * 2 + 31) & ~31; - size = - (w * ((texImage->Height + 1) / 2)) * - texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } else { - int w = (texImage->Width * texelBytes + 31) & ~31; - size = w * texImage->Height * texImage->Depth; - blitWidth = MAX2(texImage->Width, 64 / texelBytes); - } - assert(size > 0); - + if (RADEON_DEBUG & DEBUG_TEXTURE) fprintf(stderr, "w=%d h=%d d=%d tb=%d intFormat=%d\n", texImage->Width, texImage->Height, texImage->Depth, texImage->TexFormat->TexelBytes, - texImage->InternalFormat); - - /* Align to 32-byte offset. It is faster to do this unconditionally - * (no branch penalty). - */ - + texImage->InternalFormat); + + t->image[0][i].x = 0; + t->image[0][i].y = 0; + + t->image[0][i].clip_x = 0; + t->image[0][i].clip_y = 0; + + /* blit clipping and source pitch */ + if (texImage->IsCompressed) { + /* for compressed textures minimum sorce pitch is 16 or 8 bytes */ + if ((t->format & R300_TX_FORMAT_DXT1) != R300_TX_FORMAT_DXT1) { + t->image[0][i].clip_width = MAX2(width_coef * texImage->Width, 16); + t->image[0][i].src_pitch = MAX2(t->image[0][i].src_pitch = texImage->Width * 4, 16); + } else { + t->image[0][i].clip_width = MAX2(width_coef * texImage->Width, 8); + t->image[0][i].src_pitch = MAX2(t->image[0][i].src_pitch = texImage->Width * 2, 8); + } + } else { + t->image[0][i].clip_width = width_coef * texImage->Width; + t->image[0][i].src_pitch = texImage->Width * texelBytes; + } + + t->image[0][i].clip_height = texImage->Height / height_coef; + t->image[0][i].clip_height = MAX2(t->image[0][i].clip_height, 1); + + /* align offset */ curOffset = (curOffset + 0x1f) & ~0x1f; - - if (texelBytes) { - /* fix x and y coords up later together with offset */ - t->image[0][i].x = curOffset; - t->image[0][i].y = 0; - t->image[0][i].width = - MIN2(size / 
texelBytes, blitWidth); - t->image[0][i].height = - (size / texelBytes) / t->image[0][i].width; + + t->image[0][i].dst_offset = curOffset; + + /* set both - later it will be disable if tiling can not be performed */ + t->image[0][i].src_dst_tile |= (R300_TXO_MACRO_TILE >> 2) | (R300_TXO_MICRO_TILE >> 2); + + /* two byte forma textures need special tiling */ + if (!texImage->IsCompressed && t->transfer_size == 2) + t->image[0][i].src_dst_tile |= (R300_TXO_UNKNOWN_TILE >> 2); + + /* check macro tiling */ + if (texImage->IsCompressed && + (texImage->Width < 64 || texImage->Height < 64)) + t->image[0][i].src_dst_tile &= ~(R300_TXO_MACRO_TILE >> 2); + else if (texImage->Width < macro_width_size[tile_idx] || + texImage->Height < macro_height_size[tile_idx] || + texImage->Width % macro_width_size[tile_idx] != 0 || + texImage->Height % macro_height_size[tile_idx] != 0 || + tile_idx == 4) + t->image[0][i].src_dst_tile &= ~(R300_TXO_MACRO_TILE >> 2); + + /* DXT3, DXT5 are not micro tiled */ + if ((texImage->IsCompressed) && ((t->format & R300_TX_FORMAT_DXT1) != R300_TX_FORMAT_DXT1)) + t->image[0][i].src_dst_tile &= ~(R300_TXO_MICRO_TILE >> 2); + + if (tObj->Target != GL_TEXTURE_RECTANGLE_NV) { + /* for micro tiling pad texture to micro tile */ + t->image[0][i].width = (texImage->Width + (micro_width_size[tile_idx]) - 1) & ~(micro_width_size[tile_idx] - 1); } else { - t->image[0][i].x = curOffset % R300_BLIT_WIDTH_BYTES; - t->image[0][i].y = curOffset / R300_BLIT_WIDTH_BYTES; - t->image[0][i].width = - MIN2(size, R300_BLIT_WIDTH_BYTES); - t->image[0][i].height = size / t->image[0][i].width; + t->image[0][i].width = ((texImage->Width + 15) & ~15); + } + + t->image[0][i].height = texImage->Height; + + t->image[0][i].width = t->image[0][i].width * width_coef; + + /* compressed textures have other minimum width */ + if (texImage->IsCompressed) + t->image[0][i].width = MAX2(t->image[0][i].width, 4 * width_coef); + + t->image[0][i].height = t->image[0][i].height / height_coef; + + 
t->image[0][i].height = MAX2(t->image[0][i].height, 1); + + /* pad textures to micro tile */ + int num_tiles_x = (t->image[0][i].width / height_coef) / micro_width_size[tile_idx]; + if ((t->image[0][i].width / height_coef) % micro_width_size[tile_idx]) + num_tiles_x++; + + int num_tiles_y = t->image[0][i].height / micro_height_size[tile_idx]; + if (t->image[0][i].height % micro_height_size[tile_idx]) + num_tiles_y++; + + num_tiles_x = MAX2(num_tiles_x, 1); + num_tiles_y = MAX2(num_tiles_y, 1); + + // size of micro tile is always 128 bytes, macro 2048 + if (texImage->IsCompressed) + size = num_tiles_x * num_tiles_y * micro_width_size[tile_idx] * micro_height_size[tile_idx] * t->transfer_size * height_coef; + else + size = num_tiles_x * num_tiles_y * micro_width_size[tile_idx] * micro_height_size[tile_idx] * t->transfer_size; + + size = MAX2(size, 32); + + /* micro tiled texure size is paded to 32, but others needs to be aligned to 32 too */ + size = (size + 0x1f) & ~0x1f; + + assert(size > 0); + + /* set first level tiling to hw */ + if (i == 0) { + t->tile_bits = (t->image[0][i].src_dst_tile << 2); + + /* two byte format textures need special tiling - do not set micro tiling */ + if (!texImage->IsCompressed && t->transfer_size == 2) + t->tile_bits &= ~R300_TXO_MICRO_TILE; } + + t->image[0][i].dst_offset = curOffset; - if (RADEON_DEBUG & DEBUG_TEXTURE) + /* set blit pitch - used for pitch texture register too */ + t->image[0][i].dst_pitch = MAX2(t->image[0][i].width * t->transfer_size, min_pitch); + + if (RADEON_DEBUG & DEBUG_TEXTURE) { fprintf(stderr, - "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\n", + "level %d: %dx%d x=%d y=%d w=%d h=%d size=%d at %d\ncx=%d cy=%d cw=%d ch=%d\n", i, texImage->Width, texImage->Height, t->image[0][i].x, t->image[0][i].y, t->image[0][i].width, t->image[0][i].height, - size, curOffset); + size, curOffset, + t->image[0][i].clip_x, t->image[0][i].clip_y, + t->image[0][i].clip_width, t->image[0][i].clip_height); + } - curOffset += 
size; + t->image[0][i].size = size; + curOffset += size * face_cnt; } - + /* Align the total size of texture memory block. */ t->base.totalSize = @@ -305,8 +405,16 @@ static void r300SetTexImages(r300ContextPtr rmesa, t->image[face][i].x = t->image[0][i].x; t->image[face][i].y = t->image[0][i].y; t->image[face][i].width = t->image[0][i].width; - t->image[face][i].height = - t->image[0][i].height; + t->image[face][i].height = t->image[0][i].height; + + t->image[face][i].dst_offset = t->image[0][i].size * face + t->image[0][i].dst_offset; + t->image[face][i].src_pitch = t->image[0][i].src_pitch; + t->image[face][i].dst_pitch = t->image[0][i].dst_pitch; + t->image[face][i].src_dst_tile = t->image[0][i].src_dst_tile; + t->image[face][i].clip_x = t->image[0][i].clip_x; + t->image[face][i].clip_y = t->image[0][i].clip_y; + t->image[face][i].clip_width = t->image[0][i].clip_width; + t->image[face][i].clip_height = t->image[0][i].clip_height; } } t->base.totalSize *= 6; /* total texmem needed */ @@ -323,27 +431,13 @@ static void r300SetTexImages(r300ContextPtr rmesa, | ((tObj->Image[0][t->base.firstLevel]->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)) | ((numLevels - 1) << R300_TX_MAX_MIP_LEVEL_SHIFT); - - /* Only need to round to nearest 32 for textures, but the blitter - * requires 64-byte aligned pitches, and we may/may not need the - * blitter. NPOT only! 
- */ - if (baseImage->IsCompressed) { - t->pitch = - (tObj->Image[0][t->base.firstLevel]->Width + 63) & ~(63); - } else if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { - unsigned int align = blitWidth - 1; - t->pitch = ((tObj->Image[0][t->base.firstLevel]->Width * - texelBytes) + 63) & ~(63); + + /* set pitch */ + t->pitch = t->image[0][0].dst_pitch; + if (tObj->Target == GL_TEXTURE_RECTANGLE_NV) { t->size |= R300_TX_SIZE_TXPITCH_EN; if (!t->image_override) - t->pitch_reg = - (((tObj->Image[0][t->base.firstLevel]->Width) + - align) & ~align) - 1; - } else { - t->pitch = - ((tObj->Image[0][t->base.firstLevel]->Width * - texelBytes) + 63) & ~(63); + t->pitch_reg = t->pitch / (t->transfer_size * width_coef) - 1; } t->dirty_state = TEX_ALL; -- 1.5.2.4