NOTE: this patch was recovered from a copy in which line breaks and
indentation had been collapsed.  Indentation, blank lines and the two system
header names in atimach64exa.c are reconstructed (blank lines were placed so
that every hunk matches its @@ line counts).  Apply with whitespace matching
relaxed, e.g. `git apply --ignore-whitespace` or `patch -l`.

diff --git a/src/atidri.c b/src/atidri.c
index 01d449a..8b4ede3 100644
--- a/src/atidri.c
+++ b/src/atidri.c
@@ -785,6 +785,7 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc
     pATIDRIServer->agpMode = ATI_DEFAULT_AGP_MODE;
     pATIDRIServer->bufferSize = ATI_DEFAULT_BUFFER_SIZE;
     pATIDRIServer->ringSize = 16; /* 16 kB ring */
+    pATIDRIServer->sgSize = 32; /* 32 kB sg list */
 
     if ( drmAgpAcquire( pATI->drmFD ) < 0 ) {
         xf86DrvMsg( pScreen->myNum, X_WARNING, "[agp] AGP not available\n" );
@@ -817,6 +818,9 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc
     xf86DrvMsg(pScreen->myNum, X_INFO,
                "[agp] Using %d kB for DMA descriptor ring\n", pATIDRIServer->ringSize);
 
+    xf86DrvMsg(pScreen->myNum, X_INFO,
+               "[agp] Using %d kB for DMA descriptor sg-list\n", pATIDRIServer->sgSize);
+
     if (pATI->OptionBufferSize) {
         if (pATI->OptionBufferSize < 1 || pATI->OptionBufferSize > pATIDRIServer->agpSize ) {
             xf86DrvMsg( pScreen->myNum, X_ERROR, "[agp] Illegal DMA buffers size: %d MB\n",
@@ -844,8 +848,12 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc
     pATIDRIServer->ringStart = pATIDRIServer->agpOffset;
     pATIDRIServer->ringMapSize = pATIDRIServer->ringSize*1024; /* ringSize is in kB */
 
+    /* Reserve space for the DMA descriptor sg-list */
+    pATIDRIServer->sgStart = pATIDRIServer->ringStart + pATIDRIServer->ringMapSize;
+    pATIDRIServer->sgMapSize = pATIDRIServer->sgSize*1024; /* sgSize is in kB */
+
     /* Reserve space for the vertex buffer */
-    pATIDRIServer->bufferStart = pATIDRIServer->ringStart + pATIDRIServer->ringMapSize;
+    pATIDRIServer->bufferStart = pATIDRIServer->sgStart + pATIDRIServer->sgMapSize;
     pATIDRIServer->bufferMapSize = pATIDRIServer->bufferSize*1024*1024;
 
     /* Reserve the rest for AGP textures */
@@ -880,6 +888,27 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc
                 "[agp] Ring mapped at 0x%08lx\n",
                 (unsigned long)pATIDRIServer->ringMap );
 
+    /* Map DMA descriptor sg-list */
+    if ( drmAddMap( pATI->drmFD, pATIDRIServer->sgStart, pATIDRIServer->sgMapSize,
+                    DRM_AGP, DRM_RESTRICTED, &pATIDRIServer->sgHandle ) < 0 ) {
+        xf86DrvMsg( pScreen->myNum, X_ERROR,
+                    "[agp] Could not add sg-list mapping\n" );
+        return FALSE;
+    }
+    xf86DrvMsg( pScreen->myNum, X_INFO,
+                "[agp] sg-list handle = 0x%08x\n",
+                pATIDRIServer->sgHandle );
+
+    if ( drmMap( pATI->drmFD, pATIDRIServer->sgHandle,
+                 pATIDRIServer->sgMapSize, &pATIDRIServer->sgMap ) < 0 ) {
+        xf86DrvMsg( pScreen->myNum, X_ERROR,
+                    "[agp] Could not map sg-list\n" );
+        return FALSE;
+    }
+    xf86DrvMsg( pScreen->myNum, X_INFO,
+                "[agp] sg-list mapped at 0x%08lx\n",
+                (unsigned long)pATIDRIServer->sgMap );
+
     /* Map vertex buffers */
     if ( drmAddMap( pATI->drmFD, pATIDRIServer->bufferStart, pATIDRIServer->bufferMapSize,
                     DRM_AGP, 0, &pATIDRIServer->bufferHandle ) < 0 ) {
@@ -1012,6 +1041,7 @@ static Bool ATIDRIKernelInit( ScreenPtr
     info.fb_offset = pATI->LinearBase;
     info.mmio_offset = pATIDRIServer->regsHandle;
     info.ring_offset = pATIDRIServer->ringHandle;
+    info.sg_offset = pATIDRIServer->sgHandle;
     info.buffers_offset = pATIDRIServer->bufferHandle;
     info.agp_textures_offset = pATIDRIServer->agpTexHandle;
 
diff --git a/src/atilock.c b/src/atilock.c
index fd93128..83ca948 100644
--- a/src/atilock.c
+++ b/src/atilock.c
@@ -82,8 +82,15 @@ #endif /* AVOID_CPIO */
 
 #ifdef XF86DRI_DEVEL
     if (pATI->irq > 0)
+    {
+        /* Enable VBLANK interrupt - handled by DRM */
         outr(CRTC_INT_CNTL, (inr(CRTC_INT_CNTL) & ~CRTC_INT_ACKS) |
-             CRTC_VBLANK_INT_EN); /* Enable VBLANK interrupt - handled by DRM */
+             CRTC_VBLANK_INT_EN);
+
+        /* Enable BM_EOL interrupt - handled by DRM */
+        outr(CRTC_INT_CNTL, (inr(CRTC_INT_CNTL) & ~CRTC_INT_ACKS) |
+             CRTC_BUSMASTER_EOL_INT_EN);
+    }
 #endif /* XF86DRI_DEVEL */
 
 
diff --git a/src/atimach64exa.c b/src/atimach64exa.c
index 6a9c473..662796b 100644
--- a/src/atimach64exa.c
+++ b/src/atimach64exa.c
@@ -57,6 +57,7 @@ #include "config.h"
 #endif
 
 #include <string.h>
+#include <sys/time.h>
 
 #include "ati.h"
 #include "atichip.h"
@@ -387,24 +388,135 @@ (
 
 static void Mach64DoneSolid(PixmapPtr pPixmap) { }
 
-/*
- * Memcpy-based UTS.
- */
+#if 1
+typedef unsigned long cairo_perf_ticks_t;
+
+typedef struct _cairo_perf_timer
+{
+    struct timeval tv_start;
+    struct timeval tv_stop;
+} cairo_perf_timer_t;
+
+static cairo_perf_timer_t timer;
+
+static void
+cairo_perf_timer_start (void) {
+    gettimeofday (&timer.tv_start, NULL);
+}
+
+static void
+cairo_perf_timer_stop (void) {
+    gettimeofday (&timer.tv_stop, NULL);
+}
+
+static cairo_perf_ticks_t
+cairo_perf_timer_elapsed (void) {
+    cairo_perf_ticks_t ticks;
+
+    ticks = (timer.tv_stop.tv_sec - timer.tv_start.tv_sec) * 1000000;
+    ticks += (timer.tv_stop.tv_usec - timer.tv_start.tv_usec);
+
+    return ticks;
+}
+
+static double up = 0.0, dn = 0.0;
+static unsigned ut = 0, dt = 0;
+#endif
+
+#include "drm.h"
+#include "mach64_drm.h"
+
 static Bool
 Mach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     char *src, int src_pitch)
 {
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
     char *dst = pDst->devPrivate.ptr;
+    int dst_offset = exaGetPixmapOffset(pDst);
     int dst_pitch = exaGetPixmapPitch(pDst);
 
     int bpp = pDst->drawable.bitsPerPixel;
     int cpp = (bpp + 7) / 8;
     int wBytes = w * cpp;
 
+    drm_mach64_sysblit_t blit;
+    int ret;
+
+    cairo_perf_ticks_t ticks_blit;
+    double t;
+    int v = h;
+
+#ifdef XF86DRI_DEVEL
+
+    dst_offset += (x * cpp) + (y * dst_pitch);
+
+    if (!pATI->directRenderingEnabled)
+        goto uts_fallback;
+
+    if (pATI->irq <= 0)
+        goto uts_fallback;
+
+    /* Fallback for small or narrow pixmaps:
+     * memcpy-based UTS gets 155 MB/s on average, the following allows only for
+     * pixmaps large enough to get an average throughput of 125 MB/s which is
+     * close to what memcpy gets, while offloading the CPU.
+     */
+    if (h <= 64 || wBytes <= 256)
+        goto uts_fallback;
+
+    /* Byte alignments should match */
+    if ((((unsigned long)src & 3) != (dst_offset & 3)) ||
+        ((src_pitch & 3) != (dst_pitch & 3)))
+        goto uts_fallback;
+
+    /* Pixmap transfer should require at most 2K DMA descriptors */
+    if (h > 1024 || wBytes > 4096)
+        goto uts_fallback;
+
+    blit.h = h;
+    blit.w_bytes = wBytes;
+    blit.fb_addr = dst_offset;
+    blit.fb_pitch = dst_pitch;
+    blit.mem_addr = src;
+    blit.mem_pitch = src_pitch;
+    blit.to_fb = 1;
+
     exaWaitSync(pDst->drawable.pScreen);
 
+    cairo_perf_timer_start();
+
+    ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit,
+                          sizeof(blit));
+
+    cairo_perf_timer_stop();
+
+    ticks_blit = cairo_perf_timer_elapsed();
+
+#if 0
+    t = 1.0 * v * wBytes / ticks_blit;
+
+    if (ret == 0) {
+        ut++;
+        up += t;
+        ErrorF("Up (%4d,%4d) %6.1f MB/s %9lu %6.1f MB/s\n", wBytes, v, t, ticks_blit, up / ut);
+    }
+
+    ErrorF("Up ret=%d\n", ret);
+#endif
+
+    if (ret == 0)
+        return TRUE;
+
+#endif /* XF86DRI_DEVEL */
+
+uts_fallback: /* memcpy-based UTS */
+
     dst += (x * cpp) + (y * dst_pitch);
 
+    exaWaitSync(pDst->drawable.pScreen);
+
     while (h--) {
         memcpy(dst, src, wBytes);
         src += src_pitch;
@@ -414,24 +526,97 @@ Mach64UploadToScreen(PixmapPtr pDst, int
     return TRUE;
 }
 
-/*
- * Memcpy-based DFS.
- */
 static Bool
 Mach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
     char *dst, int dst_pitch)
 {
+    ScrnInfoPtr pScreenInfo = xf86Screens[pSrc->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
     char *src = pSrc->devPrivate.ptr;
+    int src_offset = exaGetPixmapOffset(pSrc);
     int src_pitch = exaGetPixmapPitch(pSrc);
 
     int bpp = pSrc->drawable.bitsPerPixel;
     int cpp = (bpp + 7) / 8;
     int wBytes = w * cpp;
 
+    drm_mach64_sysblit_t blit;
+    int ret;
+
+    int v = h;
+    cairo_perf_ticks_t ticks_blit;
+    double t;
+
+#ifdef XF86DRI_DEVEL
+
+    src_offset += (x * cpp) + (y * src_pitch);
+
+    if (!pATI->directRenderingEnabled)
+        goto dfs_fallback;
+
+    if (pATI->irq <= 0)
+        goto dfs_fallback;
+
+    /* Fallback for small or narrow pixmaps:
+     * memcpy-based DFS gets 6.5 MB/s on average, the following allows for
+     * a wide range of short or big pixmaps for which dmablit gets around
+     * 13 MB/s and 72 MB/s respectively.
+     */
+    if (w * h <= 24 * 24 || h <= 0 || wBytes <= 64)
+        goto dfs_fallback;
+
+    /* Byte alignments should match */
+    if ((((unsigned long)dst & 3) != (src_offset & 3)) ||
+        ((src_pitch & 3) != (dst_pitch & 3)))
+        goto dfs_fallback;
+
+    /* Pixmap transfer should require at most 2K DMA descriptors */
+    if (h > 1024 || wBytes > 4096)
+        goto dfs_fallback;
+
+    blit.h = h;
+    blit.w_bytes = wBytes;
+    blit.fb_addr = src_offset;
+    blit.fb_pitch = src_pitch;
+    blit.mem_addr = dst;
+    blit.mem_pitch = dst_pitch;
+    blit.to_fb = 0;
+
     exaWaitSync(pSrc->drawable.pScreen);
 
+    cairo_perf_timer_start();
+
+    ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit,
+                          sizeof(blit));
+
+    cairo_perf_timer_stop();
+
+    ticks_blit = cairo_perf_timer_elapsed();
+
+#if 0
+    t = 1.0 * v * wBytes / ticks_blit;
+
+    if (ret == 0) {
+        dt++;
+        dn += t;
+        ErrorF("Dn (%4d,%4d) %6.1f MB/s %9lu %6.1f MB/s\n", wBytes, v, t, ticks_blit, dn / dt);
+    }
+
+    ErrorF("Dn ret=%d\n", ret);
+#endif
+
+    if (ret == 0)
+        return TRUE;
+
+#endif /* XF86DRI_DEVEL */
+
+dfs_fallback: /* memcpy-based DFS */
+
     src += (x * cpp) + (y * src_pitch);
 
+    exaWaitSync(pSrc->drawable.pScreen);
+
     while (h--) {
         memcpy(dst, src, wBytes);
         src += src_pitch;
diff --git a/src/mach64_common.h b/src/mach64_common.h
index 1fb765a..d213658 100644
--- a/src/mach64_common.h
+++ b/src/mach64_common.h
@@ -92,6 +92,7 @@ typedef struct {
     unsigned long fb_offset;
     unsigned long mmio_offset;
     unsigned long ring_offset;
+    unsigned long sg_offset;
     unsigned long buffers_offset;
     unsigned long agp_textures_offset;
 } drmMach64Init;
diff --git a/src/mach64_dri.h b/src/mach64_dri.h
index 7061931..8c78523 100644
--- a/src/mach64_dri.h
+++ b/src/mach64_dri.h
@@ -47,14 +47,21 @@ typedef struct {
 
     /* DMA descriptor ring */
     unsigned long ringStart;       /* Offset into AGP space */
-    drm_handle_t ringHandle;      /* Handle from drmAddMap */
+    drm_handle_t ringHandle;       /* Handle from drmAddMap */
     drmSize ringMapSize;           /* Size of map */
     int ringSize;                  /* Size of ring (in kB) */
     drmAddress ringMap;            /* Map */
 
+    /* DMA descriptor sg-list */
+    unsigned long sgStart;         /* Offset into AGP space */
+    drm_handle_t sgHandle;         /* Handle from drmAddMap */
+    drmSize sgMapSize;             /* Size of map */
+    int sgSize;                    /* Size of sg-list (in kB) */
+    drmAddress sgMap;              /* Map */
+
     /* vertex buffer data */
     unsigned long bufferStart;     /* Offset into AGP space */
-    drm_handle_t bufferHandle;    /* Handle from drmAddMap */
+    drm_handle_t bufferHandle;     /* Handle from drmAddMap */
     drmSize bufferMapSize;         /* Size of map */
     int bufferSize;                /* Size of buffers (in MB) */
     drmAddress bufferMap;          /* Map */
@@ -64,7 +71,7 @@ typedef struct {
 
     /* AGP Texture data */
     unsigned long agpTexStart;     /* Offset into AGP space */
-    drm_handle_t agpTexHandle;    /* Handle from drmAddMap */
+    drm_handle_t agpTexHandle;     /* Handle from drmAddMap */
     drmSize agpTexMapSize;         /* Size of map */
     int agpTexSize;                /* Size of AGP tex space (in MB) */
     drmAddress agpTexMap;          /* Map */