/*
 * Patch summary: optional DMA-based EXA UploadToScreen (UTS) and
 * DownloadFromScreen (DFS) for the Mach64 driver.
 *
 *  - atioption.[ch], aticonfig.c, atistruct.h: add the boolean "UpDownAccel"
 *    option; it is latched into pATI->UpDownAccelEnabled and only honored
 *    when EXA is in use.
 *  - atiscreen.c: at screen init, turn the feature back off unless direct
 *    rendering is enabled and an IRQ is assigned (always off when built
 *    without XF86DRI_DEVEL), then log the final state.
 *  - atimach64exa.c: UTS/DFS first try the DRM_MACH64_SYSBLIT command and
 *    use the existing memcpy path when the option is off, the pixmap is
 *    outside the size window, source/destination alignment differs, or the
 *    command returns non-zero.
 *  - atilock.c: also set CRTC_BUSMASTER_EOL_INT_EN when an IRQ is present.
 *  - atidri.c: ring size raised from 16 to 32 (AGP and PCI init).
 *  - mach64_common.h: new command number and drmMach64SysBlit argument.
 *
 * NOTE(review): with XF86DRI_DEVEL undefined, the uts_fallback/dfs_fallback
 *   labels and the new pScreenInfo, pATI and *_offset locals appear to be
 *   unused -- expect compiler warnings; confirm this is acceptable.
 * NOTE(review): in the DFS size test, "h <= 0" looks redundant for positive
 *   w because "w * h <= 24 * 24" already covers it -- confirm the intended
 *   threshold.
 * NOTE(review): drmMach64SysBlit carries a raw "char *", so its layout
 *   presumably differs between 32- and 64-bit user space -- verify against
 *   the DRM side.
 * NOTE(review): a failing SYSBLIT command is retried on every call; nothing
 *   visible here disables the feature after a failure -- confirm intended.
 */
diff --git a/src/aticonfig.c b/src/aticonfig.c
index 9823652..9630559 100644
--- a/src/aticonfig.c
+++ b/src/aticonfig.c
@@ -147,6 +147,7 @@ # define ShadowFB PublicOption[AT
 # define SWCursor PublicOption[ATI_OPTION_SWCURSOR].value.bool
 # define AccelMethod PublicOption[ATI_OPTION_ACCELMETHOD].value.str
 # define RenderAccel PublicOption[ATI_OPTION_RENDER_ACCEL].value.bool
+# define UpDownAccel PublicOption[ATI_OPTION_UPDOWN_ACCEL].value.bool
 # define LCDSync PrivateOption[ATI_OPTION_LCDSYNC].value.bool
 
 # define ReferenceClock \
@@ -314,6 +315,10 @@ #if defined(USE_EXA)
     pATI->RenderAccelEnabled = FALSE;
     if (pATI->useEXA && RenderAccel)
         pATI->RenderAccelEnabled = TRUE;
+
+    pATI->UpDownAccelEnabled = FALSE;
+    if (pATI->useEXA && UpDownAccel) /* option is honored only with EXA */
+        pATI->UpDownAccelEnabled = TRUE;
 #endif
 
 }
diff --git a/src/atidri.c b/src/atidri.c
index 2b80859..7c0e64c 100644
--- a/src/atidri.c
+++ b/src/atidri.c
@@ -814,7 +814,7 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc
    pATIDRIServer->agpSize = ATI_DEFAULT_AGP_SIZE;
    pATIDRIServer->agpMode = ATI_DEFAULT_AGP_MODE;
    pATIDRIServer->bufferSize = ATI_DEFAULT_BUFFER_SIZE;
-   pATIDRIServer->ringSize = 16; /* 16 kB ring */
+   pATIDRIServer->ringSize = 32; /* 32 kB ring */
 
    if ( drmAgpAcquire( pATI->drmFD ) < 0 ) {
       xf86DrvMsg( pScreen->myNum, X_WARNING, "[agp] AGP not available\n" );
@@ -978,7 +978,7 @@ static Bool ATIDRIPciInit( ScreenPtr pSc
    ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
 
    pATIDRIServer->bufferSize = ATI_DEFAULT_BUFFER_SIZE;
-   pATIDRIServer->ringSize = 16; /* 16 kB ring */
+   pATIDRIServer->ringSize = 32; /* 32 kB ring */
 
    if ( !ATIDRISetBufSize( pScreen, (unsigned)(-1) ) )
       return FALSE;
diff --git a/src/atilock.c b/src/atilock.c
index fd93128..83ca948 100644
--- a/src/atilock.c
+++ b/src/atilock.c
@@ -82,8 +82,15 @@ #endif /* AVOID_CPIO */
 #ifdef XF86DRI_DEVEL
 
     if (pATI->irq > 0)
+    {
+        /* Enable VBLANK interrupt - handled by DRM */
         outr(CRTC_INT_CNTL, (inr(CRTC_INT_CNTL) & ~CRTC_INT_ACKS) |
-            CRTC_VBLANK_INT_EN); /* Enable VBLANK interrupt - handled by DRM */
+            CRTC_VBLANK_INT_EN);
+
+        /* Enable BM_EOL interrupt - handled by DRM */
+        outr(CRTC_INT_CNTL, (inr(CRTC_INT_CNTL) & ~CRTC_INT_ACKS) |
+            CRTC_BUSMASTER_EOL_INT_EN);
+    }
 
 #endif /* XF86DRI_DEVEL */
 
diff --git a/src/atimach64exa.c b/src/atimach64exa.c
index 6a9c473..dbc9833 100644
--- a/src/atimach64exa.c
+++ b/src/atimach64exa.c
@@ -69,6 +69,7 @@ #include "atiregs.h"
 #ifdef XF86DRI_DEVEL
 #include "mach64_dri.h"
 #include "mach64_sarea.h"
+#include "mach64_common.h"
 #endif
 
 #ifdef USE_EXA
@@ -388,19 +389,73 @@ (
 static void
 Mach64DoneSolid(PixmapPtr pPixmap) { }
 /*
- * Memcpy-based UTS.
+ * DMA BitBlt based UTS/DFS (UploadToScreen / DownloadFromScreen).
+ *
+ * EXA hits more optimized paths when it does not have to fall back because of
+ * missing UTS/DFS. Fall back to local memcpy-based UTS/DFS when dmablit fails.
  */
 static Bool
 Mach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     char *src, int src_pitch)
 {
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
     char *dst = pDst->devPrivate.ptr;
+    int dst_offset = exaGetPixmapOffset(pDst);
     int dst_pitch = exaGetPixmapPitch(pDst);
 
     int bpp = pDst->drawable.bitsPerPixel;
     int cpp = (bpp + 7) / 8;
     int wBytes = w * cpp;
 
+#ifdef XF86DRI_DEVEL
+
+    drmMach64SysBlit blit;
+    int ret;
+
+    dst_offset += (x * cpp) + (y * dst_pitch); /* byte offset of pixel (x, y) */
+
+    if (!pATI->UpDownAccelEnabled)
+        goto uts_fallback;
+
+    /* Fall back for small or narrow pixmaps:
+     * memcpy-based UTS gets 155 MB/s on average, the following allows only for
+     * pixmaps large enough to get an average throughput of 125 MB/s which is
+     * close to what memcpy gets, while offloading the CPU.
+     */
+    if (h <= 64 || wBytes <= 256)
+        goto uts_fallback;
+
+    /* Low two bits of start address and of pitch must match on both sides */
+    if ((((unsigned long)src & 3) != (dst_offset & 3)) ||
+        ((src_pitch & 3) != (dst_pitch & 3)))
+        goto uts_fallback;
+
+    /* Pixmap transfer should require at most 2K DMA descriptors */
+    if (h > 1024 || wBytes > 4096)
+        goto uts_fallback;
+
+    ATIDRISync(pScreenInfo);
+
+    blit.h = h;
+    blit.w_bytes = wBytes;
+    blit.fb_addr = dst_offset;
+    blit.fb_pitch = dst_pitch;
+    blit.mem_addr = src;
+    blit.mem_pitch = src_pitch;
+    blit.to_fb = 1; /* host memory -> framebuffer */
+
+    ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit,
+                          sizeof(blit));
+
+    if (ret == 0) /* non-zero: fall through to the memcpy path below */
+        return TRUE;
+
+#endif /* XF86DRI_DEVEL */
+
+uts_fallback: /* memcpy-based UTS */
+
     exaWaitSync(pDst->drawable.pScreen);
 
     dst += (x * cpp) + (y * dst_pitch);
@@ -414,20 +469,68 @@ Mach64UploadToScreen(PixmapPtr pDst, int
     return TRUE;
 }
 
-/*
- * Memcpy-based DFS.
- */
 static Bool
 Mach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
     char *dst, int dst_pitch)
 {
+    ScrnInfoPtr pScreenInfo = xf86Screens[pSrc->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
     char *src = pSrc->devPrivate.ptr;
+    int src_offset = exaGetPixmapOffset(pSrc);
     int src_pitch = exaGetPixmapPitch(pSrc);
 
     int bpp = pSrc->drawable.bitsPerPixel;
     int cpp = (bpp + 7) / 8;
     int wBytes = w * cpp;
 
+#ifdef XF86DRI_DEVEL
+
+    drmMach64SysBlit blit;
+    int ret;
+
+    src_offset += (x * cpp) + (y * src_pitch); /* byte offset of pixel (x, y) */
+
+    if (!pATI->UpDownAccelEnabled)
+        goto dfs_fallback;
+
+    /* Fall back for small or narrow pixmaps:
+     * memcpy-based DFS gets 6.5 MB/s on average, the following allows for
+     * a wide range of short or big pixmaps for which dmablit gets around
+     * 13 MB/s and 72 MB/s respectively.
+     */
+    if (w * h <= 24 * 24 || h <= 0 || wBytes <= 64)
+        goto dfs_fallback;
+
+    /* Low two bits of start address and of pitch must match on both sides */
+    if ((((unsigned long)dst & 3) != (src_offset & 3)) ||
+        ((src_pitch & 3) != (dst_pitch & 3)))
+        goto dfs_fallback;
+
+    /* Pixmap transfer should require at most 2K DMA descriptors */
+    if (h > 1024 || wBytes > 4096)
+        goto dfs_fallback;
+
+    ATIDRISync(pScreenInfo);
+
+    blit.h = h;
+    blit.w_bytes = wBytes;
+    blit.fb_addr = src_offset;
+    blit.fb_pitch = src_pitch;
+    blit.mem_addr = dst;
+    blit.mem_pitch = dst_pitch;
+    blit.to_fb = 0; /* framebuffer -> host memory */
+
+    ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit,
+                          sizeof(blit));
+
+    if (ret == 0) /* non-zero: fall through to the memcpy path below */
+        return TRUE;
+
+#endif /* XF86DRI_DEVEL */
+
+dfs_fallback: /* memcpy-based DFS */
+
     exaWaitSync(pSrc->drawable.pScreen);
 
     src += (x * cpp) + (y * src_pitch);
@@ -659,9 +762,6 @@ Bool ATIMach64ExaInit(ScreenPtr pScreen)
     pExa->Copy = Mach64Copy;
     pExa->DoneCopy = Mach64DoneCopy;
 
-    /* EXA hits more optimized paths when it does not have to fallback because
-     * of missing UTS/DFS, hook memcpy-based UTS/DFS.
-     */
     pExa->UploadToScreen = Mach64UploadToScreen;
     pExa->DownloadFromScreen = Mach64DownloadFromScreen;
 
diff --git a/src/atioption.c b/src/atioption.c
index 42fbd56..c755304 100644
--- a/src/atioption.c
+++ b/src/atioption.c
@@ -220,6 +220,13 @@ #endif /* TV_OUT */
         FALSE
     },
     {
+        ATI_OPTION_UPDOWN_ACCEL,
+        "UpDownAccel",
+        OPTV_BOOLEAN,
+        {0, },
+        FALSE
+    },
+    {
         -1,
         NULL,
         OPTV_NONE,
diff --git a/src/atioption.h b/src/atioption.h
index 4bdfb04..1d0201f 100644
--- a/src/atioption.h
+++ b/src/atioption.h
@@ -64,7 +64,8 @@ #endif /* TV_OUT */
     ATI_OPTION_SHADOW_FB,
     ATI_OPTION_SWCURSOR,
     ATI_OPTION_ACCELMETHOD,
-    ATI_OPTION_RENDER_ACCEL
+    ATI_OPTION_RENDER_ACCEL,
+    ATI_OPTION_UPDOWN_ACCEL
 } ATIPublicOptionType;
 
 #ifdef TV_OUT
diff --git a/src/atiscreen.c b/src/atiscreen.c
index e849941..c6f1b0c 100644
--- a/src/atiscreen.c
+++ b/src/atiscreen.c
@@ -612,6 +612,33 @@ #ifdef XF86DRI_DEVEL
 
 #endif /* XF86DRI_DEVEL */
 
+#ifdef USE_EXA
+
+    if (pATI->useEXA) {
+
+        /* Check for UTS/DFS acceleration after IRQ installation */
+        if (pATI->UpDownAccelEnabled) {
+
+#ifdef XF86DRI_DEVEL
+
+            if (!pATI->directRenderingEnabled || (pATI->irq <= 0))
+
+#endif /* XF86DRI_DEVEL */
+
+            { /* without XF86DRI_DEVEL this block runs unconditionally */
+                xf86DrvMsg(pScreen->myNum, X_INFO,
+                           "Data transfer acceleration between the host memory and "
+                           "the framebuffer requires enabling DRM and IRQ.\n");
+                pATI->UpDownAccelEnabled = FALSE;
+            }
+        }
+
+        xf86DrvMsg(pScreen->myNum, X_INFO, "Data transfer acceleration %s\n",
+                   pATI->UpDownAccelEnabled ? "enabled" : "disabled");
+    }
+
+#endif /* USE_EXA */
+
     return TRUE;
 }
 
diff --git a/src/atistruct.h b/src/atistruct.h
index 39a7e98..03c361a 100644
--- a/src/atistruct.h
+++ b/src/atistruct.h
@@ -309,6 +309,7 @@ #ifdef USE_XAA
 #endif
 #ifdef USE_EXA
     Bool RenderAccelEnabled;
+    Bool UpDownAccelEnabled; /* try the DMA path in EXA UTS/DFS */
     Mach64ContextRegs3D m3d;
 #endif
 
diff --git a/src/mach64_common.h b/src/mach64_common.h
index f1f765a..3f783e1 100644
--- a/src/mach64_common.h
+++ b/src/mach64_common.h
@@ -46,6 +46,7 @@ #define DRM_MACH64_VERTEX 0x05
 #define DRM_MACH64_BLIT 0x06
 #define DRM_MACH64_FLUSH 0x07
 #define DRM_MACH64_GETPARAM 0x08
+#define DRM_MACH64_SYSBLIT 0x09 /* host memory <-> framebuffer blit */
 
 /* Buffer flags for clears
  */
@@ -127,4 +128,17 @@ typedef struct {
 #define MACH64_PARAM_FRAMES_QUEUED 1
 #define MACH64_PARAM_IRQ_NR 2
 
+typedef struct drm_mach64_sysblit { /* argument of DRM_MACH64_SYSBLIT */
+    unsigned int h; /* number of rows */
+    unsigned int w_bytes; /* bytes per row */
+
+    unsigned int fb_addr; /* framebuffer byte offset of the first row */
+    unsigned int fb_pitch; /* framebuffer pitch */
+
+    char *mem_addr; /* host buffer */
+    unsigned int mem_pitch; /* host buffer pitch */
+
+    int to_fb; /* 1: host -> framebuffer, 0: framebuffer -> host */
+} drmMach64SysBlit;
+
 #endif /* __MACH64_COMMON_H__ */