/*
 * Review notes for this patch (unified diff touching src/atidri.c and
 * src/atimach64exa.c).  Text ahead of the first "diff --git" header is
 * preamble and is not part of any hunk.
 *
 * This copy has been flattened: hunk lines are joined by spaces, so a "+"
 * in the text may be either an added-line marker or a C operator, and the
 * two bare "#include" directives in the atimach64exa.c include hunk carry
 * no header name.  The original line structure has to be restored before
 * the patch can be applied.
 *
 * What the patch does, as far as the visible text shows:
 *
 *  - ATIDRIAgpInit: the existing "ringSize = 16" assignment is followed by
 *    a second assignment of 32 ("hijack for sysblit"), so 32 is the value
 *    left in pATIDRIServer->ringSize.
 *
 *  - atimach64exa.c, file scope: adds a gettimeofday()-based stopwatch
 *    (cairo_perf_timer_start, cairo_perf_timer_stop and
 *    cairo_perf_timer_elapsed; elapsed returns the stop-minus-start
 *    difference in microseconds), the accumulators up, dn, ut and dt that
 *    feed the running average printed by ErrorF, and includes of "drm.h"
 *    and "mach64_drm.h".
 *
 *  - Mach64UploadToScreen and Mach64DownloadFromScreen: ahead of the
 *    memcpy loop, fill a drm_mach64_sysblit_t and submit it with
 *    drmCommandWrite(..., DRM_MACH64_SYSBLIT, ...); to_fb is 1 for uploads
 *    and 0 for downloads.  A return value of 0 ends the function with
 *    TRUE; any other value falls through to the memcpy path.  The DRM path
 *    is skipped when:
 *      - pATI->directRenderingEnabled is false;
 *      - the transfer is small (upload: w times h <= 32 x 32, h <= 32 or
 *        wBytes <= 64; download: w times h <= 24 x 24, h <= 8 or
 *        wBytes <= 64);
 *      - the low two bits of the system-memory pointer differ from those
 *        of the pixmap offset passed as fb_addr, or the low two bits of
 *        the two pitches differ;
 *      - h > 1024 or wBytes > 4096.
 *
 * NOTE(review): points to settle before this leaves the experimental stage
 *
 *  - The three cairo_perf_timer_* functions are defined without "static"
 *    although the timer object they operate on is static; they should
 *    probably be file-local as well.
 *  - The timer typedefs and functions sit inside the first "#if 1", but
 *    the calls to them and the cairo_perf_ticks_t locals in both EXA
 *    functions are unconditional.  Switching that "#if 1" to 0 would
 *    remove the definitions while the uses remain.
 *  - "t = 1.0 * v * wBytes / ticks_blit" divides by the elapsed
 *    microseconds, which is 0 whenever both gettimeofday() readings are
 *    equal.
 *  - Only h, w_bytes, fb_addr, fb_pitch, mem_addr, mem_pitch and to_fb of
 *    "blit" are assigned; the struct is not otherwise initialised.
 *    Whether drm_mach64_sysblit_t has further members or padding is not
 *    visible here -- confirm against mach64_drm.h.
 *  - ErrorF("Up ret=%d\n") and ErrorF("Dn ret=%d\n") are placed after the
 *    "#endif" of the instrumentation block, so they run on every DRM
 *    attempt.
 *  - When drmCommandWrite() returns non-zero, exaWaitSync() is called a
 *    second time at the fallback label (it already ran before the timer
 *    was started).
 *  - The header names of the two bare "#include" directives were
 *    presumably lost when this copy was made -- TODO recover them from the
 *    original patch.
 */
diff --git a/src/atidri.c b/src/atidri.c index 93de504..183aa0c 100644 --- a/src/atidri.c +++ b/src/atidri.c @@ -785,6 +785,7 @@ static Bool ATIDRIAgpInit( ScreenPtr pSc pATIDRIServer->agpMode = ATI_DEFAULT_AGP_MODE; pATIDRIServer->bufferSize = ATI_DEFAULT_BUFFER_SIZE; pATIDRIServer->ringSize = 16; /* 16 kB ring */ + pATIDRIServer->ringSize = 32; /* hijack for sysblit */ if ( drmAgpAcquire( pATI->drmFD ) < 0 ) { xf86DrvMsg( pScreen->myNum, X_WARNING, "[agp] AGP not available\n" ); diff --git a/src/atimach64exa.c b/src/atimach64exa.c index 6a9c473..5f8b9c7 100644 --- a/src/atimach64exa.c +++ b/src/atimach64exa.c @@ -57,6 +57,7 @@ #include "config.h" #endif #include +#include #include "ati.h" #include "atichip.h" @@ -387,6 +388,44 @@ ( static void Mach64DoneSolid(PixmapPtr pPixmap) { } +#if 1 +typedef unsigned long cairo_perf_ticks_t; + +typedef struct _cairo_perf_timer +{ + struct timeval tv_start; + struct timeval tv_stop; +} cairo_perf_timer_t; + +static cairo_perf_timer_t timer; + +void +cairo_perf_timer_start (void) { + gettimeofday (&timer.tv_start, NULL); +} + +void +cairo_perf_timer_stop (void) { + gettimeofday (&timer.tv_stop, NULL); +} + +cairo_perf_ticks_t +cairo_perf_timer_elapsed (void) { + cairo_perf_ticks_t ticks; + + ticks = (timer.tv_stop.tv_sec - timer.tv_start.tv_sec) * 1000000; + ticks += (timer.tv_stop.tv_usec - timer.tv_start.tv_usec); + + return ticks; +} + +static double up = 0.0, dn = 0.0; +static unsigned ut = 0, dt = 0; +#endif + +#include "drm.h" +#include "mach64_drm.h" + /* * Memcpy-based UTS. 
*/ @@ -394,17 +433,78 @@ static Bool Mach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch) { + ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum]; + ATIPtr pATI = ATIPTR(pScreenInfo); + char *dst = pDst->devPrivate.ptr; + int dst_offset = exaGetPixmapOffset(pDst); int dst_pitch = exaGetPixmapPitch(pDst); int bpp = pDst->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; + drm_mach64_sysblit_t blit; + int ret; + + cairo_perf_ticks_t ticks_blit; + double t; + int v = h; + + dst_offset += (x * cpp) + (y * dst_pitch); + + if (!pATI->directRenderingEnabled) + goto uts_fallback; + + if (w * h <= 32 * 32 || h <= 32 || wBytes <= 64) + goto uts_fallback; + + if ((((unsigned long)src & 3) != (dst_offset & 3)) || + ((src_pitch & 3) != (dst_pitch & 3))) + goto uts_fallback; + + if (h > 1024 || wBytes > 4096) + goto uts_fallback; + + blit.h = h; + blit.w_bytes = wBytes; + blit.fb_addr = dst_offset; + blit.fb_pitch = dst_pitch; + blit.mem_addr = src; + blit.mem_pitch = src_pitch; + blit.to_fb = 1; + exaWaitSync(pDst->drawable.pScreen); + cairo_perf_timer_start(); + + ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit, + sizeof(blit)); + + cairo_perf_timer_stop(); + + ticks_blit = cairo_perf_timer_elapsed(); + +#if 1 + t = 1.0 * v * wBytes / ticks_blit; + + if (ret == 0) { + ut++; + up += t; + ErrorF("Up (%4d,%4d) %6.1f MB/s %9lu %6.1f MB/s\n", wBytes, v, t, ticks_blit, up / ut); + } +#endif + + ErrorF("Up ret=%d\n", ret); + + if (ret == 0) + return TRUE; + +uts_fallback: dst += (x * cpp) + (y * dst_pitch); + exaWaitSync(pDst->drawable.pScreen); + while (h--) { memcpy(dst, src, wBytes); src += src_pitch; @@ -421,17 +521,93 @@ static Bool Mach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch) { + ScrnInfoPtr pScreenInfo = xf86Screens[pSrc->drawable.pScreen->myNum]; + ATIPtr pATI = ATIPTR(pScreenInfo); + char *src = pSrc->devPrivate.ptr; + int src_offset = 
exaGetPixmapOffset(pSrc); int src_pitch = exaGetPixmapPitch(pSrc); int bpp = pSrc->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; + drm_mach64_sysblit_t blit; + int ret; + + int v = h; + cairo_perf_ticks_t ticks_blit; + double t; + + src_offset += (x * cpp) + (y * src_pitch); + + if (!pATI->directRenderingEnabled) + goto dfs_fallback; + + /* Fallback for small or narrow pixmaps; narrow pixmaps are filtered + * because each line has a fixed overhead of 16 bytes for the descriptor, + * mach64 also dictates a pitch of 64 bytes. + * + * TODO: profile for cutoffs + */ + if (w * h <= 24 * 24 || h <= 8 || wBytes <= 64) + goto dfs_fallback; + + /* The byte alignment of the two addresses, i.e. bits dst_offset[1:0] and + * bits src[1:0] should match. + */ + if ((((unsigned long)dst & 3) != (src_offset & 3)) || + ((src_pitch & 3) != (dst_pitch & 3))) + goto dfs_fallback; + + /* A descriptor entry consists of 4 DWORDs, i.e. 16 bytes, and 4 KB can be + * transferred per descriptor. The following check allows for a pixmap as + * large as (1024,1024). For a minimum PAGE_SIZE of 4 KB, each line spans + * at most 2 pages, i.e. a total of 2K descriptors are required or a + * descriptor table of 32 KB. 
+ */ + if (h > 1024 || wBytes > 4096) + goto dfs_fallback; + + blit.h = h; + blit.w_bytes = wBytes; + blit.fb_addr = src_offset; + blit.fb_pitch = src_pitch; + blit.mem_addr = dst; + blit.mem_pitch = dst_pitch; + blit.to_fb = 0; + exaWaitSync(pSrc->drawable.pScreen); + cairo_perf_timer_start(); + + ret = drmCommandWrite(pATI->drmFD, DRM_MACH64_SYSBLIT, &blit, + sizeof(blit)); + + cairo_perf_timer_stop(); + + ticks_blit = cairo_perf_timer_elapsed(); + +#if 1 + t = 1.0 * v * wBytes / ticks_blit; + + if (ret == 0) { + dt++; + dn += t; + ErrorF("Dn (%4d,%4d) %6.1f MB/s %9lu %6.1f MB/s\n", wBytes, v, t, ticks_blit, dn / dt); + } +#endif + + ErrorF("Dn ret=%d\n", ret); + + if (ret == 0) + return TRUE; + +dfs_fallback: src += (x * cpp) + (y * src_pitch); + exaWaitSync(pSrc->drawable.pScreen); + while (h--) { memcpy(dst, src, wBytes); src += src_pitch;