From 5cf988c19590162338fc53cb5abb6589372709b8 Mon Sep 17 00:00:00 2001 From: Zetok Zalbavar Date: Thu, 1 Sep 2016 21:18:09 +0100 Subject: [PATCH] Revert "drm/radeon: Allow write-combined CPU mappings of BOs in GTT (v2)" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 02376d8282b88f07d0716da6155094c8760b1a13. Fixes kernel freeze/crash of radeon driver on my R9 290 GPU after DPM resume failure. With commit reverted I've noticed slight microstutters that happen from time to time when gaming – they're fairly infrequent and barely noticeable, so they don't really matter, given that way more important problem is fixed. Found thanks to bisect from Kajzer @ freedesktop bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66963#c270 --- drivers/gpu/drm/radeon/cik.c | 6 +- drivers/gpu/drm/radeon/cik_sdma.c | 46 ++++-- drivers/gpu/drm/radeon/evergreen.c | 12 +- drivers/gpu/drm/radeon/r600.c | 6 +- drivers/gpu/drm/radeon/radeon.h | 3 +- drivers/gpu/drm/radeon/radeon_benchmark.c | 4 +- drivers/gpu/drm/radeon/radeon_device.c | 3 +- drivers/gpu/drm/radeon/radeon_fb.c | 3 +- drivers/gpu/drm/radeon/radeon_gart.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 19 ++- drivers/gpu/drm/radeon/radeon_object.c | 74 +-------- drivers/gpu/drm/radeon/radeon_object.h | 5 +- drivers/gpu/drm/radeon/radeon_prime.c | 6 +- drivers/gpu/drm/radeon/radeon_ring.c | 248 +++++++++++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_sa.c | 4 +- drivers/gpu/drm/radeon/radeon_test.c | 5 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 15 +- drivers/gpu/drm/radeon/radeon_vce.c | 4 + drivers/gpu/drm/radeon/radeon_vm.c | 14 +- drivers/gpu/drm/radeon/si_dma.c | 48 +++++- 21 files changed, 386 insertions(+), 143 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 8ac82df..7ba749f 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4459,7 +4459,7 @@ static int cik_mec_init(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, + RADEON_GEM_DOMAIN_GTT, NULL, &rdev->mec.hpd_eop_obj); if (r) { dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -4625,8 +4625,8 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) r = radeon_bo_create(rdev, sizeof(struct bonaire_mqd), PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, - NULL, &rdev->ring[idx].mqd_obj); + RADEON_GEM_DOMAIN_GTT, NULL, + &rdev->ring[idx].mqd_obj); if (r) { dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); return r; diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 9c351dc..b4fb2a1 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -847,19 +847,39 @@ void cik_sdma_vm_write_pages(struct radeon_device *rdev, uint64_t value; unsigned ndw; - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; - - /* for non-physically contiguous pages (system) */ - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, - SDMA_WRITE_SUB_OPCODE_LINEAR, 0); - ib->ptr[ib->length_dw++] = pe; - ib->ptr[ib->length_dw++] = upper_32_bits(pe); - ib->ptr[ib->length_dw++] = ndw; - for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & R600_PTE_SYSTEM) { + trace_radeon_vm_set_page(pe, addr, count, incr, flags); + + if (flags == R600_PTE_GART) { + uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; + while (count) { + unsigned bytes = count * 8; + if (bytes > 0x1FFFF8) + bytes = 0x1FFFF8; + + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = bytes; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(src); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + + pe += bytes; + src += bytes; + count -= bytes / 8; + } + } else if (flags & R600_PTE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); + ib->ptr[ib->length_dw++] = ndw; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { value = radeon_vm_map_gart(rdev, addr); } else if (flags & R600_PTE_VALID) { value = addr; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 34f7a29..76c0485 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4254,8 +4254,7 @@ int sumo_rlc_init(struct radeon_device *rdev) /* save restore block */ if (rdev->rlc.save_restore_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, - NULL, &rdev->rlc.save_restore_obj); + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); return r; @@ -4333,8 +4332,7 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.clear_state_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, - NULL, &rdev->rlc.clear_state_obj); + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); sumo_rlc_fini(rdev); @@ -4408,10 +4406,8 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.cp_table_size) { if (rdev->rlc.cp_table_obj == NULL) { - r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, - PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, - NULL, &rdev->rlc.cp_table_obj); + r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r); sumo_rlc_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index f86ab69..926226a 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1504,7 +1504,7 @@ int r600_vram_scratch_init(struct radeon_device *rdev) if (rdev->vram_scratch.robj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, NULL, &rdev->vram_scratch.robj); + NULL, &rdev->vram_scratch.robj); if (r) { return r; } @@ -3443,8 +3443,8 @@ int r600_ih_ring_alloc(struct radeon_device *rdev) if (rdev->ih.ring_obj == NULL) { r = radeon_bo_create(rdev, rdev->ih.ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, - NULL, NULL, &rdev->ih.ring_obj); + RADEON_GEM_DOMAIN_GTT, + NULL, &rdev->ih.ring_obj); if (r) { DRM_ERROR("radeon: failed to create ih ring buffer (%d).\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 007be29..5c35dbd 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -491,7 +491,6 @@ struct radeon_bo { struct ttm_placement placement; struct ttm_buffer_object tbo; struct ttm_bo_kmap_obj kmap; - u32 flags; unsigned pin_count; void *kptr; u32 tiling_flags; @@ -575,7 +574,7 @@ int radeon_gem_init(struct radeon_device *rdev); void radeon_gem_fini(struct radeon_device *rdev); int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, int alignment, int initial_domain, - u32 flags, bool kernel, + bool discardable, bool kernel, struct drm_gem_object **obj); int radeon_mode_dumb_create(struct drm_file *file_priv, diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 87d5fb2..d45570a 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -94,7 +94,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, int time; n = RADEON_BENCHMARK_ITERATIONS; - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, 0, NULL, NULL, &sobj); + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, NULL, &sobj); if (r) { goto out_cleanup; } @@ -106,7 +106,7 @@ static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size, if (r) { goto out_cleanup; } - r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, 0, NULL, NULL, &dobj); + r = radeon_bo_create(rdev, size, PAGE_SIZE, true, ddomain, NULL, &dobj); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index d0826fb..42f57f5 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -465,8 +465,7 @@ int radeon_wb_init(struct radeon_device *rdev) if (rdev->wb.wb_obj == NULL) { r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, - &rdev->wb.wb_obj); + RADEON_GEM_DOMAIN_GTT, NULL, &rdev->wb.wb_obj); if (r) { dev_warn(rdev->dev, "(%d) create WB bo failed\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 0e3143a..9e469ae 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -126,7 +126,8 @@ static int radeonfb_create_pinned_object(struct radeon_fbdev *rfbdev, aligned_size = ALIGN(size, PAGE_SIZE); ret = radeon_gem_object_create(rdev, aligned_size, 0, RADEON_GEM_DOMAIN_VRAM, - 0, true, &gobj); + false, true, + &gobj); if (ret) { printk(KERN_ERR "failed to allocate framebuffer (%d)\n", aligned_size); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index c4777c8..5208df7 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -128,7 +128,7 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) if (rdev->gart.robj == NULL) { r = radeon_bo_create(rdev, rdev->gart.table_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, NULL, &rdev->gart.robj); + NULL, &rdev->gart.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index e26c963..6faf464 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -43,7 +43,7 @@ void radeon_gem_object_free(struct drm_gem_object *gobj) int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, int alignment, int initial_domain, - u32 flags, bool kernel, + bool discardable, bool kernel, struct drm_gem_object **obj) { struct radeon_bo *robj; @@ -67,8 +67,7 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, } retry: - r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain, - flags, NULL, NULL, &robj); + r = radeon_bo_create(rdev, size, alignment, kernel, initial_domain, NULL, &robj); if (r) { if (r != -ERESTARTSYS) { if (initial_domain == RADEON_GEM_DOMAIN_VRAM) { @@ -258,8 +257,8 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, /* create a gem object to contain this object in */ args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, - args->initial_domain, args->flags, - false, &gobj); + args->initial_domain, false, + false, &gobj); if (r) { up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); @@ -636,6 +635,11 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, args->operation = RADEON_VA_RESULT_ERROR; return -EINVAL; } + if (!(args->flags & RADEON_VM_PAGE_SNOOPED)) { + dev_err(&dev->pdev->dev, "only supported snooped mapping for now\n"); + args->operation = RADEON_VA_RESULT_ERROR; + return -EINVAL; + } switch (args->operation) { case RADEON_VA_MAP: @@ -750,8 +754,9 @@ int radeon_mode_dumb_create(struct drm_file *file_priv, args->size = ALIGN(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, 0, - RADEON_GEM_DOMAIN_VRAM, 0, - false, &gobj); + RADEON_GEM_DOMAIN_VRAM, + false, ttm_bo_type_device, + &gobj); if (r) return -ENOMEM; diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 2d901bf..9003640 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -119,17 +119,8 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) } if (domain & RADEON_GEM_DOMAIN_GTT) { - if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_TT; - - } else if ((rbo->flags & RADEON_GEM_GTT_WC) || - (rbo->rdev->flags & RADEON_IS_AGP)) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | - TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_TT; + if (rbo->rdev->flags & RADEON_IS_AGP) { + rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_TT; } else { rbo->placements[c].fpfn = 0; rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | @@ -138,17 +129,8 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) } if (domain & RADEON_GEM_DOMAIN_CPU) { - if (rbo->flags & RADEON_GEM_GTT_UC) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_SYSTEM; - - } else if ((rbo->flags & RADEON_GEM_GTT_WC) || - rbo->rdev->flags & RADEON_IS_AGP) { - rbo->placements[c].fpfn = 0; - rbo->placements[c++].flags = TTM_PL_FLAG_WC | - TTM_PL_FLAG_UNCACHED | - TTM_PL_FLAG_SYSTEM; + if (rbo->rdev->flags & RADEON_IS_AGP) { + rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_SYSTEM; } else { rbo->placements[c].fpfn = 0; rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | @@ -176,10 +158,8 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) } int radeon_bo_create(struct radeon_device *rdev, - unsigned long size, int byte_align, bool kernel, - u32 domain, u32 flags, struct sg_table *sg, - struct reservation_object *resv, - struct radeon_bo **bo_ptr) + unsigned long size, int byte_align, bool kernel, u32 domain, + struct sg_table *sg, struct radeon_bo **bo_ptr) { struct radeon_bo *bo; enum ttm_bo_type type; @@ -214,46 +194,8 @@ int radeon_bo_create(struct radeon_device *rdev, INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->va); bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | - RADEON_GEM_DOMAIN_GTT | - RADEON_GEM_DOMAIN_CPU); - - bo->flags = flags; - /* PCI GART is always snooped */ - if (!(rdev->flags & RADEON_IS_PCIE)) - bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); - - /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx - * See https://bugs.freedesktop.org/show_bug.cgi?id=91268 - */ - if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635) - bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); - -#ifdef CONFIG_X86_32 - /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit - * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 - */ - bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) - /* Don't try to enable write-combining when it can't work, or things - * may be slow - * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 - */ - -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining - - if (bo->flags & RADEON_GEM_GTT_WC) - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); - bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); -#else - /* For architectures that don't support WC memory, - * mask out the WC flag from the BO - */ - if (!drm_arch_can_wc_memory()) - bo->flags &= ~RADEON_GEM_GTT_WC; -#endif - + RADEON_GEM_DOMAIN_GTT | + RADEON_GEM_DOMAIN_CPU); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ down_read(&rdev->pm.mclk_lock); diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index d8d295e..a3a27d4 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -124,7 +124,7 @@ extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, extern int radeon_bo_create(struct radeon_device *rdev, unsigned long size, int byte_align, - bool kernel, u32 domain, u32 flags, + bool kernel, u32 domain, struct sg_table *sg, struct reservation_object *resv, struct radeon_bo **bo_ptr); @@ -172,8 +172,7 @@ static inline void * radeon_sa_bo_cpu_addr(struct radeon_sa_bo *sa_bo) extern int radeon_sa_bo_manager_init(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, - unsigned size, u32 align, u32 domain, - u32 flags); + unsigned size, u32 align, u32 domain); extern void radeon_sa_bo_manager_fini(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager); extern int radeon_sa_bo_manager_start(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index f3609c9..6c86fbf 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -66,10 +66,8 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, struct radeon_bo *bo; int ret; - ww_mutex_lock(&resv->lock, NULL); - ret = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false, - RADEON_GEM_DOMAIN_GTT, 0, sg, resv, &bo); - ww_mutex_unlock(&resv->lock); + ret = radeon_bo_create(rdev, size, PAGE_SIZE, false, + RADEON_GEM_DOMAIN_GTT, sg, &bo); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 8c78723..a83a973 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -28,6 +28,252 @@ */ #include #include "radeon.h" +#include "atom.h" + +/* + * IB + * IBs (Indirect Buffers) and areas of GPU accessible memory where + * commands are stored. You can put a pointer to the IB in the + * command ring and the hw will fetch the commands from the IB + * and execute them. Generally userspace acceleration drivers + * produce command buffers which are send to the kernel and + * put in IBs for execution by the requested ring. + */ +static int radeon_debugfs_sa_init(struct radeon_device *rdev); + +/** + * radeon_ib_get - request an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ring: ring index the IB is associated with + * @ib: IB object returned + * @size: requested IB size + * + * Request an IB (all asics). IBs are allocated using the + * suballocator. + * Returns 0 on success, error on failure. + */ +int radeon_ib_get(struct radeon_device *rdev, int ring, + struct radeon_ib *ib, struct radeon_vm *vm, + unsigned size) +{ + int r; + + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256); + if (r) { + dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); + return r; + } + + r = radeon_semaphore_create(rdev, &ib->semaphore); + if (r) { + return r; + } + + ib->ring = ring; + ib->fence = NULL; + ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); + ib->vm = vm; + if (vm) { + /* ib pool is bound at RADEON_VA_IB_OFFSET in virtual address + * space and soffset is the offset inside the pool bo + */ + ib->gpu_addr = ib->sa_bo->soffset + RADEON_VA_IB_OFFSET; + } else { + ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); + } + ib->is_const_ib = false; + + return 0; +} + +/** + * radeon_ib_free - free an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ib: IB object to free + * + * Free an IB (all asics). + */ +void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) +{ + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); + radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); + radeon_fence_unref(&ib->fence); +} + +/** + * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * @const_ib: Const IB to schedule (SI only) + * + * Schedule an IB on the associated ring (all asics). + * Returns 0 on success, error on failure. + * + * On SI, there are two parallel engines fed from the primary ring, + * the CE (Constant Engine) and the DE (Drawing Engine). Since + * resource descriptors have moved to memory, the CE allows you to + * prime the caches while the DE is updating register state so that + * the resource descriptors will be already in cache when the draw is + * processed. To accomplish this, the userspace driver submits two + * IBs, one for the CE and one for the DE. If there is a CE IB (called + * a CONST_IB), it will be put on the ring prior to the DE IB. Prior + * to SI there was just a DE IB. + */ +int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib) +{ + struct radeon_ring *ring = &rdev->ring[ib->ring]; + int r = 0; + + if (!ib->length_dw || !ring->ready) { + /* TODO: Nothings in the ib we should report. */ + dev_err(rdev->dev, "couldn't schedule ib\n"); + return -EINVAL; + } + + /* 64 dwords should be enough for fence too */ + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_SYNCS * 8); + if (r) { + dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); + return r; + } + + /* grab a vm id if necessary */ + if (ib->vm) { + struct radeon_fence *vm_id_fence; + vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring); + radeon_semaphore_sync_to(ib->semaphore, vm_id_fence); + } + + /* sync with other rings */ + r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to sync rings (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } + + if (ib->vm) + radeon_vm_flush(rdev, ib->vm, ib->ring); + + if (const_ib) { + radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); + radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); + } + radeon_ring_ib_execute(rdev, ib->ring, ib); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } + if (const_ib) { + const_ib->fence = radeon_fence_ref(ib->fence); + } + + if (ib->vm) + radeon_vm_fence(rdev, ib->vm, ib->fence); + + radeon_ring_unlock_commit(rdev, ring); + return 0; +} + +/** + * radeon_ib_pool_init - Init the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Initialize the suballocator to manage a pool of memory + * for use as IBs (all asics). + * Returns 0 on success, error on failure. + */ +int radeon_ib_pool_init(struct radeon_device *rdev) +{ + int r; + + if (rdev->ib_pool_ready) { + return 0; + } + r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, + RADEON_IB_POOL_SIZE*64*1024, + RADEON_GPU_PAGE_SIZE, + RADEON_GEM_DOMAIN_GTT); + if (r) { + return r; + } + + r = radeon_sa_bo_manager_start(rdev, &rdev->ring_tmp_bo); + if (r) { + return r; + } + + rdev->ib_pool_ready = true; + if (radeon_debugfs_sa_init(rdev)) { + dev_err(rdev->dev, "failed to register debugfs file for SA\n"); + } + return 0; +} + +/** + * radeon_ib_pool_fini - Free the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Tear down the suballocator managing the pool of memory + * for use as IBs (all asics). + */ +void radeon_ib_pool_fini(struct radeon_device *rdev) +{ + if (rdev->ib_pool_ready) { + radeon_sa_bo_manager_suspend(rdev, &rdev->ring_tmp_bo); + radeon_sa_bo_manager_fini(rdev, &rdev->ring_tmp_bo); + rdev->ib_pool_ready = false; + } +} + +/** + * radeon_ib_ring_tests - test IBs on the rings + * + * @rdev: radeon_device pointer + * + * Test an IB (Indirect Buffer) on each ring. + * If the test fails, disable the ring. + * Returns 0 on success, error if the primary GFX ring + * IB test fails. + */ +int radeon_ib_ring_tests(struct radeon_device *rdev) +{ + unsigned i; + int r; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_ring *ring = &rdev->ring[i]; + + if (!ring->ready) + continue; + + r = radeon_ib_test(rdev, i, ring); + if (r) { + ring->ready = false; + rdev->needs_reset = false; + + if (i == RADEON_RING_TYPE_GFX_INDEX) { + /* oh, oh, that's really bad */ + DRM_ERROR("radeon: failed testing IB on GFX ring (%d).\n", r); + rdev->accel_working = false; + return r; + + } else { + /* still not good, but we can live with it */ + DRM_ERROR("radeon: failed testing IB on ring %d (%d).\n", i, r); + } + } + } + return 0; +} /* * Rings @@ -383,7 +629,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, + RADEON_GEM_DOMAIN_GTT, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 197b157..15fd572 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -49,7 +49,7 @@ static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager); int radeon_sa_bo_manager_init(struct radeon_device *rdev, struct radeon_sa_manager *sa_manager, - unsigned size, u32 align, u32 domain, u32 flags) + unsigned size, u32 align, u32 domain) { int i, r; @@ -65,7 +65,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, } r = radeon_bo_create(rdev, size, align, true, - domain, flags, NULL, NULL, &sa_manager->bo); + domain, NULL, &sa_manager->bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate bo for manager\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 79181816..cf5aace 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -67,7 +67,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) } r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - 0, NULL, NULL, &vram_obj); + NULL, &vram_obj); if (r) { DRM_ERROR("Failed to create VRAM object\n"); goto out_cleanup; @@ -87,8 +87,7 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag) struct radeon_fence *fence = NULL; r = radeon_bo_create(rdev, size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, - gtt_obj + i); + RADEON_GEM_DOMAIN_GTT, NULL, gtt_obj + i); if (r) { DRM_ERROR("Failed to create GTT object %d\n", i); goto out_lclean; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 90f7394..b8d61bd 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -897,7 +897,7 @@ int radeon_ttm_init(struct radeon_device *rdev) radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->stollen_vga_memory); if (r) { return r; diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6fe9e4e..578381b 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -141,8 +141,7 @@ int radeon_uvd_init(struct radeon_device *rdev) RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE + RADEON_GPU_PAGE_SIZE; r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, - NULL, &rdev->uvd.vcpu_bo); + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo); if (r) { dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r); return r; @@ -734,8 +733,10 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - RADEON_GPU_PAGE_SIZE; - uint32_t *msg = rdev->uvd.cpu_addr + offs; - uint64_t addr = rdev->uvd.gpu_addr + offs; + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); + if (r) + return r; int r, i; @@ -770,8 +771,10 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - RADEON_GPU_PAGE_SIZE; - uint32_t *msg = rdev->uvd.cpu_addr + offs; - uint64_t addr = rdev->uvd.gpu_addr + offs; + r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, NULL, &bo); + if (r) + return r; int r, i; diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index c1c619f..04387c6 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -140,8 +140,12 @@ int radeon_vce_init(struct radeon_device *rdev) else size = vce_v2_0_bo_size(rdev); r = radeon_bo_create(rdev, size, PAGE_SIZE, true, +<<<<<<< HEAD RADEON_GEM_DOMAIN_VRAM, 0, NULL, NULL, &rdev->vce.vcpu_bo); +======= + RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo); +>>>>>>> parent of 02376d8... drm/radeon: Allow write-combined CPU mappings of BOs in GTT (v2) if (r) { dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index a135874..d4b6ed5 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -548,8 +548,7 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, RADEON_GPU_PAGE_SIZE, true, - RADEON_GEM_DOMAIN_VRAM, 0, - NULL, NULL, &pt); + RADEON_GEM_DOMAIN_VRAM, NULL, &pt); if (r) return r; @@ -940,10 +939,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, bo_va->flags &= ~RADEON_VM_PAGE_VALID; bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; - bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; - if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm)) - bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; - if (mem) { addr = mem->start << PAGE_SHIFT; if (mem->mem_type != TTM_PL_SYSTEM) { @@ -951,9 +946,6 @@ int radeon_vm_bo_update(struct radeon_device *rdev, } if (mem->mem_type == TTM_PL_TT) { bo_va->flags |= RADEON_VM_PAGE_SYSTEM; - if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) - bo_va->flags |= RADEON_VM_PAGE_SNOOPED; - } else { addr += rdev->vm_manager.vram_base_offset; } @@ -1203,8 +1195,8 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) } r = radeon_bo_create(rdev, pd_size, align, true, - RADEON_GEM_DOMAIN_VRAM, 0, NULL, - NULL, &vm->page_directory); + RADEON_GEM_DOMAIN_VRAM, NULL, + &vm->page_directory); if (r) return r; diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 8320792..74aae93 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -111,10 +111,50 @@ void si_dma_vm_write_pages(struct radeon_device *rdev, uint64_t value; unsigned ndw; - while (count) { - ndw = count * 2; - if (ndw > 0xFFFFE) - ndw = 0xFFFFE; + trace_radeon_vm_set_page(pe, addr, count, incr, flags); + + if (flags == R600_PTE_GART) { + uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; + while (count) { + unsigned bytes = count * 8; + if (bytes > 0xFFFF8) + bytes = 0xFFFF8; + + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, + 1, 0, 0, bytes); + ib->ptr[ib->length_dw++] = lower_32_bits(pe); + ib->ptr[ib->length_dw++] = lower_32_bits(src); + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; + + pe += bytes; + src += bytes; + count -= bytes / 8; + } + } else if (flags & R600_PTE_SYSTEM) { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; + + /* for non-physically contiguous pages (system) */ + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + for (; ndw > 0; ndw -= 2, --count, pe += 8) { + value = radeon_vm_map_gart(rdev, addr); + value &= 0xFFFFFFFFFFFFF000ULL; + addr += incr; + value |= flags; + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); + } + } + } else { + while (count) { + ndw = count * 2; + if (ndw > 0xFFFFE) + ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); -- 2.7.3