From 229ce00ff2f8e62a0a16d4d898fe043e93388eb5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 31 Jan 2013 13:35:05 -0500 Subject: [PATCH 2/2] drm/radeon: use IBs for VM page table updates For very large page table updates, we can exceed the size of the ring. To avoid this, use an IB to perform the page table update. Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 30 +++++++++++-------- drivers/gpu/drm/radeon/radeon.h | 6 ++- drivers/gpu/drm/radeon/radeon_asic.h | 8 ++++- drivers/gpu/drm/radeon/radeon_gart.c | 45 +++++++++++++++++++++++------ drivers/gpu/drm/radeon/si.c | 52 ++++++++++++++++++--------------- 5 files changed, 90 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 835992d..4defaf5 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1843,19 +1843,21 @@ uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags) * cayman_vm_set_page - update the page tables using the CP * * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * - * Update the page tables using the CP (cayman-si). + * Update the page tables using the CP (cayman/TN). */ -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void cayman_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); uint64_t value; unsigned ndw; @@ -1866,9 +1868,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, if (ndw > 0x3FFF) ndw = 0x3FFF; - radeon_ring_write(ring, PACKET3(PACKET3_ME_WRITE, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_ME_WRITE, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 1; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -1880,8 +1882,8 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -1891,9 +1893,9 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -1905,10 +1907,12 @@ void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); } } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index a08f657..cd7b0e5 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1179,7 +1179,9 @@ struct radeon_asic { void (*fini)(struct radeon_device *rdev); u32 pt_ring_index; - void (*set_page)(struct radeon_device *rdev, uint64_t pe, + void (*set_page)(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); } vm; @@ -1801,7 +1803,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) -#define radeon_asic_vm_set_page(rdev, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (pe), (addr), (count), (incr), (flags))) +#define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags))) #define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp)) #define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp)) #define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp)) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index 15d70e6..8bfb821 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -473,7 +473,9 @@ int cayman_vm_init(struct radeon_device *rdev); void cayman_vm_fini(struct radeon_device *rdev); void cayman_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); uint32_t cayman_vm_page_flags(struct radeon_device *rdev, uint32_t flags); -void cayman_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void cayman_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); @@ -503,7 +505,9 @@ int si_irq_set(struct radeon_device *rdev); int si_irq_process(struct radeon_device *rdev); int si_vm_init(struct radeon_device *rdev); void si_vm_fini(struct radeon_device *rdev); -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void si_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 6e24f84..d95c61d 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -929,6 +929,7 @@ uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) */ static int radeon_vm_update_pdes(struct radeon_device *rdev, struct radeon_vm *vm, + struct radeon_ib *ib, uint64_t start, uint64_t end) { static const uint32_t incr = RADEON_VM_PTE_COUNT * 8; @@ -971,7 +972,7 @@ retry: ((last_pt + incr * count) != pt)) { if (count) { - radeon_asic_vm_set_page(rdev, last_pde, + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, RADEON_VM_PAGE_VALID); } @@ -985,7 +986,7 @@ retry: } if (count) { - radeon_asic_vm_set_page(rdev, last_pde, last_pt, count, + radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count, incr, RADEON_VM_PAGE_VALID); } @@ -1009,6 +1010,7 @@ retry: */ static void radeon_vm_update_ptes(struct radeon_device *rdev, struct radeon_vm *vm, + struct radeon_ib *ib, uint64_t start, uint64_t end, uint64_t dst, uint32_t flags) { @@ -1038,7 +1040,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, if ((last_pte + 8 * count) != pte) { if (count) { - radeon_asic_vm_set_page(rdev, last_pte, + radeon_asic_vm_set_page(rdev, ib, last_pte, last_dst, count, RADEON_GPU_PAGE_SIZE, flags); @@ -1056,7 +1058,8 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, } if (count) { - radeon_asic_vm_set_page(rdev, last_pte, last_dst, count, + radeon_asic_vm_set_page(rdev, ib, last_pte, + last_dst, count, RADEON_GPU_PAGE_SIZE, flags); } } @@ -1082,6 +1085,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, unsigned ridx = rdev->asic->vm.pt_ring_index; struct radeon_ring *ring = &rdev->ring[ridx]; struct radeon_semaphore *sem = NULL; + struct radeon_ib ib; struct radeon_bo_va *bo_va; unsigned nptes, npdes, ndw; uint64_t addr; @@ -1140,9 +1144,8 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, /* assume two extra pdes in case the mapping overlaps the borders */ npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2; - /* estimate number of dw needed */ - /* semaphore, fence and padding */ - ndw = 32; + /* padding, etc. */ + ndw = 64; if (RADEON_VM_BLOCK_SIZE > 11) /* reserve space for one header for every 2k dwords */ @@ -1161,8 +1164,25 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, /* reserve space for pde addresses */ ndw += npdes * 2; - r = radeon_ring_lock(rdev, ring, ndw); + /* update too big for an IB */ + if (ndw > 0xfffff) + return -ENOMEM; + + r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib.sa_bo, ndw * 4, 256, true); + if (r) { + return r; + } + ib.ring = ridx; + ib.fence = NULL; + ib.ptr = radeon_sa_bo_cpu_addr(ib.sa_bo); + ib.vm = NULL; + ib.gpu_addr = radeon_sa_bo_gpu_addr(ib.sa_bo); + ib.is_const_ib = false; + ib.length_dw = 0; + + r = radeon_ring_lock(rdev, ring, 64); if (r) { + radeon_sa_bo_free(rdev, &ib.sa_bo, NULL); return r; } @@ -1171,23 +1191,28 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, radeon_fence_note_sync(vm->fence, ridx); } - r = radeon_vm_update_pdes(rdev, vm, bo_va->soffset, bo_va->eoffset); + r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset); if (r) { + radeon_sa_bo_free(rdev, &ib.sa_bo, NULL); radeon_ring_unlock_undo(rdev, ring); return r; } - radeon_vm_update_ptes(rdev, vm, bo_va->soffset, bo_va->eoffset, + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, addr, bo_va->flags); + radeon_ring_ib_execute(rdev, ib.ring, &ib); + radeon_fence_unref(&vm->fence); r = radeon_fence_emit(rdev, &vm->fence, ridx); if (r) { + radeon_sa_bo_free(rdev, &ib.sa_bo, NULL); radeon_ring_unlock_undo(rdev, ring); return r; } radeon_ring_unlock_commit(rdev, ring); radeon_semaphore_free(rdev, &sem, vm->fence); + radeon_sa_bo_free(rdev, &ib.sa_bo, vm->fence); radeon_fence_unref(&vm->last_flush); return 0; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index ae8b482..57fc2e0 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -2920,19 +2920,21 @@ void si_vm_fini(struct radeon_device *rdev) * si_vm_set_page - update the page tables using the CP * * @rdev: radeon_device pointer + * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: access flags * - * Update the page tables using the CP (cayman-si). + * Update the page tables using the CP (SI). */ -void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, +void si_vm_set_page(struct radeon_device *rdev, + struct radeon_ib *ib, + uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - struct radeon_ring *ring = &rdev->ring[rdev->asic->vm.pt_ring_index]; uint32_t r600_flags = cayman_vm_page_flags(rdev, flags); uint64_t value; unsigned ndw; @@ -2943,11 +2945,11 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, if (ndw > 0x3FFE) ndw = 0x3FFE; - radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, ndw)); - radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | - WRITE_DATA_DST_SEL(1))); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe)); + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw); + ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(1)); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe); for (; ndw > 2; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -2959,8 +2961,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -2972,9 +2974,9 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, ndw = 0xFFFFE; /* for non-physically contiguous pages (system) */ - radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw)); - radeon_ring_write(ring, pe); - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw); + ib->ptr[ib->length_dw++] = pe; + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; for (; ndw > 0; ndw -= 2, --count, pe += 8) { if (flags & RADEON_VM_PAGE_SYSTEM) { value = radeon_vm_map_gart(rdev, addr); @@ -2986,8 +2988,8 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, } addr += incr; value |= r600_flags; - radeon_ring_write(ring, value); - radeon_ring_write(ring, upper_32_bits(value)); + ib->ptr[ib->length_dw++] = value; + ib->ptr[ib->length_dw++] = upper_32_bits(value); } } } else { @@ -3001,20 +3003,22 @@ void si_vm_set_page(struct radeon_device *rdev, uint64_t pe, else value = 0; /* for physically contiguous pages (vram) */ - radeon_ring_write(ring, DMA_PTE_PDE_PACKET(ndw)); - radeon_ring_write(ring, pe); /* dst addr */ - radeon_ring_write(ring, upper_32_bits(pe) & 0xff); - radeon_ring_write(ring, r600_flags); /* mask */ - radeon_ring_write(ring, 0); - radeon_ring_write(ring, value); /* value */ - radeon_ring_write(ring, upper_32_bits(value)); - radeon_ring_write(ring, incr); /* increment size */ - radeon_ring_write(ring, 0); + ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); + ib->ptr[ib->length_dw++] = pe; /* dst addr */ + ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; + ib->ptr[ib->length_dw++] = r600_flags; /* mask */ + ib->ptr[ib->length_dw++] = 0; + ib->ptr[ib->length_dw++] = value; /* value */ + ib->ptr[ib->length_dw++] = upper_32_bits(value); + ib->ptr[ib->length_dw++] = incr; /* increment size */ + ib->ptr[ib->length_dw++] = 0; pe += ndw * 4; addr += (ndw / 2) * incr; count -= ndw / 2; } } + while (ib->length_dw & 0x7) + ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0); } } -- 1.7.7.5