From b252903afcb1139dd87605b79f41e0dc325f9ece Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 17 Aug 2017 16:37:49 -0400 Subject: drm/amdgpu: Fix huge page updates with CPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correctly detect system memory mappings when using CPU and don't use huge pages for them. Avoid incorrectly translating a physical page table GPU address when splitting a huge page while mapping system memory. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6b1343e5541d..ba475af99332 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1277,7 +1277,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, /* In the case of a mixed PT the PDE must point to it*/ if (p->adev->asic_type < CHIP_VEGA10 || nptes != AMDGPU_VM_PTE_COUNT(p->adev) || - p->func == amdgpu_vm_do_copy_ptes || + p->src || !(flags & AMDGPU_PTE_VALID)) { dst = amdgpu_bo_gpu_offset(entry->bo); @@ -1294,9 +1294,23 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, entry->addr = (dst | flags); if (use_cpu_update) { + /* In case a huge page is replaced with a system + * memory mapping, p->pages_addr != NULL and + * amdgpu_vm_cpu_set_ptes would try to translate dst + * through amdgpu_vm_map_gart. But dst is already a + * GPU address (of the page table). Disable + * amdgpu_vm_map_gart temporarily. + */ + dma_addr_t *tmp; + + tmp = p->pages_addr; + p->pages_addr = NULL; + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); pde = pd_addr + (entry - parent->entries) * 8; amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + + p->pages_addr = tmp; } else { if (parent->bo->shadow) { pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); -- cgit v1.2.3 From 2e8f9fbe985e930055eb55323b8491cc668b178f Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 18 Aug 2017 15:50:17 +0200 Subject: drm/amdgpu: fix and cleanup shadow handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the shadow flag on the shadow and not the parent, always bind shadow BOs during allocation instead of manually, use the reservation_object wrappers to grab the lock. This fixes a couple of issues with binding the shadow BOs as well as correctly evicting them when memory becomes tight. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ---- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 ------ 3 files changed, 23 insertions(+), 37 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f7ffb029f6d5..e630d918fefc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev, goto err; } - r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem); - if (r) { - DRM_ERROR("%p bind failed\n", bo->shadow); - goto err; - } - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, NULL, fence, true); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e7e899190bef..9e495da0bb03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -91,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; - places[c].lpfn = 0; + if (flags & AMDGPU_GEM_CREATE_SHADOW) + places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT; + else + places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | @@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (bo->shadow) return 0; - bo->flags |= AMDGPU_GEM_CREATE_SHADOW; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); - - amdgpu_ttm_placement_init(adev, &placement, - placements, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW); r = amdgpu_bo_create_restricted(adev, size, byte_align, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, + AMDGPU_GEM_CREATE_CPU_GTT_USWC | + AMDGPU_GEM_CREATE_SHADOW, NULL, &placement, bo->tbo.resv, 0, @@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev, { struct ttm_placement placement = {0}; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; + uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW; int r; - memset(&placements, 0, - (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place)); + memset(&placements, 0, sizeof(placements)); + amdgpu_ttm_placement_init(adev, &placement, placements, + domain, parent_flags); - amdgpu_ttm_placement_init(adev, &placement, - placements, domain, flags); - - r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, - domain, flags, sg, &placement, - resv, init_value, bo_ptr); + r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain, + parent_flags, sg, &placement, resv, + init_value, bo_ptr); if (r) return r; - if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) { - if (!resv) { - r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL); - WARN_ON(r != 0); - } + if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) { + if (!resv) + WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + NULL)); r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr)); if (!resv) - ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock); + reservation_object_unlock((*bo_ptr)->tbo.resv); if (r) amdgpu_bo_unref(bo_ptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ba475af99332..96ec4e2b56e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, unsigned i; int r; - if (parent->bo->shadow) { - struct amdgpu_bo *shadow = parent->bo->shadow; - - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); - if (r) - return r; - } - if (use_cpu_for_update) { r = amdgpu_bo_kmap(parent->bo, NULL); if (r) -- cgit v1.2.3 From 457e0fee04b0c6c57a28a10e68b16c5f1386c80c Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 22 Aug 2017 12:50:46 +0200 Subject: drm/amdgpu: remove the GART copy hack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This isn't used since we don't map evicted BOs to GART any more. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Roger He Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 96ec4e2b56e9..3bd430e180b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1616,7 +1616,6 @@ error_free: * * @adev: amdgpu_device pointer * @exclusive: fence we need to sync to - * @gtt_flags: flags as they are used for GTT * @pages_addr: DMA addresses to use for mapping * @vm: requested vm * @mapping: mapped range and flags to use for the update @@ -1630,7 +1629,6 @@ error_free: */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, - uint64_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, @@ -1685,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } if (pages_addr) { - if (flags == gtt_flags) - src = adev->gart.table_addr + - (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; - else - max_entries = min(max_entries, 16ull * 1024ull); + max_entries = min(max_entries, 16ull * 1024ull); addr = 0; } else if (flags & AMDGPU_PTE_VALID) { addr += adev->vm_manager.vram_base_offset; @@ -1734,10 +1728,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; - uint64_t gtt_flags, flags; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; struct dma_fence *exclusive; + uint64_t flags; int r; if (clear || !bo_va->base.bo) { @@ -1757,15 +1751,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo) { + if (bo) flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo->tbo.bdev)) ? - flags : 0; - } else { + else flags = 0x0; - gtt_flags = ~0x0; - } spin_lock(&vm->status_lock); if (!list_empty(&bo_va->base.vm_status)) @@ -1773,8 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, exclusive, - gtt_flags, pages_addr, vm, + r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, mapping, flags, nodes, &bo_va->last_pt_update); if (r) -- cgit v1.2.3