From 70773bef4e091ff6d2a91e3dfb4f29013eb81f1f Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:09:49 +0200 Subject: drm/amdgpu: update userqueue BOs and PDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch updates the VM_IOCTL to allow userspace to synchronize the mapping/unmapping of a BO in the page table. The major changes are: - it adds a drm_timeline object as an input parameter to the VM IOCTL. - this object is used by the kernel to sync the update of the BO in the page table during the mapping of the object. - the kernel also synchronizes the tlb flush of the page table entry of this object during the unmapping (Added in this series: https://patchwork.freedesktop.org/series/131276/ and https://patchwork.freedesktop.org/patch/584182/) - the userspace can wait on this timeline, and then the BO is ready to be consumed by the GPU. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: - remove the eviction fence coupling V3: - added the drm timeline support instead of input/output fence (Christian) V4: - made timeline 64-bit (Christian) - bug fix (Arvind) V5: GLCTS bug fix (Arvind) V6: Rename syncobj_handle -> timeline_syncobj_out Rename point -> timeline_point_in (Marek) V7: Addressed review comments from Christian: - do not send last_update fence in case of vm_clear_freed, instead return the fence from gen_va_update_vm - move the functions to update bo_mapping to amdgpu_gem.c - do not use amdgpu_userq_update_vm anymore in userq_create() V8: Addressed review comments from Christian: - Split amdgpu_gem_update_bo_mapping function. - amdgpu_gem_va_update_vm should return stub for error. V9: Addressed review comments from Christian: - Rename the function amdgpu_gem_update_timeline_node. - amdgpu_gem_update_timeline_node should be void function. - when timeline_point is zero don't allocate a chain and call drm_syncobj_replace_fence() instead of drm_syncobj_add_point(). V11: rebase V12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. V13: Fix the review comment by renaming timeline syncobj (Marek) Cc: Alex Deucher Cc: Felix Kuehling Cc: Christian König Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 113 +++++++++++++++++++++++++++++--- 1 file changed, 104 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 69429df09477..542a1b70f2ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_display.h" @@ -44,6 +45,75 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_update_timeline_node(struct drm_file *filp, + uint32_t syncobj_handle, + uint64_t point, + struct drm_syncobj **syncobj, + struct dma_fence_chain **chain) +{ + if (!syncobj_handle) + return 0; + + /* Find the sync object */ + *syncobj = drm_syncobj_find(filp, syncobj_handle); + if (!*syncobj) + return -ENOENT; + + if (!point) + return 0; + + /* Allocate the chain node */ + *chain = dma_fence_chain_alloc(); + if (!*chain) { + drm_syncobj_put(*syncobj); + return -ENOMEM; + } + + return 0; +} + +static void +amdgpu_gem_update_bo_mapping(struct drm_file *filp, + struct amdgpu_bo_va *bo_va, + uint32_t operation, + uint64_t point, + struct dma_fence *fence, + struct drm_syncobj *syncobj, + struct dma_fence_chain *chain) +{ + struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct dma_fence *last_update; + + if (!syncobj) + return; + + /* Find the last update fence */ + switch (operation) { + case AMDGPU_VA_OP_MAP: + case AMDGPU_VA_OP_REPLACE: + if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)) + last_update = vm->last_update; + else + last_update = bo_va->last_pt_update; + break; + case AMDGPU_VA_OP_UNMAP: + case AMDGPU_VA_OP_CLEAR: + last_update = fence; + break; + default: + return; + } + + /* Add fence to timeline */ + if (!point) + drm_syncobj_replace_fence(syncobj, last_update); + else + drm_syncobj_add_point(syncobj, chain, last_update, point); +} + static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo = vmf->vma->vm_private_data; @@ -638,18 +708,23 @@ out: * * Update the bo_va directly after setting its address. Errors are not * vital here, so they are not reported back to userspace. + * + * Returns resulting fence if freed BO(s) got cleared from the PT. + * otherwise stub fence in case of error. */ -static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_va *bo_va, - uint32_t operation) +static struct dma_fence * +amdgpu_gem_va_update_vm(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_bo_va *bo_va, + uint32_t operation) { + struct dma_fence *fence = dma_fence_get_stub(); int r; if (!amdgpu_vm_ready(vm)) - return; + return fence; - r = amdgpu_vm_clear_freed(adev, vm, NULL); + r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r) goto error; @@ -665,6 +740,8 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, error: if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); + + return fence; } /** @@ -713,6 +790,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; + struct drm_syncobj *timeline_syncobj = NULL; + struct dma_fence_chain *timeline_chain = NULL; + struct dma_fence *fence; struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; @@ -827,9 +907,24 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) - amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, - args->operation); + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { + + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + + fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, + args->operation); + + if (!r) + amdgpu_gem_update_bo_mapping(filp, bo_va, + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + } error: drm_exec_fini(&exec); -- cgit v1.2.3 From 38c67ec9aa4be7bc0ae55e7bebe95f615fa09a1e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 25 Sep 2024 18:10:41 +0200 Subject: drm/amdgpu: Add input fence to sync bo map/unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds input fences to VM_IOCTL for buffer object. The kernel will map/unmap the BO only when the fence is signaled. The UAPI for the same has been approved here: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 V2: Bug fix (Arvind) V3: Bug fix (Arvind) V4: Rename UAPI objects as per UAPI review (Marek) V5: Addressed review comemnts from Christian - function should return error. - Add 'TODO' comment - The input fence should be independent of the operation. V6: Addressed review comemnts from Christian - Release the memory allocated by memdup_user(). V7: Addressed review comemnts from Christian - Drop the debug print and add "return r;" for the error handling. V11: Rebase v12: Fix 32-bit holes issue in sturct drm_amdgpu_gem_va. v13: Fix deadlock issue. v14: Fix merge conflict. v15: Fix review comment by renaming syncobj handles. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Arvind Yadav Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 46 +++++++++++++++++++++++++++++++++ include/uapi/drm/amdgpu_drm.h | 4 +++ 2 files changed, 50 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 542a1b70f2ee..bdf46cb4327f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -45,6 +45,45 @@ #include "amdgpu_xgmi.h" #include "amdgpu_vm.h" +static int +amdgpu_gem_add_input_fence(struct drm_file *filp, + uint64_t syncobj_handles_array, + uint32_t num_syncobj_handles) +{ + struct dma_fence *fence; + uint32_t *syncobj_handles; + int ret, i; + + if (!num_syncobj_handles) + return 0; + + syncobj_handles = memdup_user(u64_to_user_ptr(syncobj_handles_array), + sizeof(uint32_t) * num_syncobj_handles); + if (IS_ERR(syncobj_handles)) + return PTR_ERR(syncobj_handles); + + for (i = 0; i < num_syncobj_handles; i++) { + + if (!syncobj_handles[i]) { + ret = -EINVAL; + goto free_memdup; + } + + ret = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence); + if (ret) + goto free_memdup; + + dma_fence_wait(fence, false); + + /* TODO: optimize async handling */ + dma_fence_put(fence); + } + +free_memdup: + kfree(syncobj_handles); + return ret; +} + static int amdgpu_gem_update_timeline_node(struct drm_file *filp, uint32_t syncobj_handle, @@ -854,6 +893,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, abo = NULL; } + r = amdgpu_gem_add_input_fence(filp, + args->input_fence_syncobj_handles, + args->num_syncobj_handles); + if (r) + goto error_put_gobj; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { @@ -928,6 +973,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, error: drm_exec_fini(&exec); +error_put_gobj: drm_gem_object_put(gobj); return r; } diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 02cf03e811d5..0910a6f8c5f2 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -884,6 +884,10 @@ struct drm_amdgpu_gem_va { * at vm_timeline_point. */ __u32 vm_timeline_syncobj_out; + /** the number of syncobj handles in @input_fence_syncobj_handles */ + __u32 num_syncobj_handles; + /** Array of sync object handle to wait for given input fences */ + __u64 input_fence_syncobj_handles; }; #define AMDGPU_HW_IP_GFX 0 -- cgit v1.2.3 From 5f2f78314c5c3e4d87d638eea5a9592e89947e9e Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 28 Nov 2023 19:52:30 +0530 Subject: Revert "drm/amdgpu: don't allow userspace to create a doorbell BO" This reverts commit 6be2ad4f0073c541146caa66c5ae936c955a8224. This patch was to block userspace to use doorbell manager UAPI until usermode queue UAPI gets approved. UQ UAPI got approved in the following MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/392 Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Alex Deucher Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index bdf46cb4327f..06171eab6e92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -430,10 +430,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; - /* reject DOORBELLs until userspace code to use it is available */ - if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) - return -EINVAL; - /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | -- cgit v1.2.3 From fb796c308767606bbd6c2e1bf4fa9446ec68ce51 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Tue, 27 Aug 2024 15:44:43 +0530 Subject: drm/amdgpu: add gfx eviction fence helpers This patch adds basic eviction fence framework for the gfx buffers. The idea is to: - One eviction fence is created per gfx process, at kms_open. - This fence is attached to all the gem buffers created by this process. - This fence is detached to all the gem buffers at postclose_kms. This framework will be further used for usermode queues. V2: Addressed review comments from Christian - keep fence_ctx and fence_seq directly in fpriv - evcition_fence should be dynamically allocated - do not save eviction fence instance in BO, there could be many such fences attached to one BO - use dma_resv_replace_fence() in detach V3: Addressed review comments from Christian - eviction fence create and destroy functions should be called only once from fpriv create/destroy - use dma_fence_put() in eviction_fence_destroy V4: Addressed review comments from Christian: - create a separate ev_fence_mgr structure - cleanup fence init part - do not add a domain for fence owner KGD V5: Addressed review comments from Christian: - drop the dma_fence_is_signaled check - use a local variable to access evf_mgr->ev_fence under the spin_lock() multiple places - remove the vm->is_compute_ctx check to attach gfx eviction fence, in gem_object_open V6: Addressed review comments from Christian: - drop the return value from eviction_fence_signal - reserve_fence should be the first thing inside the attach_eviction_fence function, also keep the resv_add_fence inside the lock - remove the unwanted ev_fence check inside detach function - fix wrong variable check in eviction_fence_init function - return the error value of eviction_fence_init to the caller, dont keep it void. - fail gem_object_open if attaching of eviction_fence fails - detach the eviction fence only when amdgpu_vm_is_bo_always_valid is not true. V7: Addressed review comments from Christian: - Do not add a uq_mgr ptr in ev_fence, rather add evf_mgr V8: Move eviction fence enabling into separate patch for CI Cc: Christian Koenig Cc: Alex Deucher Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 144 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h | 63 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 + 6 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 1cc307904089..0b0c8ec46516 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -66,7 +66,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ - amdgpu_cper.o amdgpu_userq_fence.o + amdgpu_cper.o amdgpu_userq_fence.o amdgpu_eviction_fence.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index be10fbe293e4..8c6c3c5451e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -114,6 +114,7 @@ #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" #include "amdgpu_userqueue.h" +#include "amdgpu_eviction_fence.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif @@ -490,7 +491,6 @@ struct amdgpu_flip_work { bool async; }; - /* * file private structure */ @@ -504,6 +504,10 @@ struct amdgpu_fpriv { struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; struct amdgpu_userq_mgr userq_mgr; + + /* Eviction fence infra */ + struct amdgpu_eviction_fence_mgr evf_mgr; + /** GPU partition selection */ uint32_t xcp_id; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c new file mode 100644 index 000000000000..056798e2b050 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include +#include "amdgpu.h" + +static const char * +amdgpu_eviction_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu"; +} + +static const char * +amdgpu_eviction_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_eviction_fence *ef; + + ef = container_of(f, struct amdgpu_eviction_fence, base); + return ef->timeline_name; +} + +static const struct dma_fence_ops amdgpu_eviction_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_eviction_fence_get_driver_name, + .get_timeline_name = amdgpu_eviction_fence_get_timeline_name, +}; + +void amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + spin_lock(&evf_mgr->ev_fence_lock); + dma_fence_signal(&evf_mgr->ev_fence->base); + spin_unlock(&evf_mgr->ev_fence_lock); +} + +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + ev_fence = kzalloc(sizeof(*ev_fence), GFP_KERNEL); + if (!ev_fence) + return NULL; + + ev_fence->evf_mgr = evf_mgr; + get_task_comm(ev_fence->timeline_name, current); + spin_lock_init(&ev_fence->lock); + dma_fence_init(&ev_fence->base, &amdgpu_eviction_fence_ops, + &ev_fence->lock, evf_mgr->ev_fence_ctx, + atomic_inc_return(&evf_mgr->ev_fence_seq)); + return ev_fence; +} + +void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + + if (!ev_fence) + return; + + /* Last unref of ev_fence */ + dma_fence_put(&evf_mgr->ev_fence->base); +} + +int amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *ef; + struct amdgpu_eviction_fence *ev_fence; + struct dma_resv *resv = bo->tbo.base.resv; + int ret; + + if (!resv) + return 0; + + ret = dma_resv_reserve_fences(resv, 1); + if (ret) { + DRM_DEBUG_DRIVER("Failed to resv fence space\n"); + return ret; + } + + spin_lock(&evf_mgr->ev_fence_lock); + ev_fence = evf_mgr->ev_fence; + if (ev_fence) { + ef = dma_fence_get(&ev_fence->base); + dma_resv_add_fence(resv, ef, DMA_RESV_USAGE_BOOKKEEP); + } + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} + +void amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo) +{ + struct dma_fence *stub = dma_fence_get_stub(); + + dma_resv_replace_fences(bo->tbo.base.resv, evf_mgr->ev_fence_ctx, + stub, DMA_RESV_USAGE_BOOKKEEP); + dma_fence_put(stub); +} + +int amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr) +{ + struct amdgpu_eviction_fence *ev_fence; + + /* This needs to be done one time per open */ + atomic_set(&evf_mgr->ev_fence_seq, 0); + evf_mgr->ev_fence_ctx = dma_fence_context_alloc(1); + spin_lock_init(&evf_mgr->ev_fence_lock); + + ev_fence = amdgpu_eviction_fence_create(evf_mgr); + if (!ev_fence) { + DRM_ERROR("Failed to craete eviction fence\n"); + return -ENOMEM; + } + + spin_lock(&evf_mgr->ev_fence_lock); + evf_mgr->ev_fence = ev_fence; + spin_unlock(&evf_mgr->ev_fence_lock); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h new file mode 100644 index 000000000000..aba12a43f433 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef AMDGPU_EV_FENCE_H_ +#define AMDGPU_EV_FENCE_H_ + +struct amdgpu_eviction_fence { + struct dma_fence base; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; + struct amdgpu_eviction_fence_mgr *evf_mgr; +}; + +struct amdgpu_eviction_fence_mgr { + u64 ev_fence_ctx; + atomic_t ev_fence_seq; + spinlock_t ev_fence_lock; + struct amdgpu_eviction_fence *ev_fence; +}; + +/* Eviction fence helper functions */ +struct amdgpu_eviction_fence * +amdgpu_eviction_fence_create(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr); + +int +amdgpu_eviction_fence_attach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +void +amdgpu_eviction_fence_detach(struct amdgpu_eviction_fence_mgr *evf_mgr, + struct amdgpu_bo *bo); + +int +amdgpu_eviction_fence_init(struct amdgpu_eviction_fence_mgr *evf_mgr); + +void +amdgpu_eviction_fence_signal(struct amdgpu_eviction_fence_mgr *evf_mgr); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 06171eab6e92..682b76cad359 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -293,6 +293,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, bo_va = amdgpu_vm_bo_add(adev, vm, abo); else ++bo_va->ref_count; + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 2e07776dd408..700442584f97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -1413,6 +1413,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); + r = amdgpu_eviction_fence_init(&fpriv->evf_mgr); + if (r) + goto error_vm; + amdgpu_ctx_mgr_init(&fpriv->ctx_mgr, adev); r = amdgpu_userq_mgr_init(&fpriv->userq_mgr, adev); @@ -1486,6 +1490,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_bo_unreserve(pd); } + amdgpu_eviction_fence_destroy(&fpriv->evf_mgr); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_userq_mgr_fini(&fpriv->userq_mgr); -- cgit v1.2.3 From 31f7efcdca4d1caaa3a0babc33377e27e6f9b593 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Wed, 20 Nov 2024 18:45:33 +0100 Subject: drm/amdgpu: enable eviction fence This patch enables attachment and detachment of eviction fences. This is just a fork of eviction fence enabling code from the first patch of the series so that the CI testing can happen on fully fledged code. Cc: Alex Deucher Cc: Christian Koenig Reviewed-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 10 ++++++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c index 8358dc6b68ad..167951aee502 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eviction_fence.c @@ -177,6 +177,8 @@ void amdgpu_eviction_fence_destroy(struct amdgpu_eviction_fence_mgr *evf_mgr) if (!ev_fence) return; + dma_fence_wait(&ev_fence->base, false); + /* Last unref of ev_fence */ dma_fence_put(&evf_mgr->ev_fence->base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 682b76cad359..6850a6954a71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -294,6 +294,13 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, else ++bo_va->ref_count; + /* attach gfx eviction fence */ + r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); + if (r) { + DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + return r; + } + amdgpu_bo_unreserve(abo); /* Validate and add eviction fence to DMABuf imports with dynamic @@ -344,6 +351,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); -- cgit v1.2.3 From 8639d2f5ca27ca533e782cc8f8de62ea002f1833 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 20 Dec 2024 13:44:23 +0100 Subject: drm/amdgpu: fix call to amdgpu_eviction_fence_detach MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That needs to be done after grabbing the lock, not before. Signed-off-by: Christian König Acked-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 6850a6954a71..b9b80b0b60ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -351,9 +351,6 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); @@ -367,6 +364,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; } + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + amdgpu_eviction_fence_detach(&fpriv->evf_mgr, bo); + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; -- cgit v1.2.3 From ad6c120f6888033b1eb198a7a15ed4c6355edf6d Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 18 Feb 2025 18:56:25 +0530 Subject: drm/amdgpu: fix the memleak caused by fence not released MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Encountering a taint issue during the unloading of gpu_sched due to the fence not being released/put. In this context, amdgpu_vm_clear_freed is responsible for creating a job to update the page table (PT). It allocates kmem_cache for drm_sched_fence and returns the finished fence associated with job->base.s_fence. In case of Usermode queue this finished fence is added to the timeline sync object through amdgpu_gem_update_bo_mapping, which is utilized by user space to ensure the completion of the PT update. [ 508.900587] ============================================================================= [ 508.900605] BUG drm_sched_fence (Tainted: G N): Objects remaining in drm_sched_fence on __kmem_cache_shutdown() [ 508.900617] ----------------------------------------------------------------------------- [ 508.900627] Slab 0xffffe0cc04548780 objects=32 used=2 fp=0xffff8ea81521f000 flags=0x17ffffc0000240(workingset|head|node=0|zone=2|lastcpupid=0x1fffff) [ 508.900645] CPU: 3 UID: 0 PID: 2337 Comm: rmmod Tainted: G N 6.12.0+ #1 [ 508.900651] Tainted: [N]=TEST [ 508.900653] Hardware name: Gigabyte Technology Co., Ltd. X570 AORUS ELITE/X570 AORUS ELITE, BIOS F34 06/10/2021 [ 508.900656] Call Trace: [ 508.900659] [ 508.900665] dump_stack_lvl+0x70/0x90 [ 508.900674] dump_stack+0x14/0x20 [ 508.900678] slab_err+0xcb/0x110 [ 508.900687] ? srso_return_thunk+0x5/0x5f [ 508.900692] ? try_to_grab_pending+0xd3/0x1d0 [ 508.900697] ? srso_return_thunk+0x5/0x5f [ 508.900701] ? mutex_lock+0x17/0x50 [ 508.900708] __kmem_cache_shutdown+0x144/0x2d0 [ 508.900713] ? flush_rcu_work+0x50/0x60 [ 508.900719] kmem_cache_destroy+0x46/0x1f0 [ 508.900728] drm_sched_fence_slab_fini+0x19/0x970 [gpu_sched] [ 508.900736] __do_sys_delete_module.constprop.0+0x184/0x320 [ 508.900744] ? srso_return_thunk+0x5/0x5f [ 508.900747] ? debug_smp_processor_id+0x1b/0x30 [ 508.900754] __x64_sys_delete_module+0x16/0x20 [ 508.900758] x64_sys_call+0xdf/0x20d0 [ 508.900763] do_syscall_64+0x51/0x120 [ 508.900769] entry_SYSCALL_64_after_hwframe+0x76/0x7e v2: call dma_fence_put in amdgpu_gem_va_update_vm v3: Addressed review comments from Christian. - calling amdgpu_gem_update_timeline_node before switch. - puting a dma_fence in case of error or !timeline_syncobj. v4: Addressed review comments from Christian. Cc: Alex Deucher Cc: Christian König Cc: Shashank Sharma Cc: Sunil Khatri Reviewed-by: Christian König Signed-off-by: Le Ma Signed-off-by: Arvind Yadav Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b9b80b0b60ca..f03fc3cf4d50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -934,6 +934,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, bo_va = NULL; } + r = amdgpu_gem_update_timeline_node(filp, + args->vm_timeline_syncobj_out, + args->vm_timeline_point, + &timeline_syncobj, + &timeline_chain); + if (r) + goto error; + switch (args->operation) { case AMDGPU_VA_OP_MAP: va_flags = amdgpu_gem_va_map_flags(adev, args->flags); @@ -960,22 +968,18 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, break; } if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) { - - r = amdgpu_gem_update_timeline_node(filp, - args->vm_timeline_syncobj_out, - args->vm_timeline_point, - &timeline_syncobj, - &timeline_chain); - fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); - if (!r) + if (timeline_syncobj) amdgpu_gem_update_bo_mapping(filp, bo_va, - args->operation, - args->vm_timeline_point, - fence, timeline_syncobj, - timeline_chain); + args->operation, + args->vm_timeline_point, + fence, timeline_syncobj, + timeline_chain); + else + dma_fence_put(fence); + } error: -- cgit v1.2.3 From def41146b96a9e292a17cddb1ac97007f41c44bc Mon Sep 17 00:00:00 2001 From: Prike Liang Date: Tue, 6 May 2025 16:32:23 +0800 Subject: drm/amdgpu: unreserve the gem BO before returning from attach error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It requires unlocking the reserved gem BO before returning from attaching the eviction fence error. Signed-off-by: Prike Liang Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index f03fc3cf4d50..2c68118fe9fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -298,6 +298,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, r = amdgpu_eviction_fence_attach(&fpriv->evf_mgr, abo); if (r) { DRM_DEBUG_DRIVER("Failed to attach eviction fence to BO\n"); + amdgpu_bo_unreserve(abo); return r; } -- cgit v1.2.3