diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_ct.c')
| -rw-r--r-- | drivers/gpu/drm/xe/xe_guc_ct.c | 330 |
1 files changed, 308 insertions, 22 deletions
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 2447de0ebedf..37509f619503 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -25,6 +25,7 @@ #include "xe_gt_printk.h" #include "xe_gt_sriov_pf_control.h" #include "xe_gt_sriov_pf_monitor.h" +#include "xe_gt_sriov_printk.h" #include "xe_gt_tlb_invalidation.h" #include "xe_guc.h" #include "xe_guc_log.h" @@ -84,6 +85,8 @@ struct g2h_fence { bool done; }; +#define make_u64(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo))) + static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer) { g2h_fence->response_buffer = response_buffer; @@ -514,6 +517,9 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct) */ void xe_guc_ct_stop(struct xe_guc_ct *ct) { + if (!xe_guc_ct_initialized(ct)) + return; + xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED); stop_g2h_handler(ct); } @@ -625,6 +631,47 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) spin_unlock_irq(&ct->fast_lock); } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ + unsigned int slot = fence % ARRAY_SIZE(ct->fast_req); +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + unsigned long entries[SZ_32]; + unsigned int n; + + n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); + + /* May be called under spinlock, so avoid sleeping */ + ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT); +#endif + ct->fast_req[slot].fence = fence; + ct->fast_req[slot].action = action; +} +#else +static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action) +{ +} +#endif + +/* + * The CT protocol accepts a 16 bits fence. This field is fully owned by the + * driver, the GuC will just copy it to the reply message. Since we need to + * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, + * we use one bit of the seqno as an indicator for that and a rolling counter + * for the remaining 15 bits. + */ +#define CT_SEQNO_MASK GENMASK(14, 0) +#define CT_SEQNO_UNTRACKED BIT(15) +static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) +{ + u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; + + if (!is_g2h_fence) + seqno |= CT_SEQNO_UNTRACKED; + + return seqno; +} + #define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, @@ -701,6 +748,9 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { + fast_req_track(ct, ct_fence_value, + FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0])); + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | @@ -733,25 +783,6 @@ corrupted: return -EPIPE; } -/* - * The CT protocol accepts a 16 bits fence. This field is fully owned by the - * driver, the GuC will just copy it to the reply message. Since we need to - * be able to distinguish between replies to REQUEST and FAST_REQUEST messages, - * we use one bit of the seqno as an indicator for that and a rolling counter - * for the remaining 15 bits. - */ -#define CT_SEQNO_MASK GENMASK(14, 0) -#define CT_SEQNO_UNTRACKED BIT(15) -static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence) -{ - u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK; - - if (!is_g2h_fence) - seqno |= CT_SEQNO_UNTRACKED; - - return seqno; -} - static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len, u32 g2h_len, u32 num_g2h, struct g2h_fence *g2h_fence) @@ -760,7 +791,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u16 seqno; int ret; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); xe_gt_assert(gt, !g2h_len || !g2h_fence); xe_gt_assert(gt, !num_g2h || !g2h_fence); xe_gt_assert(gt, !g2h_len || num_g2h); @@ -1143,6 +1174,55 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) return 0; } +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG) +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ + u16 fence_min = U16_MAX, fence_max = 0; + struct xe_gt *gt = ct_to_gt(ct); + bool found = false; + unsigned int n; +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + char *buf; +#endif + + lockdep_assert_held(&ct->lock); + + for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) { + if (ct->fast_req[n].fence < fence_min) + fence_min = ct->fast_req[n].fence; + if (ct->fast_req[n].fence > fence_max) + fence_max = ct->fast_req[n].fence; + + if (ct->fast_req[n].fence != fence) + continue; + found = true; + +#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC) + buf = kmalloc(SZ_4K, GFP_NOWAIT); + if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0)) + xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s", + fence, ct->fast_req[n].action, buf); + else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n", + fence, ct->fast_req[n].action); + kfree(buf); +#else + xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n", + fence, ct->fast_req[n].action); +#endif + break; + } + + if (!found) + xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n", + fence, fence_min, fence_max, ct->fence_seqno); +} +#else +static void fast_req_report(struct xe_guc_ct *ct, u16 fence) +{ +} +#endif + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1171,6 +1251,9 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) else xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n", type, fence); + + fast_req_report(ct, fence); + CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE); return -EPROTO; @@ -1344,7 +1427,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path) u32 action; u32 *hxg; - xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED); + xe_gt_assert(gt, xe_guc_ct_initialized(ct)); lockdep_assert_held(&ct->fast_lock); if (ct->state == XE_GUC_CT_STATE_DISABLED) @@ -1624,6 +1707,186 @@ static void g2h_worker_func(struct work_struct *w) receive_g2h(ct); } +static void xe_fixup_u64_in_cmds(struct xe_device *xe, struct iosys_map *cmds, + u32 size, u32 idx, s64 shift) +{ + u32 hi, lo; + u64 offset; + + lo = xe_map_rd_ring_u32(xe, cmds, idx, size); + hi = xe_map_rd_ring_u32(xe, cmds, idx + 1, size); + offset = make_u64(hi, lo); + offset += shift; + lo = lower_32_bits(offset); + hi = upper_32_bits(offset); + xe_map_wr_ring_u32(xe, cmds, idx, size, lo); + xe_map_wr_ring_u32(xe, cmds, idx + 1, size, hi); +} + +/* + * Shift any GGTT addresses within a single message left within CTB from + * before post-migration recovery. + * @ct: pointer to CT struct of the target GuC + * @cmds: iomap buffer containing CT messages + * @head: start of the target message within the buffer + * @len: length of the target message + * @size: size of the commands buffer + * @shift: the address shift to be added to each GGTT reference + * Return: true if the message was fixed or needed no fixups, false on failure + */ +static bool ct_fixup_ggtt_in_message(struct xe_guc_ct *ct, + struct iosys_map *cmds, u32 head, + u32 len, u32 size, s64 shift) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 action, i, n; + + xe_gt_assert(gt, len >= GUC_HXG_MSG_MIN_LEN); + + msg[0] = xe_map_rd_ring_u32(xe, cmds, head, size); + action = FIELD_GET(GUC_HXG_REQUEST_MSG_0_ACTION, msg[0]); + + xe_gt_sriov_dbg_verbose(gt, "fixing H2G %#x\n", action); + + switch (action) { + case XE_GUC_ACTION_REGISTER_CONTEXT: + if (len != XE_GUC_REGISTER_CONTEXT_MSG_LEN) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR, shift); + break; + case XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC: + if (len < XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN) + goto err_len; + n = xe_map_rd_ring_u32(xe, cmds, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS, size); + if (len != XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN + 2 * n) + goto err_len; + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER, + shift); + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER, + shift); + for (i = 0; i < n; i++) + xe_fixup_u64_in_cmds(xe, cmds, size, head + + XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR + + 2 * i, shift); + break; + default: + break; + } + return true; + +err_len: + xe_gt_err(gt, "Skipped G2G %#x message fixups, unexpected length (%u)\n", action, len); + return false; +} + +/* + * Apply fixups to the next outgoing CT message within given CTB + * @ct: the &xe_guc_ct struct instance representing the target GuC + * @h2g: the &guc_ctb struct instance of the target buffer + * @shift: shift to be added to all GGTT addresses within the CTB + * @mhead: pointer to an integer storing message start position; the + * position is changed to next message before this function return + * @avail: size of the area available for parsing, that is length + * of all remaining messages stored within the CTB + * Return: size of the area available for parsing after one message + * has been parsed, that is length remaining from the updated mhead + */ +static int ct_fixup_ggtt_in_buffer(struct xe_guc_ct *ct, struct guc_ctb *h2g, + s64 shift, u32 *mhead, s32 avail) +{ + struct xe_gt *gt = ct_to_gt(ct); + struct xe_device *xe = ct_to_xe(ct); + u32 msg[GUC_HXG_MSG_MIN_LEN]; + u32 size = h2g->info.size; + u32 head = *mhead; + u32 len; + + xe_gt_assert(gt, avail >= (s32)GUC_CTB_MSG_MIN_LEN); + + /* Read header */ + msg[0] = xe_map_rd_ring_u32(xe, &h2g->cmds, head, size); + len = FIELD_GET(GUC_CTB_MSG_0_NUM_DWORDS, msg[0]) + GUC_CTB_MSG_MIN_LEN; + + if (unlikely(len > (u32)avail)) { + xe_gt_err(gt, "H2G channel broken on read, avail=%d, len=%d, fixups skipped\n", + avail, len); + return 0; + } + + head = (head + GUC_CTB_MSG_MIN_LEN) % size; + if (!ct_fixup_ggtt_in_message(ct, &h2g->cmds, head, msg_len_to_hxg_len(len), size, shift)) + return 0; + *mhead = (head + msg_len_to_hxg_len(len)) % size; + + return avail - len; +} + +/** + * xe_guc_ct_fixup_messages_with_ggtt - Fixup any pending H2G CTB messages + * @ct: pointer to CT struct of the target GuC + * @ggtt_shift: shift to be added to all GGTT addresses within the CTB + * + * Messages in GuC to Host CTB are owned by GuC and any fixups in them + * are made by GuC. But content of the Host to GuC CTB is owned by the + * KMD, so fixups to GGTT references in any pending messages need to be + * applied here. + * This function updates GGTT offsets in payloads of pending H2G CTB + * messages (messages which were not consumed by GuC before the VF got + * paused). + */ +void xe_guc_ct_fixup_messages_with_ggtt(struct xe_guc_ct *ct, s64 ggtt_shift) +{ + struct guc_ctb *h2g = &ct->ctbs.h2g; + struct xe_guc *guc = ct_to_guc(ct); + struct xe_gt *gt = guc_to_gt(guc); + u32 head, tail, size; + s32 avail; + + if (unlikely(h2g->info.broken)) + return; + + h2g->info.head = desc_read(ct_to_xe(ct), h2g, head); + head = h2g->info.head; + tail = READ_ONCE(h2g->info.tail); + size = h2g->info.size; + + if (unlikely(head > size)) + goto corrupted; + + if (unlikely(tail >= size)) + goto corrupted; + + avail = tail - head; + + /* beware of buffer wrap case */ + if (unlikely(avail < 0)) + avail += size; + xe_gt_dbg(gt, "available %d (%u:%u:%u)\n", avail, head, tail, size); + xe_gt_assert(gt, avail >= 0); + + while (avail > 0) + avail = ct_fixup_ggtt_in_buffer(ct, h2g, ggtt_shift, &head, avail); + + return; + +corrupted: + xe_gt_err(gt, "Corrupted H2G descriptor head=%u tail=%u size=%u, fixups not applied\n", + head, tail, size); + h2g->info.broken = true; +} + static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bool atomic, bool want_ctb) { @@ -1770,6 +2033,24 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb) } #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +/* + * This is a helper function which assists the driver in identifying if a fault + * injection test is currently active, allowing it to reduce unnecessary debug + * output. Typically, the function returns zero, but the fault injection + * framework can alter this to return an error. Since faults are injected + * through this function, it's important to ensure the compiler doesn't optimize + * it into an inline function. To avoid such optimization, the 'noinline' + * attribute is applied. Compiler optimizes the static function defined in the + * header file as an inline function. + */ +noinline int xe_is_injection_active(void) { return 0; } +ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO); +#else +int xe_is_injection_active(void) { return 0; } +#endif + static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code) { struct xe_guc_log_snapshot *snapshot_log; @@ -1780,6 +2061,12 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso if (ctb) ctb->info.broken = true; + /* + * Huge dump is getting generated when injecting error for guc CT/MMIO + * functions. So, let us suppress the dump when fault is injected. + */ + if (xe_is_injection_active()) + return; /* Ignore further errors after the first dump until a reset */ if (ct->dead.reported) @@ -1830,7 +2117,6 @@ static void ct_dead_print(struct xe_dead_ct *dead) return; } - /* Can't generate a genuine core dump at this point, so just do the good bits */ drm_puts(&lp, "**** Xe Device Coredump ****\n"); drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason); |
