Diffstat (limited to 'drivers/gpu/drm')
266 files changed, 5524 insertions, 3216 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 71913a18d142..a38e0fb4a6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -38,6 +38,7 @@ #include "amdgpu_gem.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> +#include <linux/dma-fence-array.h> /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table @@ -187,6 +188,48 @@ error: return ERR_PTR(ret); } +static int +__reservation_object_make_exclusive(struct reservation_object *obj) +{ + struct dma_fence **fences; + unsigned int count; + int r; + + if (!reservation_object_get_list(obj)) /* no shared fences to convert */ + return 0; + + r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences); + if (r) + return r; + + if (count == 0) { + /* Now that was unexpected. */ + } else if (count == 1) { + reservation_object_add_excl_fence(obj, fences[0]); + dma_fence_put(fences[0]); + kfree(fences); + } else { + struct dma_fence_array *array; + + array = dma_fence_array_create(count, fences, + dma_fence_context_alloc(1), 0, + false); + if (!array) + goto err_fences_put; + + reservation_object_add_excl_fence(obj, &array->base); + dma_fence_put(&array->base); + } + + return 0; + +err_fences_put: + while (count--) + dma_fence_put(fences[count]); + kfree(fences); + return -ENOMEM; +} + /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation * @dma_buf: Shared DMA buffer @@ -218,16 +261,16 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, if (attach->dev->driver != adev->dev->driver) { /* - * Wait for all shared fences to complete before we switch to future - * use of exclusive fence on this prime shared bo. + * We only create shared fences for internal use, but importers + * of the dmabuf rely on exclusive fences for implicitly + * tracking write hazards. As any of the current fences may + * correspond to a write, we need to convert all existing + * fences on the reservation object into a single exclusive + * fence. 
*/ - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, - true, false, - MAX_SCHEDULE_TIMEOUT); - if (unlikely(r < 0)) { - DRM_DEBUG_PRIME("Fence wait failed: %li\n", r); + r = __reservation_object_make_exclusive(bo->tbo.resv); + if (r) goto error_unreserve; - } } /* pin buffer into GTT */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index d87f165e3a23..3091488cd8cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -84,8 +84,10 @@ static int psp_sw_fini(void *handle) adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); adev->psp.asd_fw = NULL; - release_firmware(adev->psp.ta_fw); - adev->psp.ta_fw = NULL; + if (adev->psp.ta_fw) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + } return 0; } @@ -440,6 +442,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; + if (!psp->adev->psp.ta_fw) + return -ENOENT; + if (!psp->xgmi_context.initialized) { ret = psp_xgmi_init_shared_buf(psp); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b06ffd29ccc6..942b5ebc6dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3372,14 +3372,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, struct amdgpu_task_info *task_info) { struct amdgpu_vm *vm; + unsigned long flags; - spin_lock(&adev->vm_manager.pasid_lock); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); if (vm) *task_info = vm->task_info; - spin_unlock(&adev->vm_manager.pasid_lock); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 727370ccd854..c69d51598cfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -90,7 +90,20 @@ static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, bool enable) { + u32 tmp = 0; + if (enable) { + tmp = REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, mmDOORBELL_SELFRING_GPA_APER_CNTL, tmp); } static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index f71384be1f97..860b70d80d3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -98,18 +98,22 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); - if (err) - goto out2; - - err = amdgpu_ucode_validate(adev->psp.ta_fw); - if (err) - goto out2; - - ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; - adev->psp.ta_xgmi_ucode_version = 
le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); - adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); - adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + - le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + if (err) { + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; + dev_info(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + } else { + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 62d272b4be19..99ebcf29dcb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -858,11 +858,13 @@ static int soc15_common_early_init(void *handle) case CHIP_RAVEN: adev->asic_funcs = &soc15_asic_funcs; if (adev->rev_id >= 0x8) - adev->external_rev_id = adev->rev_id + 0x81; + adev->external_rev_id = adev->rev_id + 0x79; else if (adev->pdev->device == 0x15d8) adev->external_rev_id = adev->rev_id + 0x41; + else if (adev->rev_id == 1) + adev->external_rev_id = adev->rev_id + 0x20; else - adev->external_rev_id = 0x1; + adev->external_rev_id = adev->rev_id + 0x01; if (adev->rev_id >= 0x8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 5d85ff341385..2e7c44955f43 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -863,7 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size, return 0; } -#if CONFIG_X86_64 +#ifdef CONFIG_X86_64 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size, uint32_t *num_entries, struct crat_subtype_iolink *sub_type_hdr) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7579592052cc..c87fcda61b66 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -699,22 +699,36 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend) { struct amdgpu_dm_connector *aconnector; struct drm_connector *connector; + struct drm_dp_mst_topology_mgr *mgr; + int ret; + bool need_hotplug = false; drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - aconnector = to_amdgpu_dm_connector(connector); - if (aconnector->dc_link->type == dc_connection_mst_branch && - !aconnector->mst_port) { + list_for_each_entry(connector, &dev->mode_config.connector_list, + head) { + aconnector = to_amdgpu_dm_connector(connector); + if (aconnector->dc_link->type != dc_connection_mst_branch || + aconnector->mst_port) + continue; - if (suspend) - drm_dp_mst_topology_mgr_suspend(&aconnector->mst_mgr); - else - drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr); - } + mgr = &aconnector->mst_mgr; + + if (suspend) { + drm_dp_mst_topology_mgr_suspend(mgr); + } else { + ret = drm_dp_mst_topology_mgr_resume(mgr); + if (ret < 0) { + drm_dp_mst_topology_mgr_set_mst(mgr, false); + need_hotplug = true; + } + } } drm_modeset_unlock(&dev->mode_config.connection_mutex); + + if 
(need_hotplug) + drm_kms_helper_hotplug_event(dev); } /** @@ -898,7 +912,6 @@ static int dm_resume(void *handle) struct drm_plane_state *new_plane_state; struct dm_plane_state *dm_new_plane_state; enum dc_connection_type new_connection_type = dc_connection_none; - int ret; int i; /* power on hardware */ @@ -971,13 +984,13 @@ static int dm_resume(void *handle) } } - ret = drm_atomic_helper_resume(ddev, dm->cached_state); + drm_atomic_helper_resume(ddev, dm->cached_state); dm->cached_state = NULL; amdgpu_dm_irq_resume_late(adev); - return ret; + return 0; } /** @@ -4147,7 +4160,8 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm, } if (connector_type == DRM_MODE_CONNECTOR_HDMIA || - connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + connector_type == DRM_MODE_CONNECTOR_DisplayPort || + connector_type == DRM_MODE_CONNECTOR_eDP) { drm_connector_attach_vrr_capable_property( &aconnector->base); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index cca3e16cda4f..4a55cde027cf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -671,6 +671,25 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us return bytes_from_user; } +/* + * Returns the min and max vrr vfreq through the connector's debugfs file. + * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range + */ +static int vrr_range_show(struct seq_file *m, void *data) +{ + struct drm_connector *connector = m->private; + struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector); + + if (connector->status != connector_status_connected) + return -ENODEV; + + seq_printf(m, "Min: %u\n", (unsigned int)aconnector->min_vfreq); + seq_printf(m, "Max: %u\n", (unsigned int)aconnector->max_vfreq); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(vrr_range); + static const struct file_operations dp_link_settings_debugfs_fops = { .owner = THIS_MODULE, .read = dp_link_settings_read, @@ -697,7 +716,8 @@ static const struct { } dp_debugfs_entries[] = { {"link_settings", &dp_link_settings_debugfs_fops}, {"phy_settings", &dp_phy_settings_debugfs_fop}, - {"test_pattern", &dp_phy_test_pattern_fops} + {"test_pattern", &dp_phy_test_pattern_fops}, + {"vrr_range", &vrr_range_fops} }; int connector_debugfs_init(struct amdgpu_dm_connector *connector) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c index 3c52a4fc921d..bbe051736a18 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c @@ -627,7 +627,15 @@ static void dce11_pplib_apply_display_requirements( dc, context->bw.dce.sclk_khz); - pp_display_cfg->min_dcfclock_khz = pp_display_cfg->min_engine_clock_khz; + /* + * As a workaround for >4x4K lightup set dcfclock to min_engine_clock value. + * This is not required for less than 5 displays, + * thus don't request dcfclk in dc to avoid impact + * on power saving. + * + */ + pp_display_cfg->min_dcfclock_khz = (context->stream_count > 4)? 
+ pp_display_cfg->min_engine_clock_khz : 0; pp_display_cfg->min_engine_clock_deep_sleep_khz = context->bw.dce.sclk_deep_sleep_khz; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index b45b0d0e7726..0ad8fe4a6277 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -1031,6 +1031,7 @@ static int smu10_get_clock_by_type_with_latency(struct pp_hwmgr *hwmgr, break; case amd_pp_dpp_clock: pclk_vol_table = pinfo->vdd_dep_on_dppclk; + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c index 5bb5a55f6b31..3ca5718aa0c2 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_crtc.c @@ -5,12 +5,16 @@ * */ #include <linux/clk.h> +#include <linux/pm_runtime.h> #include <linux/spinlock.h> + #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_plane_helper.h> #include <drm/drm_crtc_helper.h> -#include <linux/pm_runtime.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_print.h> +#include <drm/drm_vblank.h> + #include "komeda_dev.h" #include "komeda_kms.h" diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c index 0fe6954fbbf4..70e9bb7fa30c 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_dev.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_dev.c @@ -4,9 +4,13 @@ * Author: James.Qian.Wang <james.qian.wang@arm.com> * */ -#include <linux/platform_device.h> +#include <linux/io.h> #include <linux/of_device.h> #include <linux/of_graph.h> +#include <linux/platform_device.h> + +#include <drm/drm_print.h> + #include "komeda_dev.h" static int komeda_parse_pipe_dt(struct komeda_dev *mdev, struct device_node *np) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c index 23ee74d42239..9cc9935024f7 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.c @@ -4,10 +4,12 @@ * Author: James.Qian.Wang <james.qian.wang@arm.com> * */ -#include <drm/drm_gem.h> -#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_device.h> #include <drm/drm_fb_cma_helper.h> +#include <drm/drm_gem.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> + #include "komeda_framebuffer.h" #include "komeda_dev.h" diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 3fc096d3883e..47a58ab20434 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -5,15 +5,19 @@ * */ #include <linux/component.h> +#include <linux/interrupt.h> + #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_gem_framebuffer_helper.h> -#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> -#include <linux/interrupt.h> +#include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> +#include <drm/drm_vblank.h> + #include "komeda_dev.h" -#include "komeda_kms.h" #include "komeda_framebuffer.h" +#include "komeda_kms.h" DEFINE_DRM_GEM_CMA_FOPS(komeda_cma_fops); diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h index f13666004a42..874e9c9f0749 100644 --- 
a/drivers/gpu/drm/arm/display/komeda/komeda_kms.h +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.h @@ -10,6 +10,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_device.h> #include <drm/drm_writeback.h> /** struct komeda_plane - komeda instance of drm_plane */ diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c index edb1cd7795f9..f1908e9ef128 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline.c @@ -4,6 +4,8 @@ * Author: James.Qian.Wang <james.qian.wang@arm.com> * */ +#include <drm/drm_print.h> + #include "komeda_dev.h" #include "komeda_pipeline.h" diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c index 641a33f134ee..49463348a07a 100644 --- a/drivers/gpu/drm/bochs/bochs_mm.c +++ b/drivers/gpu/drm/bochs/bochs_mm.c @@ -442,5 +442,6 @@ int bochs_gem_prime_mmap(struct drm_gem_object *obj, { struct bochs_bo *bo = gem_to_bochs_bo(obj); - return ttm_fbdev_mmap(vma, &bo->bo); + bo->gem.vma_node.vm_node.start = bo->bo.vma_node.vm_node.start; + return drm_gem_prime_mmap(obj, vma); } diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 2fee47b0d50b..8840f396a7b6 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -30,6 +30,7 @@ config DRM_CDNS_DSI select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL_BRIDGE + select GENERIC_PHY_MIPI_DPHY depends on OF help Support Cadence DPI to DSI bridge. This is an internal diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c index 924abe82ea3c..6166dca6be81 100644 --- a/drivers/gpu/drm/bridge/cdns-dsi.c +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -23,6 +23,9 @@ #include <linux/pm_runtime.h> #include <linux/reset.h> +#include <linux/phy/phy.h> +#include <linux/phy/phy-mipi-dphy.h> + #define IP_CONF 0x0 #define SP_HS_FIFO_DEPTH(x) (((x) & GENMASK(30, 26)) >> 26) #define SP_LP_FIFO_DEPTH(x) (((x) & GENMASK(25, 21)) >> 21) @@ -421,44 +424,11 @@ #define DSI_NULL_FRAME_OVERHEAD 6 #define DSI_EOT_PKT_SIZE 4 -#define REG_WAKEUP_TIME_NS 800 -#define DPHY_PLL_RATE_HZ 108000000 - -/* DPHY registers */ -#define DPHY_PMA_CMN(reg) (reg) -#define DPHY_PMA_LCLK(reg) (0x100 + (reg)) -#define DPHY_PMA_LDATA(lane, reg) (0x200 + ((lane) * 0x100) + (reg)) -#define DPHY_PMA_RCLK(reg) (0x600 + (reg)) -#define DPHY_PMA_RDATA(lane, reg) (0x700 + ((lane) * 0x100) + (reg)) -#define DPHY_PCS(reg) (0xb00 + (reg)) - -#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) -#define DPHY_CMN_SSM_EN BIT(0) -#define DPHY_CMN_TX_MODE_EN BIT(9) - -#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) -#define DPHY_CMN_PWM_DIV(x) ((x) << 20) -#define DPHY_CMN_PWM_LOW(x) ((x) << 10) -#define DPHY_CMN_PWM_HIGH(x) (x) - -#define DPHY_CMN_FBDIV DPHY_PMA_CMN(0x4c) -#define DPHY_CMN_FBDIV_VAL(low, high) (((high) << 11) | ((low) << 22)) -#define DPHY_CMN_FBDIV_FROM_REG (BIT(10) | BIT(21)) - -#define DPHY_CMN_OPIPDIV DPHY_PMA_CMN(0x50) -#define DPHY_CMN_IPDIV_FROM_REG BIT(0) -#define DPHY_CMN_IPDIV(x) ((x) << 1) -#define DPHY_CMN_OPDIV_FROM_REG BIT(6) -#define DPHY_CMN_OPDIV(x) ((x) << 7) - -#define DPHY_PSM_CFG DPHY_PCS(0x4) -#define DPHY_PSM_CFG_FROM_REG BIT(0) -#define DPHY_PSM_CLK_DIV(x) ((x) << 1) - struct cdns_dsi_output { struct mipi_dsi_device *dev; struct drm_panel *panel; struct drm_bridge *bridge; + union phy_configure_opts phy_opts; }; enum cdns_dsi_input_id { @@ -467,14 +437,6 @@ enum 
cdns_dsi_input_id { CDNS_DSC_INPUT, }; -struct cdns_dphy_cfg { - u8 pll_ipdiv; - u8 pll_opdiv; - u16 pll_fbdiv; - unsigned long lane_bps; - unsigned int nlanes; -}; - struct cdns_dsi_cfg { unsigned int hfp; unsigned int hsa; @@ -483,34 +445,6 @@ struct cdns_dsi_cfg { unsigned int htotal; }; -struct cdns_dphy; - -enum cdns_dphy_clk_lane_cfg { - DPHY_CLK_CFG_LEFT_DRIVES_ALL = 0, - DPHY_CLK_CFG_LEFT_DRIVES_RIGHT = 1, - DPHY_CLK_CFG_LEFT_DRIVES_LEFT = 2, - DPHY_CLK_CFG_RIGHT_DRIVES_ALL = 3, -}; - -struct cdns_dphy_ops { - int (*probe)(struct cdns_dphy *dphy); - void (*remove)(struct cdns_dphy *dphy); - void (*set_psm_div)(struct cdns_dphy *dphy, u8 div); - void (*set_clk_lane_cfg)(struct cdns_dphy *dphy, - enum cdns_dphy_clk_lane_cfg cfg); - void (*set_pll_cfg)(struct cdns_dphy *dphy, - const struct cdns_dphy_cfg *cfg); - unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); -}; - -struct cdns_dphy { - struct cdns_dphy_cfg cfg; - void __iomem *regs; - struct clk *psm_clk; - struct clk *pll_ref_clk; - const struct cdns_dphy_ops *ops; -}; - struct cdns_dsi_input { enum cdns_dsi_input_id id; struct drm_bridge bridge; @@ -528,7 +462,7 @@ struct cdns_dsi { struct reset_control *dsi_p_rst; struct clk *dsi_sys_clk; bool link_initialized; - struct cdns_dphy *dphy; + struct phy *dphy; }; static inline struct cdns_dsi *input_to_dsi(struct cdns_dsi_input *input) @@ -547,173 +481,13 @@ bridge_to_cdns_dsi_input(struct drm_bridge *bridge) return container_of(bridge, struct cdns_dsi_input, bridge); } -static int cdns_dsi_get_dphy_pll_cfg(struct cdns_dphy *dphy, - struct cdns_dphy_cfg *cfg, - unsigned int dpi_htotal, - unsigned int dpi_bpp, - unsigned int dpi_hz, - unsigned int dsi_htotal, - unsigned int dsi_nlanes, - unsigned int *dsi_hfp_ext) -{ - u64 dlane_bps, dlane_bps_max, fbdiv, fbdiv_max, adj_dsi_htotal; - unsigned long pll_ref_hz = clk_get_rate(dphy->pll_ref_clk); - - memset(cfg, 0, sizeof(*cfg)); - - cfg->nlanes = dsi_nlanes; - - if (pll_ref_hz < 9600000 || pll_ref_hz >= 150000000) - return -EINVAL; - else if (pll_ref_hz < 19200000) - cfg->pll_ipdiv = 1; - else if (pll_ref_hz < 38400000) - cfg->pll_ipdiv = 2; - else if (pll_ref_hz < 76800000) - cfg->pll_ipdiv = 4; - else - cfg->pll_ipdiv = 8; - - /* - * Make sure DSI htotal is aligned on a lane boundary when calculating - * the expected data rate. This is done by extending HFP in case of - * misalignment. - */ - adj_dsi_htotal = dsi_htotal; - if (dsi_htotal % dsi_nlanes) - adj_dsi_htotal += dsi_nlanes - (dsi_htotal % dsi_nlanes); - - dlane_bps = (u64)dpi_hz * adj_dsi_htotal; - - /* data rate in bytes/sec is not an integer, refuse the mode. */ - if (do_div(dlane_bps, dsi_nlanes * dpi_htotal)) - return -EINVAL; - - /* data rate was in bytes/sec, convert to bits/sec. */ - dlane_bps *= 8; - - if (dlane_bps > 2500000000UL || dlane_bps < 160000000UL) - return -EINVAL; - else if (dlane_bps >= 1250000000) - cfg->pll_opdiv = 1; - else if (dlane_bps >= 630000000) - cfg->pll_opdiv = 2; - else if (dlane_bps >= 320000000) - cfg->pll_opdiv = 4; - else if (dlane_bps >= 160000000) - cfg->pll_opdiv = 8; - - /* - * Allow a deviation of 0.2% on the per-lane data rate to try to - * recover a potential mismatch between DPI and PPI clks. 
- */ - dlane_bps_max = dlane_bps + DIV_ROUND_DOWN_ULL(dlane_bps, 500); - fbdiv_max = DIV_ROUND_DOWN_ULL(dlane_bps_max * 2 * - cfg->pll_opdiv * cfg->pll_ipdiv, - pll_ref_hz); - fbdiv = DIV_ROUND_UP_ULL(dlane_bps * 2 * cfg->pll_opdiv * - cfg->pll_ipdiv, - pll_ref_hz); - - /* - * Iterate over all acceptable fbdiv and try to find an adjusted DSI - * htotal length providing an exact match. - * - * Note that we could do something even trickier by relying on the fact - * that a new line is not necessarily aligned on a lane boundary, so, - * by making adj_dsi_htotal non aligned on a dsi_lanes we can improve a - * bit the precision. With this, the step would be - * - * pll_ref_hz / (2 * opdiv * ipdiv * nlanes) - * - * instead of - * - * pll_ref_hz / (2 * opdiv * ipdiv) - * - * The drawback of this approach is that we would need to make sure the - * number or lines is a multiple of the realignment periodicity which is - * a function of the number of lanes and the original misalignment. For - * example, for NLANES = 4 and HTOTAL % NLANES = 3, it takes 4 lines - * to realign on a lane: - * LINE 0: expected number of bytes, starts emitting first byte of - * LINE 1 on LANE 3 - * LINE 1: expected number of bytes, starts emitting first 2 bytes of - * LINE 2 on LANES 2 and 3 - * LINE 2: expected number of bytes, starts emitting first 3 bytes of - * of LINE 3 on LANES 1, 2 and 3 - * LINE 3: one byte less, now things are realigned on LANE 0 for LINE 4 - * - * I figured this extra complexity was not worth the benefit, but if - * someone really has unfixable mismatch, that would be something to - * investigate. - */ - for (; fbdiv <= fbdiv_max; fbdiv++) { - u32 rem; - - adj_dsi_htotal = (u64)fbdiv * pll_ref_hz * dsi_nlanes * - dpi_htotal; - - /* - * Do the division in 2 steps to avoid an overflow on the - * divider. - */ - rem = do_div(adj_dsi_htotal, dpi_hz); - if (rem) - continue; - - rem = do_div(adj_dsi_htotal, - cfg->pll_opdiv * cfg->pll_ipdiv * 2 * 8); - if (rem) - continue; - - cfg->pll_fbdiv = fbdiv; - *dsi_hfp_ext = adj_dsi_htotal - dsi_htotal; - break; - } - - /* No match, let's just reject the display mode. 
*/ - if (!cfg->pll_fbdiv) - return -EINVAL; - - dlane_bps = DIV_ROUND_DOWN_ULL((u64)dpi_hz * adj_dsi_htotal * 8, - dsi_nlanes * dpi_htotal); - cfg->lane_bps = dlane_bps; - - return 0; -} - -static int cdns_dphy_setup_psm(struct cdns_dphy *dphy) -{ - unsigned long psm_clk_hz = clk_get_rate(dphy->psm_clk); - unsigned long psm_div; - - if (!psm_clk_hz || psm_clk_hz > 100000000) - return -EINVAL; - - psm_div = DIV_ROUND_CLOSEST(psm_clk_hz, 1000000); - if (dphy->ops->set_psm_div) - dphy->ops->set_psm_div(dphy, psm_div); - - return 0; -} - -static void cdns_dphy_set_clk_lane_cfg(struct cdns_dphy *dphy, - enum cdns_dphy_clk_lane_cfg cfg) -{ - if (dphy->ops->set_clk_lane_cfg) - dphy->ops->set_clk_lane_cfg(dphy, cfg); -} - -static void cdns_dphy_set_pll_cfg(struct cdns_dphy *dphy, - const struct cdns_dphy_cfg *cfg) +static unsigned int mode_to_dpi_hfp(const struct drm_display_mode *mode, + bool mode_valid_check) { - if (dphy->ops->set_pll_cfg) - dphy->ops->set_pll_cfg(dphy, cfg); -} + if (mode_valid_check) + return mode->hsync_start - mode->hdisplay; -static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy) -{ - return dphy->ops->get_wakeup_time_ns(dphy); + return mode->crtc_hsync_start - mode->crtc_hdisplay; } static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, @@ -733,14 +507,12 @@ static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, const struct drm_display_mode *mode, struct cdns_dsi_cfg *dsi_cfg, - struct cdns_dphy_cfg *dphy_cfg, bool mode_valid_check) { - unsigned long dsi_htotal = 0, dsi_hss_hsa_hse_hbp = 0; struct cdns_dsi_output *output = &dsi->output; - unsigned int dsi_hfp_ext = 0, dpi_hfp, tmp; + unsigned int tmp; bool sync_pulse = false; - int bpp, nlanes, ret; + int bpp, nlanes; memset(dsi_cfg, 0, sizeof(*dsi_cfg)); @@ -759,8 +531,6 @@ static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, mode->crtc_hsync_end : mode->crtc_hsync_start); dsi_cfg->hbp = dpi_to_dsi_timing(tmp, bpp, DSI_HBP_FRAME_OVERHEAD); - dsi_htotal += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; - dsi_hss_hsa_hse_hbp += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; if (sync_pulse) { if (mode_valid_check) @@ -770,49 +540,104 @@ static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, dsi_cfg->hsa = dpi_to_dsi_timing(tmp, bpp, DSI_HSA_FRAME_OVERHEAD); - dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; - dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; } dsi_cfg->hact = dpi_to_dsi_timing(mode_valid_check ? 
mode->hdisplay : mode->crtc_hdisplay, bpp, 0); - dsi_htotal += dsi_cfg->hact; + dsi_cfg->hfp = dpi_to_dsi_timing(mode_to_dpi_hfp(mode, mode_valid_check), + bpp, DSI_HFP_FRAME_OVERHEAD); - if (mode_valid_check) - dpi_hfp = mode->hsync_start - mode->hdisplay; - else - dpi_hfp = mode->crtc_hsync_start - mode->crtc_hdisplay; + return 0; +} + +static int cdns_dsi_adjust_phy_config(struct cdns_dsi *dsi, + struct cdns_dsi_cfg *dsi_cfg, + struct phy_configure_opts_mipi_dphy *phy_cfg, + const struct drm_display_mode *mode, + bool mode_valid_check) +{ + struct cdns_dsi_output *output = &dsi->output; + unsigned long long dlane_bps; + unsigned long adj_dsi_htotal; + unsigned long dsi_htotal; + unsigned long dpi_htotal; + unsigned long dpi_hz; + unsigned int dsi_hfp_ext; + unsigned int lanes = output->dev->lanes; + + dsi_htotal = dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; - dsi_cfg->hfp = dpi_to_dsi_timing(dpi_hfp, bpp, DSI_HFP_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hact; dsi_htotal += dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD; - if (mode_valid_check) - ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, - mode->htotal, bpp, - mode->clock * 1000, - dsi_htotal, nlanes, - &dsi_hfp_ext); - else - ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, - mode->crtc_htotal, bpp, - mode->crtc_clock * 1000, - dsi_htotal, nlanes, - &dsi_hfp_ext); + /* + * Make sure DSI htotal is aligned on a lane boundary when calculating + * the expected data rate. This is done by extending HFP in case of + * misalignment. + */ + adj_dsi_htotal = dsi_htotal; + if (dsi_htotal % lanes) + adj_dsi_htotal += lanes - (dsi_htotal % lanes); + + dpi_hz = (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000; + dlane_bps = (unsigned long long)dpi_hz * adj_dsi_htotal; + + /* data rate in bytes/sec is not an integer, refuse the mode. */ + dpi_htotal = mode_valid_check ? mode->htotal : mode->crtc_htotal; + if (do_div(dlane_bps, lanes * dpi_htotal)) + return -EINVAL; + + /* data rate was in bytes/sec, convert to bits/sec. */ + phy_cfg->hs_clk_rate = dlane_bps * 8; + dsi_hfp_ext = adj_dsi_htotal - dsi_htotal; + dsi_cfg->hfp += dsi_hfp_ext; + dsi_cfg->htotal = dsi_htotal + dsi_hfp_ext; + + return 0; +} + +static int cdns_dsi_check_conf(struct cdns_dsi *dsi, + const struct drm_display_mode *mode, + struct cdns_dsi_cfg *dsi_cfg, + bool mode_valid_check) +{ + struct cdns_dsi_output *output = &dsi->output; + struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; + unsigned long dsi_hss_hsa_hse_hbp; + unsigned int nlanes = output->dev->lanes; + int ret; + + ret = cdns_dsi_mode2cfg(dsi, mode, dsi_cfg, mode_valid_check); if (ret) return ret; - dsi_cfg->hfp += dsi_hfp_ext; - dsi_htotal += dsi_hfp_ext; - dsi_cfg->htotal = dsi_htotal; + phy_mipi_dphy_get_default_config(mode->crtc_clock * 1000, + mipi_dsi_pixel_format_to_bpp(output->dev->format), + nlanes, phy_cfg); + + ret = cdns_dsi_adjust_phy_config(dsi, dsi_cfg, phy_cfg, mode, mode_valid_check); + if (ret) + return ret; + + ret = phy_validate(dsi->dphy, PHY_MODE_MIPI_DPHY, 0, &output->phy_opts); + if (ret) + return ret; + + dsi_hss_hsa_hse_hbp = dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; /* * Make sure DPI(HFP) > DSI(HSS+HSA+HSE+HBP) to guarantee that the FIFO * is empty before we start a receiving a new line on the DPI * interface. 
*/ - if ((u64)dphy_cfg->lane_bps * dpi_hfp * nlanes < + if ((u64)phy_cfg->hs_clk_rate * + mode_to_dpi_hfp(mode, mode_valid_check) * nlanes < (u64)dsi_hss_hsa_hse_hbp * (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000) return -EINVAL; @@ -842,9 +667,8 @@ cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); struct cdns_dsi *dsi = input_to_dsi(input); struct cdns_dsi_output *output = &dsi->output; - struct cdns_dphy_cfg dphy_cfg; struct cdns_dsi_cfg dsi_cfg; - int bpp, nlanes, ret; + int bpp, ret; /* * VFP_DSI should be less than VFP_DPI and VFP_DSI should be at @@ -862,11 +686,9 @@ cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, if ((mode->hdisplay * bpp) % 32) return MODE_H_ILLEGAL; - nlanes = output->dev->lanes; - - ret = cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, true); + ret = cdns_dsi_check_conf(dsi, mode, &dsi_cfg, true); if (ret) - return MODE_CLOCK_RANGE; + return MODE_BAD; return MODE_OK; } @@ -887,9 +709,9 @@ static void cdns_dsi_bridge_disable(struct drm_bridge *bridge) pm_runtime_put(dsi->base.dev); } -static void cdns_dsi_hs_init(struct cdns_dsi *dsi, - const struct cdns_dphy_cfg *dphy_cfg) +static void cdns_dsi_hs_init(struct cdns_dsi *dsi) { + struct cdns_dsi_output *output = &dsi->output; u32 status; /* @@ -900,30 +722,10 @@ static void cdns_dsi_hs_init(struct cdns_dsi *dsi, DPHY_CMN_PDN | DPHY_PLL_PDN, dsi->regs + MCTL_DPHY_CFG0); - /* - * Configure the internal PSM clk divider so that the DPHY has a - * 1MHz clk (or something close). - */ - WARN_ON_ONCE(cdns_dphy_setup_psm(dsi->dphy)); - - /* - * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes - * and 8 data lanes, each clk lane can be attache different set of - * data lanes. The 2 groups are named 'left' and 'right', so here we - * just say that we want the 'left' clk lane to drive the 'left' data - * lanes. - */ - cdns_dphy_set_clk_lane_cfg(dsi->dphy, DPHY_CLK_CFG_LEFT_DRIVES_LEFT); - - /* - * Configure the DPHY PLL that will be used to generate the TX byte - * clk. - */ - cdns_dphy_set_pll_cfg(dsi->dphy, dphy_cfg); - - /* Start TX state machine. */ - writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, - dsi->dphy->regs + DPHY_CMN_SSM); + phy_init(dsi->dphy); + phy_set_mode(dsi->dphy, PHY_MODE_MIPI_DPHY); + phy_configure(dsi->dphy, &output->phy_opts); + phy_power_on(dsi->dphy); /* Activate the PLL and wait until it's locked. */ writel(PLL_LOCKED, dsi->regs + MCTL_MAIN_STS_CLR); @@ -933,7 +735,7 @@ static void cdns_dsi_hs_init(struct cdns_dsi *dsi, status & PLL_LOCKED, 100, 100)); /* De-assert data and clock reset lines. 
*/ writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN | - DPHY_D_RSTB(dphy_cfg->nlanes) | DPHY_C_RSTB, + DPHY_D_RSTB(output->dev->lanes) | DPHY_C_RSTB, dsi->regs + MCTL_DPHY_CFG0); } @@ -979,7 +781,7 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) struct cdns_dsi *dsi = input_to_dsi(input); struct cdns_dsi_output *output = &dsi->output; struct drm_display_mode *mode; - struct cdns_dphy_cfg dphy_cfg; + struct phy_configure_opts_mipi_dphy *phy_cfg = &output->phy_opts.mipi_dphy; unsigned long tx_byte_period; struct cdns_dsi_cfg dsi_cfg; u32 tmp, reg_wakeup, div; @@ -992,9 +794,9 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); nlanes = output->dev->lanes; - WARN_ON_ONCE(cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, false)); + WARN_ON_ONCE(cdns_dsi_check_conf(dsi, mode, &dsi_cfg, false)); - cdns_dsi_hs_init(dsi, &dphy_cfg); + cdns_dsi_hs_init(dsi); cdns_dsi_init_link(dsi); writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), @@ -1030,9 +832,8 @@ static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) tmp -= DIV_ROUND_UP(DSI_EOT_PKT_SIZE, nlanes); tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8, - dphy_cfg.lane_bps); - reg_wakeup = cdns_dphy_get_wakeup_time_ns(dsi->dphy) / - tx_byte_period; + phy_cfg->hs_clk_rate); + reg_wakeup = (phy_cfg->hs_prepare + phy_cfg->hs_zero) / tx_byte_period; writel(REG_WAKEUP_TIME(reg_wakeup) | REG_LINE_DURATION(tmp), dsi->regs + VID_DPHY_TIME); @@ -1346,8 +1147,6 @@ static int __maybe_unused cdns_dsi_resume(struct device *dev) reset_control_deassert(dsi->dsi_p_rst); clk_prepare_enable(dsi->dsi_p_clk); clk_prepare_enable(dsi->dsi_sys_clk); - clk_prepare_enable(dsi->dphy->psm_clk); - clk_prepare_enable(dsi->dphy->pll_ref_clk); return 0; } @@ -1356,8 +1155,6 @@ static int __maybe_unused cdns_dsi_suspend(struct device *dev) { struct cdns_dsi *dsi = dev_get_drvdata(dev); - clk_disable_unprepare(dsi->dphy->pll_ref_clk); - clk_disable_unprepare(dsi->dphy->psm_clk); clk_disable_unprepare(dsi->dsi_sys_clk); clk_disable_unprepare(dsi->dsi_p_clk); reset_control_assert(dsi->dsi_p_rst); @@ -1368,121 +1165,6 @@ static int __maybe_unused cdns_dsi_suspend(struct device *dev) static UNIVERSAL_DEV_PM_OPS(cdns_dsi_pm_ops, cdns_dsi_suspend, cdns_dsi_resume, NULL); -static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) -{ - /* Default wakeup time is 800 ns (in a simulated environment). */ - return 800; -} - -static void cdns_dphy_ref_set_pll_cfg(struct cdns_dphy *dphy, - const struct cdns_dphy_cfg *cfg) -{ - u32 fbdiv_low, fbdiv_high; - - fbdiv_low = (cfg->pll_fbdiv / 4) - 2; - fbdiv_high = cfg->pll_fbdiv - fbdiv_low - 2; - - writel(DPHY_CMN_IPDIV_FROM_REG | DPHY_CMN_OPDIV_FROM_REG | - DPHY_CMN_IPDIV(cfg->pll_ipdiv) | - DPHY_CMN_OPDIV(cfg->pll_opdiv), - dphy->regs + DPHY_CMN_OPIPDIV); - writel(DPHY_CMN_FBDIV_FROM_REG | - DPHY_CMN_FBDIV_VAL(fbdiv_low, fbdiv_high), - dphy->regs + DPHY_CMN_FBDIV); - writel(DPHY_CMN_PWM_HIGH(6) | DPHY_CMN_PWM_LOW(0x101) | - DPHY_CMN_PWM_DIV(0x8), - dphy->regs + DPHY_CMN_PWM); -} - -static void cdns_dphy_ref_set_psm_div(struct cdns_dphy *dphy, u8 div) -{ - writel(DPHY_PSM_CFG_FROM_REG | DPHY_PSM_CLK_DIV(div), - dphy->regs + DPHY_PSM_CFG); -} - -/* - * This is the reference implementation of DPHY hooks. Specific integration of - * this IP may have to re-implement some of them depending on how they decided - * to wire things in the SoC. 
- */ -static const struct cdns_dphy_ops ref_dphy_ops = { - .get_wakeup_time_ns = cdns_dphy_ref_get_wakeup_time_ns, - .set_pll_cfg = cdns_dphy_ref_set_pll_cfg, - .set_psm_div = cdns_dphy_ref_set_psm_div, -}; - -static const struct of_device_id cdns_dphy_of_match[] = { - { .compatible = "cdns,dphy", .data = &ref_dphy_ops }, - { /* sentinel */ }, -}; - -static struct cdns_dphy *cdns_dphy_probe(struct platform_device *pdev) -{ - const struct of_device_id *match; - struct cdns_dphy *dphy; - struct of_phandle_args args; - struct resource res; - int ret; - - ret = of_parse_phandle_with_args(pdev->dev.of_node, "phys", - "#phy-cells", 0, &args); - if (ret) - return ERR_PTR(-ENOENT); - - match = of_match_node(cdns_dphy_of_match, args.np); - if (!match || !match->data) - return ERR_PTR(-EINVAL); - - dphy = devm_kzalloc(&pdev->dev, sizeof(*dphy), GFP_KERNEL); - if (!dphy) - return ERR_PTR(-ENOMEM); - - dphy->ops = match->data; - - ret = of_address_to_resource(args.np, 0, &res); - if (ret) - return ERR_PTR(ret); - - dphy->regs = devm_ioremap_resource(&pdev->dev, &res); - if (IS_ERR(dphy->regs)) - return ERR_CAST(dphy->regs); - - dphy->psm_clk = of_clk_get_by_name(args.np, "psm"); - if (IS_ERR(dphy->psm_clk)) - return ERR_CAST(dphy->psm_clk); - - dphy->pll_ref_clk = of_clk_get_by_name(args.np, "pll_ref"); - if (IS_ERR(dphy->pll_ref_clk)) { - ret = PTR_ERR(dphy->pll_ref_clk); - goto err_put_psm_clk; - } - - if (dphy->ops->probe) { - ret = dphy->ops->probe(dphy); - if (ret) - goto err_put_pll_ref_clk; - } - - return dphy; - -err_put_pll_ref_clk: - clk_put(dphy->pll_ref_clk); - -err_put_psm_clk: - clk_put(dphy->psm_clk); - - return ERR_PTR(ret); -} - -static void cdns_dphy_remove(struct cdns_dphy *dphy) -{ - if (dphy->ops->remove) - dphy->ops->remove(dphy); - - clk_put(dphy->pll_ref_clk); - clk_put(dphy->psm_clk); -} - static int cdns_dsi_drm_probe(struct platform_device *pdev) { struct cdns_dsi *dsi; @@ -1521,13 +1203,13 @@ static int cdns_dsi_drm_probe(struct platform_device *pdev) if (irq < 0) return irq; - dsi->dphy = cdns_dphy_probe(pdev); + dsi->dphy = devm_phy_get(&pdev->dev, "dphy"); if (IS_ERR(dsi->dphy)) return PTR_ERR(dsi->dphy); ret = clk_prepare_enable(dsi->dsi_p_clk); if (ret) - goto err_remove_dphy; + return ret; val = readl(dsi->regs + ID_REG); if (REV_VENDOR_ID(val) != 0xcad) { @@ -1585,9 +1267,6 @@ err_disable_runtime_pm: err_disable_pclk: clk_disable_unprepare(dsi->dsi_p_clk); -err_remove_dphy: - cdns_dphy_remove(dsi->dphy); - return ret; } @@ -1597,7 +1276,6 @@ static int cdns_dsi_drm_remove(struct platform_device *pdev) mipi_dsi_host_unregister(&dsi->base); pm_runtime_disable(&pdev->dev); - cdns_dphy_remove(dsi->dphy); return 0; } diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index bbc17530c23d..888980d4bc74 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -98,6 +98,8 @@ #define DP0_STARTVAL 0x064c #define DP0_ACTIVEVAL 0x0650 #define DP0_SYNCVAL 0x0654 +#define SYNCVAL_HS_POL_ACTIVE_LOW (1 << 15) +#define SYNCVAL_VS_POL_ACTIVE_LOW (1 << 31) #define DP0_MISC 0x0658 #define TU_SIZE_RECOMMENDED (63) /* LSCLK cycles per TU */ #define BPC_6 (0 << 5) @@ -142,6 +144,8 @@ #define DP0_LTLOOPCTRL 0x06d8 #define DP0_SNKLTCTRL 0x06e4 +#define DP1_SRCCTRL 0x07a0 + /* PHY */ #define DP_PHY_CTRL 0x0800 #define DP_PHY_RST BIT(28) /* DP PHY Global Soft Reset */ @@ -150,6 +154,7 @@ #define PHY_M1_RST BIT(12) /* Reset PHY1 Main Channel */ #define PHY_RDY BIT(16) /* PHY Main Channels Ready */ #define PHY_M0_RST BIT(8) /* Reset 
PHY0 Main Channel */ +#define PHY_2LANE BIT(2) /* PHY Enable 2 lanes */ #define PHY_A0_EN BIT(1) /* PHY Aux Channel0 Enable */ #define PHY_M0_EN BIT(0) /* PHY Main Channel0 Enable */ @@ -540,6 +545,7 @@ static int tc_aux_link_setup(struct tc_data *tc) unsigned long rate; u32 value; int ret; + u32 dp_phy_ctrl; rate = clk_get_rate(tc->refclk); switch (rate) { @@ -564,7 +570,10 @@ static int tc_aux_link_setup(struct tc_data *tc) value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2; tc_write(SYS_PLLPARAM, value); - tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN); + dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN; + if (tc->link.base.num_lanes == 2) + dp_phy_ctrl |= PHY_2LANE; + tc_write(DP_PHY_CTRL, dp_phy_ctrl); /* * Initially PLLs are in bypass. Force PLL parameter update, @@ -720,7 +729,9 @@ static int tc_set_video_mode(struct tc_data *tc, tc_write(DP0_ACTIVEVAL, (mode->vdisplay << 16) | (mode->hdisplay)); - tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0)); + tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0) | + ((mode->flags & DRM_MODE_FLAG_NHSYNC) ? SYNCVAL_HS_POL_ACTIVE_LOW : 0) | + ((mode->flags & DRM_MODE_FLAG_NVSYNC) ? SYNCVAL_VS_POL_ACTIVE_LOW : 0)); tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW | DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888); @@ -830,12 +841,11 @@ static int tc_main_link_setup(struct tc_data *tc) if (!tc->mode) return -EINVAL; - /* from excel file - DP0_SrcCtrl */ - tc_write(DP0_SRCCTRL, DP0_SRCCTRL_SCRMBLDIS | DP0_SRCCTRL_EN810B | - DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 | - DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT); - /* from excel file - DP1_SrcCtrl */ - tc_write(0x07a0, 0x00003083); + tc_write(DP0_SRCCTRL, tc_srcctrl(tc)); + /* SSCG and BW27 on DP1 must be set to the same as on DP0 */ + tc_write(DP1_SRCCTRL, + (tc->link.spread ? DP0_SRCCTRL_SSCG : 0) | + ((tc->link.base.rate != 162000) ? DP0_SRCCTRL_BW27 : 0)); rate = clk_get_rate(tc->refclk); switch (rate) { @@ -856,8 +866,11 @@ static int tc_main_link_setup(struct tc_data *tc) } value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2; tc_write(SYS_PLLPARAM, value); + /* Setup Main Link */ - dp_phy_ctrl = BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN | PHY_M0_EN; + dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN | PHY_M0_EN; + if (tc->link.base.num_lanes == 2) + dp_phy_ctrl |= PHY_2LANE; tc_write(DP_PHY_CTRL, dp_phy_ctrl); msleep(100); @@ -1106,10 +1119,20 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { + struct tc_data *tc = connector_to_tc(connector); + u32 req, avail; + u32 bits_per_pixel = 24; + /* DPI interface clock limitation: upto 154 MHz */ if (mode->clock > 154000) return MODE_CLOCK_HIGH; + req = mode->clock * bits_per_pixel / 8; + avail = tc->link.base.num_lanes * tc->link.base.rate; + + if (req > avail) + return MODE_BAD; + return MODE_OK; } @@ -1187,7 +1210,8 @@ static int tc_bridge_attach(struct drm_bridge *bridge) /* Create eDP connector */ drm_connector_helper_add(&tc->connector, &tc_connector_helper_funcs); ret = drm_connector_init(drm, &tc->connector, &tc_connector_funcs, - DRM_MODE_CONNECTOR_eDP); + tc->panel ? 
DRM_MODE_CONNECTOR_eDP : + DRM_MODE_CONNECTOR_DisplayPort); if (ret) return ret; @@ -1196,6 +1220,10 @@ static int tc_bridge_attach(struct drm_bridge *bridge) drm_display_info_set_bus_formats(&tc->connector.display_info, &bus_format, 1); + tc->connector.display_info.bus_flags = + DRM_BUS_FLAG_DE_HIGH | + DRM_BUS_FLAG_PIXDATA_NEGEDGE | + DRM_BUS_FLAG_SYNC_NEGEDGE; drm_connector_attach_encoder(&tc->connector, tc->bridge.encoder); return 0; diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index a830e70fc0bb..7f9bc32af685 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -360,10 +360,70 @@ static const struct drm_crtc_helper_funcs cirrus_helper_funcs = { }; /* CRTC setup */ +static const uint32_t cirrus_formats_16[] = { + DRM_FORMAT_RGB565, +}; + +static const uint32_t cirrus_formats_24[] = { + DRM_FORMAT_RGB888, + DRM_FORMAT_RGB565, +}; + +static const uint32_t cirrus_formats_32[] = { + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_RGB565, +}; + +static struct drm_plane *cirrus_primary_plane(struct drm_device *dev) +{ + const uint32_t *formats; + uint32_t nformats; + struct drm_plane *primary; + int ret; + + switch (cirrus_bpp) { + case 16: + formats = cirrus_formats_16; + nformats = ARRAY_SIZE(cirrus_formats_16); + break; + case 24: + formats = cirrus_formats_24; + nformats = ARRAY_SIZE(cirrus_formats_24); + break; + case 32: + formats = cirrus_formats_32; + nformats = ARRAY_SIZE(cirrus_formats_32); + break; + default: + return NULL; + } + + primary = kzalloc(sizeof(*primary), GFP_KERNEL); + if (primary == NULL) { + DRM_DEBUG_KMS("Failed to allocate primary plane\n"); + return NULL; + } + + ret = drm_universal_plane_init(dev, primary, 0, + &drm_primary_helper_funcs, + formats, nformats, + NULL, + DRM_PLANE_TYPE_PRIMARY, NULL); + if (ret) { + kfree(primary); + primary = NULL; + } + + return primary; +} + static void cirrus_crtc_init(struct drm_device *dev) { struct cirrus_device *cdev = dev->dev_private; struct cirrus_crtc *cirrus_crtc; + struct drm_plane *primary; cirrus_crtc = kzalloc(sizeof(struct cirrus_crtc) + (CIRRUSFB_CONN_LIMIT * sizeof(struct drm_connector *)), @@ -372,7 +432,15 @@ static void cirrus_crtc_init(struct drm_device *dev) if (cirrus_crtc == NULL) return; - drm_crtc_init(dev, &cirrus_crtc->base, &cirrus_crtc_funcs); + primary = cirrus_primary_plane(dev); + if (primary == NULL) { + kfree(cirrus_crtc); + return; + } + + drm_crtc_init_with_planes(dev, &cirrus_crtc->base, + primary, NULL, + &cirrus_crtc_funcs, NULL); drm_mode_crtc_set_gamma_size(&cirrus_crtc->base, CIRRUS_LUT_SIZE); cdev->mode_info.crtc = cirrus_crtc; diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c index 737f02885c28..40fba1c04dfc 100644 --- a/drivers/gpu/drm/drm_agpsupport.c +++ b/drivers/gpu/drm/drm_agpsupport.c @@ -348,7 +348,7 @@ int drm_agp_bind_ioctl(struct drm_device *dev, void *data, * \return zero on success or a negative number on failure. * * Verifies the AGP device is present and has been acquired and looks up the - * AGP memory entry. If the memory it's currently bound, unbind it via + * AGP memory entry. If the memory is currently bound, unbind it via * unbind_agp(). Frees it via free_agp() as well as the entry itself * and unlinks from the doubly linked list it's inserted in. 
*/ diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 6fe2303fccd9..c53ecbd9abdd 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -330,10 +330,17 @@ update_connector_routing(struct drm_atomic_state *state, * Since the connector can be unregistered at any point during an * atomic check or commit, this is racy. But that's OK: all we care * about is ensuring that userspace can't do anything but shut off the - * display on a connector that was destroyed after its been notified, + * display on a connector that was destroyed after it's been notified, * not before. + * + * Additionally, we also want to ignore connector registration when + * we're trying to restore an atomic state during system resume since + * there's a chance the connector may have been destroyed during the + * process, but it's better to ignore that than cause + * drm_atomic_helper_resume() to fail. */ - if (drm_connector_is_unregistered(connector) && crtc_state->active) { + if (!state->duplicated && drm_connector_is_unregistered(connector) && + crtc_state->active) { DRM_DEBUG_ATOMIC("[CONNECTOR:%d:%s] is not registered\n", connector->base.id, connector->name); return -EINVAL; @@ -685,7 +692,7 @@ drm_atomic_helper_check_modeset(struct drm_device *dev, /* * After all the routing has been prepared we need to add in any - * connector which is itself unchanged, but who's crtc changes it's + * connector which is itself unchanged, but whose crtc changes its * configuration. This must be done before calling mode_fixup in case a * crtc only changed its mode but has the same set of connectors. */ @@ -1670,7 +1677,7 @@ EXPORT_SYMBOL(drm_atomic_helper_async_commit); * drm_atomic_helper_setup_commit() and related functions. * * Committing the actual hardware state is done through the - * &drm_mode_config_helper_funcs.atomic_commit_tail callback, or it's default + * &drm_mode_config_helper_funcs.atomic_commit_tail callback, or its default * implementation drm_atomic_helper_commit_tail(). * * RETURNS: @@ -1893,7 +1900,7 @@ crtc_or_fake_commit(struct drm_atomic_state *state, struct drm_crtc *crtc) * functions. drm_atomic_helper_wait_for_dependencies() must be called before * actually committing the hardware state, and for nonblocking commits this call * must be placed in the async worker. See also drm_atomic_helper_swap_state() - * and it's stall parameter, for when a driver's commit hooks look at the + * and its stall parameter, for when a driver's commit hooks look at the * &drm_crtc.state, &drm_plane.state or &drm_connector.state pointer directly. * * Completion of the hardware commit step must be signalled using @@ -3180,6 +3187,7 @@ drm_atomic_helper_duplicate_state(struct drm_device *dev, return ERR_PTR(-ENOMEM); state->acquire_ctx = ctx; + state->duplicated = true; drm_for_each_crtc(crtc, dev) { struct drm_crtc_state *crtc_state; diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index c40889888a16..0aabd401d3ca 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -44,8 +44,8 @@ * DOC: overview * * This file contains the marshalling and demarshalling glue for the atomic UAPI - * in all it's form: The monster ATOMIC IOCTL itself, code for GET_PROPERTY and - * SET_PROPERTY IOCTls. Plus interface functions for compatibility helpers and + * in all its forms: The monster ATOMIC IOCTL itself, code for GET_PROPERTY and + * SET_PROPERTY IOCTLs. 
Plus interface functions for compatibility helpers and * drivers which have special needs to construct their own atomic updates, e.g. * for load detect or similar. */ @@ -1296,12 +1296,11 @@ int drm_mode_atomic_ioctl(struct drm_device *dev, (arg->flags & DRM_MODE_PAGE_FLIP_EVENT)) return -EINVAL; - drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); - state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; + drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); state->acquire_ctx = &ctx; state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET); diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index a39ab2193bfe..e407adb033e7 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -494,7 +494,7 @@ int drm_legacy_getmap_ioctl(struct drm_device *dev, void *data, * isn't in use. * * Searches the map on drm_device::maplist, removes it from the list, see if - * its being used, and free any associate resource (such as MTRR's) if it's not + * it's being used, and free any associated resource (such as MTRR's) if it's not * being on use. * * \sa drm_legacy_addmap @@ -621,7 +621,7 @@ int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data, } } - /* List has wrapped around to the head pointer, or its empty we didn't + /* List has wrapped around to the head pointer, or it's empty we didn't * find anything. */ if (list_empty(&dev->maplist) || !map) { diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index e3ff73695c32..dd40eff0911c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1066,7 +1066,7 @@ EXPORT_SYMBOL(drm_mode_create_dvi_i_properties); * * content type (HDMI specific): * Indicates content type setting to be used in HDMI infoframes to indicate - * content type for the external device, so that it adjusts it's display + * content type for the external device, so that it adjusts its display * settings accordingly. * * The value of this property can be one of the following: diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c index e16aa5ae00b4..ee67c96841fa 100644 --- a/drivers/gpu/drm/drm_damage_helper.c +++ b/drivers/gpu/drm/drm_damage_helper.c @@ -32,6 +32,7 @@ #include <drm/drm_atomic.h> #include <drm/drm_damage_helper.h> +#include <drm/drm_device.h> /** * DOC: overview diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index 54120b6319e7..54a6414c5d96 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -1360,7 +1360,20 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, EXPORT_SYMBOL(drm_dp_read_desc); /** - * DRM DP Helpers for DSC + * drm_dp_dsc_sink_max_slice_count() - Get the max slice count + * supported by the DSC sink. + * @dsc_dpcd: DSC capabilities from DPCD + * @is_edp: true if it's eDP, false for DP + * + * Read the slice capabilities DPCD register from DSC sink to get + * the maximum slice count supported. This is used to populate + * the DSC parameters in the &struct drm_dsc_config by the driver. + * Driver creates an infoframe using these parameters to populate + * &struct drm_dsc_pps_infoframe. 
These are sent to the sink using DSC + infoframe using the helper function drm_dsc_pps_infoframe_pack() + * + * Returns: + * Maximum slice count supported by DSC sink or 0 if invalid */ u8 drm_dp_dsc_sink_max_slice_count(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], bool is_edp) @@ -1405,6 +1418,21 @@ u8 drm_dp_dsc_sink_max_slice_count(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], } EXPORT_SYMBOL(drm_dp_dsc_sink_max_slice_count); +/** + * drm_dp_dsc_sink_line_buf_depth() - Get the line buffer depth in bits + * @dsc_dpcd: DSC capabilities from DPCD + * + * Read the DSC DPCD register to parse the line buffer depth in bits which is + * the number of bits of precision within the decoder line buffer supported by + * the DSC sink. This is used to populate the DSC parameters in the + * &struct drm_dsc_config by the driver. + * Driver creates an infoframe using these parameters to populate + * &struct drm_dsc_pps_infoframe. These are sent to the sink using DSC + * infoframe using the helper function drm_dsc_pps_infoframe_pack() + * + * Returns: + * Line buffer depth supported by DSC panel or 0 if invalid + */ u8 drm_dp_dsc_sink_line_buf_depth(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) { u8 line_buf_depth = dsc_dpcd[DP_DSC_LINE_BUF_BIT_DEPTH - DP_DSC_SUPPORT]; @@ -1434,6 +1462,23 @@ u8 drm_dp_dsc_sink_line_buf_depth(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) } EXPORT_SYMBOL(drm_dp_dsc_sink_line_buf_depth); +/** + * drm_dp_dsc_sink_supported_input_bpcs() - Get all the input bits per component + * values supported by the DSC sink. + * @dsc_dpcd: DSC capabilities from DPCD + * @dsc_bpc: An array to be filled by this helper with supported + * input bpcs. + * + * Read the DSC DPCD from the sink device to parse the supported bits per + * component values. This is used to populate the DSC parameters + * in the &struct drm_dsc_config by the driver. + * Driver creates an infoframe using these parameters to populate + * &struct drm_dsc_pps_infoframe. These are sent to the sink using DSC + * infoframe using the helper function drm_dsc_pps_infoframe_pack() + * + * Returns: + * Number of input BPC values parsed from the DPCD + */ int drm_dp_dsc_sink_supported_input_bpcs(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE], u8 dsc_bpc[3]) { diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index b1c63e9cdf8a..dc7ac0c60547 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -941,7 +941,7 @@ static void drm_dp_free_mst_branch_device(struct kref *kref) * in-memory topology state from being changed in the middle of critical * operations like changing the internal state of payload allocations. This * means each branch and port will be considered to be connected to the rest - * of the topology until it's topology refcount reaches zero. Additionally, + * of the topology until its topology refcount reaches zero. Additionally, * for ports this means that their associated &struct drm_connector will stay * registered with userspace until the port's refcount reaches 0. * @@ -979,8 +979,8 @@ static void drm_dp_free_mst_branch_device(struct kref *kref) * same way as the C pointers used to reference a structure. * * As you can see in the above figure, every branch increments the topology - * refcount of it's children, and increments the malloc refcount of it's - * parent. Additionally, every payload increments the malloc refcount of it's + * refcount of its children, and increments the malloc refcount of its + * parent. 
Additionally, every payload increments the malloc refcount of its * assigned port by 1. * * So, what would happen if MSTB #3 from the above figure was unplugged from @@ -997,9 +997,9 @@ static void drm_dp_free_mst_branch_device(struct kref *kref) * of its parent, and finally its own malloc refcount. For MSTB #4 and port * #4, this means they both have been disconnected from the topology and freed * from memory. But, because payload #2 is still holding a reference to port - * #3, port #3 is removed from the topology but it's &struct drm_dp_mst_port + * #3, port #3 is removed from the topology but its &struct drm_dp_mst_port * is still accessible from memory. This also means port #3 has not yet - * decremented the malloc refcount of MSTB #3, so it's &struct + * decremented the malloc refcount of MSTB #3, so its &struct * drm_dp_mst_branch will also stay allocated in memory until port #3's * malloc refcount reaches 0. * @@ -1139,7 +1139,7 @@ static void drm_dp_destroy_mst_branch_device(struct kref *kref) /** * drm_dp_mst_topology_try_get_mstb() - Increment the topology refcount of a - * branch device unless its zero + * branch device unless it's zero * @mstb: &struct drm_dp_mst_branch to increment the topology refcount of * * Attempts to grab a topology reference to @mstb, if it hasn't yet been @@ -1265,7 +1265,7 @@ static void drm_dp_destroy_port(struct kref *kref) /** * drm_dp_mst_topology_try_get_port() - Increment the topology refcount of a - * port unless its zero + * port unless it's zero * @port: &struct drm_dp_mst_port to increment the topology refcount of * * Attempts to grab a topology reference to @port, if it hasn't yet been @@ -1471,7 +1471,7 @@ static bool drm_dp_port_setup_pdt(struct drm_dp_mst_port *port) port->mstb->port_parent = port; /* * Make sure this port's memory allocation stays - * around until it's child MSTB releases it + * around until its child MSTB releases it */ drm_dp_mst_get_port_malloc(port); @@ -2271,7 +2271,7 @@ static int drm_dp_destroy_payload_step1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_payload *payload) { DRM_DEBUG_KMS("\n"); - /* its okay for these to fail */ + /* it's okay for these to fail */ if (port) { drm_dp_payload_send_msg(mgr, port, id, 0); } @@ -2947,7 +2947,7 @@ enum drm_connector_status drm_dp_mst_detect_port(struct drm_connector *connector { enum drm_connector_status status = connector_status_disconnected; - /* we need to search for the port in the mgr in case its gone */ + /* we need to search for the port in the mgr in case it's gone */ port = drm_dp_mst_topology_get_port_validated(mgr, port); if (!port) return connector_status_disconnected; @@ -3013,7 +3013,7 @@ struct edid *drm_dp_mst_get_edid(struct drm_connector *connector, struct drm_dp_ { struct edid *edid = NULL; - /* we need to search for the port in the mgr in case its gone */ + /* we need to search for the port in the mgr in case it's gone */ port = drm_dp_mst_topology_get_port_validated(mgr, port); if (!port) return NULL; @@ -3117,10 +3117,6 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, if (IS_ERR(topology_state)) return PTR_ERR(topology_state); - port = drm_dp_mst_topology_get_port_validated(mgr, port); - if (port == NULL) - return -EINVAL; - /* Find the current allocation for this port, if any */ list_for_each_entry(pos, &topology_state->vcpis, next) { if (pos->port == port) { @@ -3153,10 +3149,8 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, /* Add the new allocation to the state */ if (!vcpi) { vcpi = 
kzalloc(sizeof(*vcpi), GFP_KERNEL); - if (!vcpi) { - ret = -ENOMEM; - goto out; - } + if (!vcpi) + return -ENOMEM; drm_dp_mst_get_port_malloc(port); vcpi->port = port; @@ -3165,8 +3159,6 @@ int drm_dp_atomic_find_vcpi_slots(struct drm_atomic_state *state, vcpi->vcpi = req_slots; ret = req_slots; -out: - drm_dp_mst_topology_put_port(port); return ret; } EXPORT_SYMBOL(drm_dp_atomic_find_vcpi_slots); @@ -3180,7 +3172,7 @@ EXPORT_SYMBOL(drm_dp_atomic_find_vcpi_slots); * Releases any VCPI slots that have been allocated to a port in the atomic * state. Any atomic drivers which support MST must call this function in * their &drm_connector_helper_funcs.atomic_check() callback when the - * connector will no longer have VCPI allocated (e.g. because it's CRTC was + * connector will no longer have VCPI allocated (e.g. because its CRTC was * removed) when it had VCPI allocated in the previous atomic state. * * It is OK to call this even if @port has been removed from the system. * @@ -3268,7 +3260,7 @@ bool drm_dp_mst_allocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, DRM_DEBUG_KMS("initing vcpi for pbn=%d slots=%d\n", pbn, port->vcpi.num_slots); - /* Keep port allocated until it's payload has been removed */ + /* Keep port allocated until its payload has been removed */ drm_dp_mst_get_port_malloc(port); drm_dp_mst_topology_put_port(port); return true; @@ -3300,7 +3292,7 @@ EXPORT_SYMBOL(drm_dp_mst_get_vcpi_slots); void drm_dp_mst_reset_vcpi_slots(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { /* - * A port with VCPI will remain allocated until it's VCPI is + * A port with VCPI will remain allocated until its VCPI is * released, no verified ref needed */ @@ -3311,15 +3303,16 @@ EXPORT_SYMBOL(drm_dp_mst_reset_vcpi_slots); /** * drm_dp_mst_deallocate_vcpi() - deallocate a VCPI * @mgr: manager for this port - * @port: unverified port to deallocate vcpi for + * @port: port to deallocate vcpi for + * + * This can be called unconditionally, regardless of whether + * drm_dp_mst_allocate_vcpi() succeeded or not. */ void drm_dp_mst_deallocate_vcpi(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { - /* - * A port with VCPI will remain allocated until it's VCPI is - * released, no verified ref needed - */ + if (!port->vcpi.vcpi) + return; drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi); port->vcpi.num_slots = 0; diff --git a/drivers/gpu/drm/drm_dsc.c b/drivers/gpu/drm/drm_dsc.c index bc2b23adb072..bce99f95c1a3 100644 --- a/drivers/gpu/drm/drm_dsc.c +++ b/drivers/gpu/drm/drm_dsc.c @@ -17,6 +17,12 @@ /** * DOC: dsc helpers * + * VESA specification for DP 1.4 adds a new feature called Display Stream + * Compression (DSC) used to compress the pixel bits before sending them over + * the DP/eDP/MIPI DSI interface. DSC must be enabled so that the existing + * display interfaces can support high resolutions at higher frame rates using + * the maximum available link capacity of these interfaces. + * * These functions contain some common logic and helpers to deal with VESA * Display Stream Compression standard required for DSC on Display Port/eDP or * MIPI display interfaces. @@ -26,6 +32,13 @@ * drm_dsc_dp_pps_header_init() - Initializes the PPS Header * for DisplayPort as per the DP 1.4 spec. * @pps_sdp: Secondary data packet for DSC Picture Parameter Set + * as defined in &struct drm_dsc_pps_infoframe + * + * DP 1.4 spec defines the secondary data packet for sending the + * picture parameter infoframes from the source to the sink. + * This function populates the pps header defined in + * &struct drm_dsc_pps_infoframe as per the header bytes defined + * in &struct dp_sdp_header. */ void drm_dsc_dp_pps_header_init(struct drm_dsc_pps_infoframe *pps_sdp) { @@ -38,15 +51,20 @@ EXPORT_SYMBOL(drm_dsc_dp_pps_header_init); /** * drm_dsc_pps_infoframe_pack() - Populates the DSC PPS infoframe - * using the DSC configuration parameters in the order expected - * by the DSC Display Sink device. For the DSC, the sink device - * expects the PPS payload in the big endian format for the fields - * that span more than 1 byte. * * @pps_sdp: - * Secondary data packet for DSC Picture Parameter Set + * Secondary data packet for DSC Picture Parameter Set. This is defined + * by &struct drm_dsc_pps_infoframe * @dsc_cfg: - * DSC Configuration data filled by driver + * DSC Configuration data filled by driver as defined by + * &struct drm_dsc_config + * + * DSC source device sends a secondary data packet filled with all the + * picture parameter set (PPS) information required by the sink to decode + * the compressed frame. Driver populates the DSC PPS infoframe using the DSC + * configuration parameters in the order expected by the DSC Display Sink + * device. For the DSC, the sink device expects the PPS payload in big-endian + * format for the fields that span more than 1 byte. */ void drm_dsc_pps_infoframe_pack(struct drm_dsc_pps_infoframe *pps_sdp, const struct drm_dsc_config *dsc_cfg)
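The two helpers above make up the whole PPS send path: initialize the SDP header, then pack the big-endian PPS payload from an already-filled &struct drm_dsc_config. A minimal sketch of that flow; example_send_pps() and the write_sdp() hardware-write hook are invented stand-ins for driver-specific code:

#include <linux/string.h>
#include <drm/drm_dsc.h>

/* Hypothetical PPS send path; only the two drm_dsc helpers are real. */
static void example_send_pps(struct drm_dsc_pps_infoframe *pps_sdp,
			     const struct drm_dsc_config *dsc_cfg,
			     void (*write_sdp)(const void *buf, size_t len))
{
	memset(pps_sdp, 0, sizeof(*pps_sdp));

	/* SDP header bytes as defined by the DP 1.4 spec */
	drm_dsc_dp_pps_header_init(pps_sdp);
	/* PPS payload, big-endian for fields wider than one byte */
	drm_dsc_pps_infoframe_pack(pps_sdp, dsc_cfg);

	write_sdp(pps_sdp, sizeof(*pps_sdp));
}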
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index c5c79986f9c5..0e9349ff2d16 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1, var_1->transp.msb_right == var_2->transp.msb_right; } +static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var, + u8 depth) +{ + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; /* 8bit DAC */ + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.offset = 15; + var->transp.length = 1; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.offset = 24; + var->transp.length = 8; + break; + default: + break; + } +} + /** * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var * @var: screeninfo to check @@ -1632,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; - if (var->pixclock != 0 || in_dbg_master()) + if (in_dbg_master()) return -EINVAL; + if (var->pixclock != 0) { + DRM_DEBUG("fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n"); + var->pixclock = 0; + } + if
((drm_format_info_block_width(fb->format, 0) > 1) || (drm_format_info_block_height(fb->format, 0) > 1)) return -EINVAL; @@ -1655,6 +1718,20 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, } /* + * Workaround for SDL 1.2, which is known to set all pixel format + * field values to zero in some cases. We treat this situation as a + * kind of "use some reasonable autodetected values". + */ + if (!var->red.offset && !var->green.offset && + !var->blue.offset && !var->transp.offset && + !var->red.length && !var->green.length && + !var->blue.length && !var->transp.length && + !var->red.msb_right && !var->green.msb_right && + !var->blue.msb_right && !var->transp.msb_right) { + drm_fb_helper_fill_pixel_fmt(var, fb->format->depth); + } + + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. */ @@ -2020,59 +2097,7 @@ void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helpe info->var.yoffset = 0; info->var.activate = FB_ACTIVATE_NOW; - switch (fb->format->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } + drm_fb_helper_fill_pixel_fmt(&info->var, fb->format->depth); info->var.xres = fb_width; info->var.yres = fb_height; @@ -2483,7 +2508,7 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper, /* * This function checks if rotation is necessary because of panel orientation * and if it is, if it is supported. - * If rotation is necessary and supported, its gets set in fb_crtc.rotation. + * If rotation is necessary and supported, it gets set in fb_crtc.rotation. * If rotation is necessary but not supported, a DRM_MODE_ROTATE_* flag gets * or-ed into fb_helper->sw_rotations.
In drm_setup_crtcs_fb() we check if only * one bit is set and then we set fb_info.fbcon_rotate_hint to make fbcon do diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 46f48f245eb5..b1838a41ad43 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -701,7 +701,7 @@ int drm_event_reserve_init(struct drm_device *dev, EXPORT_SYMBOL(drm_event_reserve_init); /** - * drm_event_cancel_free - free a DRM event and release it's space + * drm_event_cancel_free - free a DRM event and release its space * @dev: DRM device * @p: tracking structure for the pending event * diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c index d90ee03a84c6..ba7e19d4336c 100644 --- a/drivers/gpu/drm/drm_fourcc.c +++ b/drivers/gpu/drm/drm_fourcc.c @@ -238,6 +238,15 @@ const struct drm_format_info *__drm_format_info(u32 format) { .format = DRM_FORMAT_X0L2, .depth = 0, .num_planes = 1, .char_per_block = { 8, 0, 0 }, .block_w = { 2, 0, 0 }, .block_h = { 2, 0, 0 }, .hsub = 2, .vsub = 2, .is_yuv = true }, + { .format = DRM_FORMAT_P010, .depth = 0, .num_planes = 2, + .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_P012, .depth = 0, .num_planes = 2, + .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, + { .format = DRM_FORMAT_P016, .depth = 0, .num_planes = 2, + .char_per_block = { 2, 4, 0 }, .block_w = { 1, 0, 0 }, .block_h = { 1, 0, 0 }, + .hsub = 2, .vsub = 2, .is_yuv = true}, }; unsigned int i; diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index 7abcb265a108..d8d75e25f6fb 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -773,7 +773,7 @@ EXPORT_SYMBOL(drm_framebuffer_lookup); * @fb: fb to unregister * * Drivers need to call this when cleaning up driver-private framebuffers, e.g. - * those used for fbdev. Note that the caller must hold a reference of it's own, + * those used for fbdev. Note that the caller must hold a reference of its own, * i.e. the object may not be destroyed through this call (since it'll lead to a * locking inversion). 
* diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 2896ff60552f..d0b9f6a9953f 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -660,7 +660,7 @@ void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages, EXPORT_SYMBOL(drm_gem_put_pages); /** - * drm_gem_object_lookup - look up a GEM object from it's handle + * drm_gem_object_lookup - look up a GEM object from its handle * @filp: DRM file private date * @handle: userspace handle * diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index b735704653cb..603b0bd9c5ce 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -528,7 +528,8 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, object_count = cl->object_count; - object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), object_count * sizeof(__u32)); + object_ids = memdup_user(u64_to_user_ptr(cl->object_ids), + array_size(object_count, sizeof(__u32))); if (IS_ERR(object_ids)) return PTR_ERR(object_ids); diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 3cc5fbd78ee2..2b4f373736c7 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -816,7 +816,7 @@ EXPORT_SYMBOL(drm_mm_scan_add_block); * When the scan list is empty, the selected memory nodes can be freed. An * immediately following drm_mm_insert_node_in_range_generic() or one of the * simpler versions of that function with !DRM_MM_SEARCH_BEST will then return - * the just freed block (because its at the top of the free_stack list). + * the just freed block (because it's at the top of the free_stack list). * * Returns: * True if this block should be evicted, false otherwise. Will always diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index adce9a26bac9..869ac6f4671e 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -751,7 +751,7 @@ int drm_mode_hsync(const struct drm_display_mode *mode) if (mode->hsync) return mode->hsync; - if (mode->htotal < 0) + if (mode->htotal <= 0) return 0; calc_val = (mode->clock * 1000) / mode->htotal; /* hsync in Hz */ @@ -1272,7 +1272,7 @@ const char *drm_get_mode_status_name(enum drm_mode_status status) * @verbose: be verbose about it * * This helper function can be used to prune a display mode list after - * validation has been completed. All modes who's status is not MODE_OK will be + * validation has been completed. All modes whose status is not MODE_OK will be * removed from the list, and if @verbose the status code and mode name is also * printed to dmesg. 
*/ diff --git a/drivers/gpu/drm/drm_modeset_helper.c b/drivers/gpu/drm/drm_modeset_helper.c index 890eee07892d..da483125e063 100644 --- a/drivers/gpu/drm/drm_modeset_helper.c +++ b/drivers/gpu/drm/drm_modeset_helper.c @@ -22,8 +22,10 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_print.h> #include <drm/drm_probe_helper.h> /** diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index a9d9df6c85ad..693748ad8b88 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -61,8 +61,9 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali return NULL; dmah->size = size; - dmah->vaddr = dma_zalloc_coherent(&dev->pdev->dev, size, &dmah->busaddr, - GFP_KERNEL | __GFP_COMP); + dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size, + &dmah->busaddr, + GFP_KERNEL | __GFP_COMP); if (dmah->vaddr == NULL) { kfree(dmah); diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 79c77c3cad86..f8ec8f9c3e7a 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -866,7 +866,7 @@ err: * value doesn't become invalid part way through the property update due to * race). The value returned by reference via 'obj' should be passed back * to drm_property_change_valid_put() after the property is set (and the - * object to which the property is attached has a chance to take it's own + * object to which the property is attached has a chance to take its own * reference). */ bool drm_property_change_valid_get(struct drm_property *property, diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index 8c057829b804..66c41b12719c 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -208,114 +208,6 @@ int drm_rect_calc_vscale(const struct drm_rect *src, EXPORT_SYMBOL(drm_rect_calc_vscale); /** - * drm_calc_hscale_relaxed - calculate the horizontal scaling factor - * @src: source window rectangle - * @dst: destination window rectangle - * @min_hscale: minimum allowed horizontal scaling factor - * @max_hscale: maximum allowed horizontal scaling factor - * - * Calculate the horizontal scaling factor as - * (@src width) / (@dst width). - * - * If the calculated scaling factor is below @min_vscale, - * decrease the height of rectangle @dst to compensate. - * - * If the calculated scaling factor is above @max_vscale, - * decrease the height of rectangle @src to compensate. - * - * If the scale is below 1 << 16, round down. If the scale is above - * 1 << 16, round up. This will calculate the scale with the most - * pessimistic limit calculation. - * - * RETURNS: - * The horizontal scaling factor. 
- */ -int drm_rect_calc_hscale_relaxed(struct drm_rect *src, - struct drm_rect *dst, - int min_hscale, int max_hscale) -{ - int src_w = drm_rect_width(src); - int dst_w = drm_rect_width(dst); - int hscale = drm_calc_scale(src_w, dst_w); - - if (hscale < 0 || dst_w == 0) - return hscale; - - if (hscale < min_hscale) { - int max_dst_w = src_w / min_hscale; - - drm_rect_adjust_size(dst, max_dst_w - dst_w, 0); - - return min_hscale; - } - - if (hscale > max_hscale) { - int max_src_w = dst_w * max_hscale; - - drm_rect_adjust_size(src, max_src_w - src_w, 0); - - return max_hscale; - } - - return hscale; -} -EXPORT_SYMBOL(drm_rect_calc_hscale_relaxed); - -/** - * drm_rect_calc_vscale_relaxed - calculate the vertical scaling factor - * @src: source window rectangle - * @dst: destination window rectangle - * @min_vscale: minimum allowed vertical scaling factor - * @max_vscale: maximum allowed vertical scaling factor - * - * Calculate the vertical scaling factor as - * (@src height) / (@dst height). - * - * If the calculated scaling factor is below @min_vscale, - * decrease the height of rectangle @dst to compensate. - * - * If the calculated scaling factor is above @max_vscale, - * decrease the height of rectangle @src to compensate. - * - * If the scale is below 1 << 16, round down. If the scale is above - * 1 << 16, round up. This will calculate the scale with the most - * pessimistic limit calculation. - * - * RETURNS: - * The vertical scaling factor. - */ -int drm_rect_calc_vscale_relaxed(struct drm_rect *src, - struct drm_rect *dst, - int min_vscale, int max_vscale) -{ - int src_h = drm_rect_height(src); - int dst_h = drm_rect_height(dst); - int vscale = drm_calc_scale(src_h, dst_h); - - if (vscale < 0 || dst_h == 0) - return vscale; - - if (vscale < min_vscale) { - int max_dst_h = src_h / min_vscale; - - drm_rect_adjust_size(dst, 0, max_dst_h - dst_h); - - return min_vscale; - } - - if (vscale > max_vscale) { - int max_src_h = dst_h * max_vscale; - - drm_rect_adjust_size(src, 0, max_src_h - src_h); - - return max_vscale; - } - - return vscale; -} -EXPORT_SYMBOL(drm_rect_calc_vscale_relaxed); - -/** * drm_rect_debug_print - print the rectangle information * @prefix: prefix string * @r: rectangle to print diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index cde71ee95a8f..a1b65d26d761 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -48,7 +48,7 @@ * Drivers must initialize the vertical blanking handling core with a call to * drm_vblank_init(). Minimally, a driver needs to implement * &drm_crtc_funcs.enable_vblank and &drm_crtc_funcs.disable_vblank plus call - * drm_crtc_handle_vblank() in it's vblank interrupt handler for working vblank + * drm_crtc_handle_vblank() in its vblank interrupt handler for working vblank * support. 
* * Vertical blanking interrupts can be enabled by the DRM core or by drivers diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 9c106e47e640..bc64b810e0d5 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -2799,6 +2799,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x21f0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index 5e01cc8d9b16..4862fb12778e 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -47,7 +47,7 @@ struct intel_gvt_mpt { int (*host_init)(struct device *dev, void *gvt, const void *ops); void (*host_exit)(struct device *dev); int (*attach_vgpu)(void *vgpu, unsigned long *handle); - void (*detach_vgpu)(unsigned long handle); + void (*detach_vgpu)(void *vgpu); int (*inject_msi)(unsigned long handle, u32 addr, u16 data); unsigned long (*from_virt_to_mfn)(void *p); int (*enable_page_track)(unsigned long handle, u64 gfn); diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 63eef86a2a85..d5fcc447d22f 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1147,7 +1147,7 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) { unsigned int index; u64 virtaddr; - unsigned long req_size, pgoff = 0; + unsigned long req_size, pgoff, req_start; pgprot_t pg_prot; struct intel_vgpu *vgpu = mdev_get_drvdata(mdev); @@ -1165,7 +1165,17 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) pg_prot = vma->vm_page_prot; virtaddr = vma->vm_start; req_size = vma->vm_end - vma->vm_start; - pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT; + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + req_start = pgoff << PAGE_SHIFT; + + if (!intel_vgpu_in_aperture(vgpu, req_start)) + return -EINVAL; + if (req_start + req_size > + vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu)) + return -EINVAL; + + pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff; return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot); } @@ -1813,9 +1823,21 @@ static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle) return 0; } -static void kvmgt_detach_vgpu(unsigned long handle) +static void kvmgt_detach_vgpu(void *p_vgpu) { - /* nothing to do here */ + int i; + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + if (!vgpu->vdev.region) + return; + + for (i = 0; i < vgpu->vdev.num_regions; i++) + if (vgpu->vdev.region[i].ops->release) + vgpu->vdev.region[i].ops->release(vgpu, + &vgpu->vdev.region[i]); + vgpu->vdev.num_regions = 0; + kfree(vgpu->vdev.region); + vgpu->vdev.region = NULL; } static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data) diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index 5d8b8f228d8f..0f9440128123 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -99,7 +99,7 @@ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu) if (!intel_gvt_host.mpt->detach_vgpu) return; - intel_gvt_host.mpt->detach_vgpu(vgpu->handle); + intel_gvt_host.mpt->detach_vgpu(vgpu); } #define 
MSI_CAP_CONTROL(offset) (offset + 2) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index b7957eefb976..1bb8f936fdaa 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -333,6 +333,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj); i915_gem_object_put(wa_ctx->indirect_ctx.obj); + + wa_ctx->indirect_ctx.obj = NULL; + wa_ctx->indirect_ctx.shadow_va = NULL; } static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, @@ -357,6 +360,33 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, return 0; } +static int +intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload) +{ + struct intel_vgpu *vgpu = workload->vgpu; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; + struct i915_request *rq; + int ret = 0; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (workload->req) + goto out; + + rq = i915_request_alloc(engine, shadow_ctx); + if (IS_ERR(rq)) { + gvt_vgpu_err("fail to allocate gem request\n"); + ret = PTR_ERR(rq); + goto out; + } + workload->req = i915_request_get(rq); +out: + return ret; +} + /** * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and * shadow it as well, include ringbuffer,wa_ctx and ctx. @@ -373,12 +403,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id]; struct intel_context *ce; - struct i915_request *rq; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (workload->req) + if (workload->shadow) return 0; ret = set_context_ppgtt_from_shadow(workload, shadow_ctx); @@ -418,22 +447,8 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) goto err_shadow; } - rq = i915_request_alloc(engine, shadow_ctx); - if (IS_ERR(rq)) { - gvt_vgpu_err("fail to allocate gem request\n"); - ret = PTR_ERR(rq); - goto err_shadow; - } - workload->req = i915_request_get(rq); - - ret = populate_shadow_context(workload); - if (ret) - goto err_req; - + workload->shadow = true; return 0; -err_req: - rq = fetch_and_zero(&workload->req); - i915_request_put(rq); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_unpin: @@ -672,23 +687,31 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); + ret = intel_gvt_workload_req_alloc(workload); + if (ret) + goto err_req; + ret = intel_gvt_scan_and_shadow_workload(workload); if (ret) goto out; - ret = prepare_workload(workload); + ret = populate_shadow_context(workload); + if (ret) { + release_shadow_wa_ctx(&workload->wa_ctx); + goto out; + } + ret = prepare_workload(workload); out: - if (ret) - workload->status = ret; - if (!IS_ERR_OR_NULL(workload->req)) { gvt_dbg_sched("ring id %d submit workload to i915 %p\n", ring_id, workload->req); i915_request_add(workload->req); workload->dispatched = true; } - +err_req: + if (ret) + workload->status = ret; mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&vgpu->vgpu_lock); return ret; @@ -892,11 +915,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) 
list_del_init(&workload->list); - if (!workload->status) { - release_shadow_batch_buffer(workload); - release_shadow_wa_ctx(&workload->wa_ctx); - } - if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) { /* if workload->status is not successful means HW GPU * has occurred GPU hang or something wrong with i915/GVT, @@ -1262,6 +1280,9 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu_submission *s = &workload->vgpu->submission; + release_shadow_batch_buffer(workload); + release_shadow_wa_ctx(&workload->wa_ctx); + if (workload->shadow_mm) intel_vgpu_mm_put(workload->shadow_mm); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 1e9eec6a32fe..0635b2c4bed7 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -83,6 +83,7 @@ struct intel_vgpu_workload { struct i915_request *req; /* if this workload has been dispatched to i915? */ bool dispatched; + bool shadow; /* if workload has done shadow of guest request */ int status; struct intel_vgpu_mm *shadow_mm; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7de90701f6f1..6630212f2faf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -41,8 +41,10 @@ #include <linux/vt.h> #include <acpi/video.h> -#include <drm/drm_probe_helper.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_irq.h> +#include <drm/drm_probe_helper.h> #include <drm/i915_drm.h> #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 16263add3cdd..7cf9290ea34a 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -31,7 +31,9 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> + #include "intel_drv.h" /** diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index a1a263026574..db0965904439 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -32,7 +32,9 @@ */ #include <drm/drm_atomic_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> + #include "intel_drv.h" struct intel_plane *intel_plane_alloc(void) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 619f1a20cc2d..ccb616351bba 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -24,23 +24,32 @@ * Eric Anholt <eric@anholt.net> */ -#include <linux/module.h> -#include <linux/input.h> #include <linux/i2c.h> +#include <linux/input.h> +#include <linux/intel-iommu.h> #include <linux/kernel.h> +#include <linux/module.h> +#include <linux/reservation.h> #include <linux/slab.h> #include <linux/vgaarb.h> -#include <drm/drm_edid.h> -#include <drm/i915_drm.h> + #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_atomic_uapi.h> #include <drm/drm_dp_helper.h> +#include <drm/drm_edid.h> +#include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_rect.h> -#include <drm/drm_atomic_uapi.h> -#include <linux/intel-iommu.h> -#include <linux/reservation.h> +#include <drm/i915_drm.h> + +#include "i915_drv.h" +#include "i915_gem_clflush.h" +#include "i915_trace.h" +#include "intel_drv.h" +#include "intel_dsi.h" +#include "intel_frontbuffer.h" #include 
"intel_drv.h" #include "intel_dsi.h" diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index cdb83d294cdd..fb67cd931117 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -80,17 +80,12 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder, mst_pbn = drm_dp_calc_pbn_mode(adjusted_mode->crtc_clock, bpp); pipe_config->pbn = mst_pbn; - /* Zombie connectors can't have VCPI slots */ - if (!drm_connector_is_unregistered(connector)) { - slots = drm_dp_atomic_find_vcpi_slots(state, - &intel_dp->mst_mgr, - port, - mst_pbn); - if (slots < 0) { - DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", - slots); - return slots; - } + slots = drm_dp_atomic_find_vcpi_slots(state, &intel_dp->mst_mgr, port, + mst_pbn); + if (slots < 0) { + DRM_DEBUG_KMS("failed finding vcpi slots:%d\n", + slots); + return slots; } intel_link_compute_m_n(bpp, lane_count, diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index 30ae96c5c97c..5e00ee9270b5 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -54,7 +54,12 @@ struct opregion_header { u8 signature[16]; u32 size; - u32 opregion_ver; + struct { + u8 rsvd; + u8 revision; + u8 minor; + u8 major; + } __packed over; u8 bios_ver[32]; u8 vbios_ver[16]; u8 driver_ver[16]; @@ -118,7 +123,8 @@ struct opregion_asle { u64 fdss; u32 fdsp; u32 stat; - u64 rvda; /* Physical address of raw vbt data */ + u64 rvda; /* Physical (2.0) or relative from opregion (2.1+) + * address of raw VBT data. */ u32 rvds; /* Size of raw vbt data */ u8 rsvd[58]; } __packed; @@ -924,6 +930,11 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->header = base; opregion->lid_state = base + ACPI_CLID; + DRM_DEBUG_DRIVER("ACPI OpRegion version %u.%u.%u\n", + opregion->header->over.major, + opregion->header->over.minor, + opregion->header->over.revision); + mboxes = opregion->header->mboxes; if (mboxes & MBOX_ACPI) { DRM_DEBUG_DRIVER("Public ACPI methods supported\n"); @@ -952,11 +963,26 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) if (dmi_check_system(intel_no_opregion_vbt)) goto out; - if (opregion->header->opregion_ver >= 2 && opregion->asle && + if (opregion->header->over.major >= 2 && opregion->asle && opregion->asle->rvda && opregion->asle->rvds) { - opregion->rvda = memremap(opregion->asle->rvda, - opregion->asle->rvds, + resource_size_t rvda = opregion->asle->rvda; + + /* + * opregion 2.0: rvda is the physical VBT address. + * + * opregion 2.1+: rvda is unsigned, relative offset from + * opregion base, and should never point within opregion. 
+ */ + if (opregion->header->over.major > 2 || + opregion->header->over.minor >= 1) { + WARN_ON(rvda < OPREGION_SIZE); + + rvda += asls; + } + + opregion->rvda = memremap(rvda, opregion->asle->rvds, MEMREMAP_WB); + vbt = opregion->rvda; vbt_size = opregion->asle->rvds; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { @@ -966,6 +992,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) goto out; } else { DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n"); + memunmap(opregion->rvda); + opregion->rvda = NULL; } } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 0f15685529a0..54307f1df6cf 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -26,13 +26,16 @@ */ #include <linux/cpufreq.h> +#include <linux/module.h> #include <linux/pm_runtime.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_plane_helper.h> + #include "i915_drv.h" #include "intel_drv.h" #include "../../../platform/x86/intel_ips.h" -#include <linux/module.h> -#include <drm/drm_atomic_helper.h> /** * DOC: RC6 diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c index e2a129d4ae7b..383733302280 100644 --- a/drivers/gpu/drm/imx/imx-ldb.c +++ b/drivers/gpu/drm/imx/imx-ldb.c @@ -643,8 +643,10 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) int bus_format; ret = of_property_read_u32(child, "reg", &i); - if (ret || i < 0 || i > 1) - return -EINVAL; + if (ret || i < 0 || i > 1) { + ret = -EINVAL; + goto free_child; + } if (!of_device_is_available(child)) continue; @@ -657,7 +659,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) channel = &imx_ldb->channel[i]; channel->ldb = imx_ldb; channel->chno = i; - channel->child = child; /* * The output port is port@4 with an external 4-port mux or @@ -667,13 +668,13 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) imx_ldb->lvds_mux ? 
4 : 2, 0, &channel->panel, &channel->bridge); if (ret && ret != -ENODEV) - return ret; + goto free_child; /* panel ddc only if there is no bridge */ if (!channel->bridge) { ret = imx_ldb_panel_ddc(dev, channel, child); if (ret) - return ret; + goto free_child; } bus_format = of_get_bus_format(dev, child); @@ -689,18 +690,26 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data) if (bus_format < 0) { dev_err(dev, "could not determine data mapping: %d\n", bus_format); - return bus_format; + ret = bus_format; + goto free_child; } channel->bus_format = bus_format; + channel->child = child; ret = imx_ldb_register(drm, channel); - if (ret) - return ret; + if (ret) { + channel->child = NULL; + goto free_child; + } } dev_set_drvdata(dev, imx_ldb); return 0; + +free_child: + of_node_put(child); + return ret; } static void imx_ldb_unbind(struct device *dev, struct device *master, diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index c390924de93d..21e964f6ab5c 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -370,9 +370,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (ret) return ret; - /* CRTC should be enabled */ + /* nothing to check when disabling or disabled */ if (!crtc_state->enable) - return -EINVAL; + return 0; switch (plane->type) { case DRM_PLANE_TYPE_PRIMARY: diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c index ec573c04206b..43e29984f8b1 100644 --- a/drivers/gpu/drm/meson/meson_crtc.c +++ b/drivers/gpu/drm/meson/meson_crtc.c @@ -46,7 +46,6 @@ struct meson_crtc { struct drm_crtc base; struct drm_pending_vblank_event *event; struct meson_drm *priv; - bool enabled; }; #define to_meson_crtc(x) container_of(x, struct meson_crtc, base) @@ -82,7 +81,8 @@ static const struct drm_crtc_funcs meson_crtc_funcs = { }; -static void meson_crtc_enable(struct drm_crtc *crtc) +static void meson_crtc_atomic_enable(struct drm_crtc *crtc, + struct drm_crtc_state *old_state) { struct meson_crtc *meson_crtc = to_meson_crtc(crtc); struct drm_crtc_state *crtc_state = crtc->state; @@ -108,20 +108,6 @@ static void meson_crtc_enable(struct drm_crtc *crtc) drm_crtc_vblank_on(crtc); - meson_crtc->enabled = true; -} - -static void meson_crtc_atomic_enable(struct drm_crtc *crtc, - struct drm_crtc_state *old_state) -{ - struct meson_crtc *meson_crtc = to_meson_crtc(crtc); - struct meson_drm *priv = meson_crtc->priv; - - DRM_DEBUG_DRIVER("\n"); - - if (!meson_crtc->enabled) - meson_crtc_enable(crtc); - priv->viu.osd1_enabled = true; } @@ -153,8 +139,6 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc, crtc->state->event = NULL; } - - meson_crtc->enabled = false; } static void meson_crtc_atomic_begin(struct drm_crtc *crtc, @@ -163,9 +147,6 @@ static void meson_crtc_atomic_begin(struct drm_crtc *crtc, struct meson_crtc *meson_crtc = to_meson_crtc(crtc); unsigned long flags; - if (crtc->state->enable && !meson_crtc->enabled) - meson_crtc_enable(crtc); - if (crtc->state->event) { WARN_ON(drm_crtc_vblank_get(crtc) != 0); diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 27f38bdc8677..2281ed3eb774 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -75,6 +75,10 @@ static const struct drm_mode_config_funcs meson_mode_config_funcs = { .fb_create = drm_gem_fb_create, }; +static const struct drm_mode_config_helper_funcs meson_mode_config_helpers = { + .atomic_commit_tail = 
drm_atomic_helper_commit_tail_rpm, +}; + static irqreturn_t meson_irq(int irq, void *arg) { struct drm_device *dev = arg; @@ -286,6 +290,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) drm->mode_config.max_width = 3840; drm->mode_config.max_height = 2160; drm->mode_config.funcs = &meson_mode_config_funcs; + drm->mode_config.helper_private = &meson_mode_config_helpers; /* Hardware Initialization */ @@ -408,8 +413,10 @@ static int meson_probe_remote(struct platform_device *pdev, remote_node = of_graph_get_remote_port_parent(ep); if (!remote_node || remote_node == parent || /* Ignore parent endpoint */ - !of_device_is_available(remote_node)) + !of_device_is_available(remote_node)) { + of_node_put(remote_node); continue; + } count += meson_probe_remote(pdev, match, remote, remote_node); @@ -428,10 +435,13 @@ static int meson_drv_probe(struct platform_device *pdev) for_each_endpoint_of_node(np, ep) { remote = of_graph_get_remote_port_parent(ep); - if (!remote || !of_device_is_available(remote)) + if (!remote || !of_device_is_available(remote)) { + of_node_put(remote); continue; + } count += meson_probe_remote(pdev, &match, np, remote); + of_node_put(remote); } if (count && !match) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 5beb83d1cf87..ce1b3cc4bf6d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -944,7 +944,7 @@ static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq) np = dev_pm_opp_get_of_node(opp); if (np) { - of_property_read_u32(np, "qcom,level", &val); + of_property_read_u32(np, "opp-level", &val); of_node_put(np); } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 2e4372ef17a3..2cfee1a4fe0b 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -765,7 +765,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu->rev = config->rev; adreno_gpu_config.ioname = "kgsl_3d0_reg_memory"; - adreno_gpu_config.irqname = "kgsl_3d0_irq"; adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index d130825e2c75..b776fca571f3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -465,8 +465,6 @@ static void _dpu_crtc_setup_mixer_for_encoder( return; } - mixer->encoder = enc; - cstate->num_mixers++; DPU_DEBUG("setup mixer %d: lm %d\n", i, mixer->hw_lm->idx - LM_0); @@ -718,11 +716,8 @@ void dpu_crtc_commit_kickoff(struct drm_crtc *crtc, bool async) * may delay and flush at an irq event (e.g. 
ppdone) */ drm_for_each_encoder_mask(encoder, crtc->dev, - crtc->state->encoder_mask) { - struct dpu_encoder_kickoff_params params = { 0 }; - dpu_encoder_prepare_for_kickoff(encoder, ¶ms, async); - } - + crtc->state->encoder_mask) + dpu_encoder_prepare_for_kickoff(encoder, async); if (!async) { /* wait for frame_event_done completion */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h index dbfb38a1986c..e59d62be4980 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.h @@ -84,14 +84,12 @@ struct dpu_crtc_smmu_state_data { * struct dpu_crtc_mixer: stores the map for each virtual pipeline in the CRTC * @hw_lm: LM HW Driver context * @lm_ctl: CTL Path HW driver context - * @encoder: Encoder attached to this lm & ctl * @mixer_op_mode: mixer blending operation mode * @flush_mask: mixer flush mask for ctl, mixer and pipe */ struct dpu_crtc_mixer { struct dpu_hw_mixer *hw_lm; struct dpu_hw_ctl *lm_ctl; - struct drm_encoder *encoder; u32 mixer_op_mode; u32 flush_mask; }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 36af231bb73f..5aa3307f3f0c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -205,7 +205,7 @@ struct dpu_encoder_virt { bool idle_pc_supported; struct mutex rc_lock; enum dpu_enc_rc_states rc_state; - struct kthread_delayed_work delayed_off_work; + struct delayed_work delayed_off_work; struct kthread_work vsync_event_work; struct msm_display_topology topology; bool mode_set_complete; @@ -742,7 +742,6 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, { struct dpu_encoder_virt *dpu_enc; struct msm_drm_private *priv; - struct msm_drm_thread *disp_thread; bool is_vid_mode = false; if (!drm_enc || !drm_enc->dev || !drm_enc->dev->dev_private || @@ -755,12 +754,6 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, is_vid_mode = dpu_enc->disp_info.capabilities & MSM_DISPLAY_CAP_VID_MODE; - if (drm_enc->crtc->index >= ARRAY_SIZE(priv->disp_thread)) { - DPU_ERROR("invalid crtc index\n"); - return -EINVAL; - } - disp_thread = &priv->disp_thread[drm_enc->crtc->index]; - /* * when idle_pc is not supported, process only KICKOFF, STOP and MODESET * events and return early for other events (ie wb display). 
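The change above, from a kthread_delayed_work queued on the display thread to a plain delayed_work, follows the standard workqueue idle-timeout pattern. A self-contained sketch of that pattern, with all identifiers invented for illustration (the patch itself queues onto the driver-private priv->wq rather than the system workqueue, but the shape is the same):

#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

/* Invented example of the delayed-work idle-timeout pattern used above. */
struct example_enc {
	struct delayed_work off_work;
	unsigned int idle_timeout_ms;
};

static void example_off_work(struct work_struct *work)
{
	struct example_enc *enc =
		container_of(work, struct example_enc, off_work.work);

	/* idle timeout expired with no new frames: power the encoder down */
}

static void example_kickoff(struct example_enc *enc)
{
	/* a new frame arrived: cancel any pending power-off, ... */
	cancel_delayed_work_sync(&enc->off_work);

	/* ... submit the frame, then re-arm the idle timer */
	schedule_delayed_work(&enc->off_work,
			      msecs_to_jiffies(enc->idle_timeout_ms));
}

static void example_init(struct example_enc *enc)
{
	INIT_DELAYED_WORK(&enc->off_work, example_off_work);
	enc->idle_timeout_ms = 1000;
}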
@@ -777,8 +770,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, switch (sw_event) { case DPU_ENC_RC_EVENT_KICKOFF: /* cancel delayed off work, if any */ - if (kthread_cancel_delayed_work_sync( - &dpu_enc->delayed_off_work)) + if (cancel_delayed_work_sync(&dpu_enc->delayed_off_work)) DPU_DEBUG_ENC(dpu_enc, "sw_event:%d, work cancelled\n", sw_event); @@ -837,10 +829,8 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, return 0; } - kthread_queue_delayed_work( - &disp_thread->worker, - &dpu_enc->delayed_off_work, - msecs_to_jiffies(dpu_enc->idle_timeout)); + queue_delayed_work(priv->wq, &dpu_enc->delayed_off_work, + msecs_to_jiffies(dpu_enc->idle_timeout)); trace_dpu_enc_rc(DRMID(drm_enc), sw_event, dpu_enc->idle_pc_supported, dpu_enc->rc_state, @@ -849,8 +839,7 @@ static int dpu_encoder_resource_control(struct drm_encoder *drm_enc, case DPU_ENC_RC_EVENT_PRE_STOP: /* cancel delayed off work, if any */ - if (kthread_cancel_delayed_work_sync( - &dpu_enc->delayed_off_work)) + if (cancel_delayed_work_sync(&dpu_enc->delayed_off_work)) DPU_DEBUG_ENC(dpu_enc, "sw_event:%d, work cancelled\n", sw_event); @@ -1368,7 +1357,7 @@ static void dpu_encoder_frame_done_callback( } } -static void dpu_encoder_off_work(struct kthread_work *work) +static void dpu_encoder_off_work(struct work_struct *work) { struct dpu_encoder_virt *dpu_enc = container_of(work, struct dpu_encoder_virt, delayed_off_work.work); @@ -1756,15 +1745,14 @@ static void dpu_encoder_vsync_event_work_handler(struct kthread_work *work) nsecs_to_jiffies(ktime_to_ns(wakeup_time))); } -void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc, - struct dpu_encoder_kickoff_params *params, bool async) +void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc, bool async) { struct dpu_encoder_virt *dpu_enc; struct dpu_encoder_phys *phys; bool needs_hw_reset = false; unsigned int i; - if (!drm_enc || !params) { + if (!drm_enc) { DPU_ERROR("invalid args\n"); return; } @@ -1778,7 +1766,7 @@ void dpu_encoder_prepare_for_kickoff(struct drm_encoder *drm_enc, phys = dpu_enc->phys_encs[i]; if (phys) { if (phys->ops.prepare_for_kickoff) - phys->ops.prepare_for_kickoff(phys, params); + phys->ops.prepare_for_kickoff(phys); if (phys->enable_state == DPU_ENC_ERR_NEEDS_HW_RESET) needs_hw_reset = true; } @@ -2193,7 +2181,7 @@ int dpu_encoder_setup(struct drm_device *dev, struct drm_encoder *enc, mutex_init(&dpu_enc->rc_lock); - kthread_init_delayed_work(&dpu_enc->delayed_off_work, + INIT_DELAYED_WORK(&dpu_enc->delayed_off_work, dpu_encoder_off_work); dpu_enc->idle_timeout = IDLE_TIMEOUT; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h index 3f5dafe00580..d77f74fb26d4 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.h @@ -38,15 +38,6 @@ struct dpu_encoder_hw_resources { }; /** - * dpu_encoder_kickoff_params - info encoder requires at kickoff - * @affected_displays: bitmask, bit set means the ROI of the commit lies within - * the bounds of the physical display at the bit index - */ -struct dpu_encoder_kickoff_params { - unsigned long affected_displays; -}; - -/** * dpu_encoder_get_hw_resources - Populate table of required hardware resources * @encoder: encoder pointer * @hw_res: resource table to populate with encoder required resources @@ -88,11 +79,9 @@ void dpu_encoder_register_frame_event_callback(struct drm_encoder *encoder, * Immediately: if no previous commit is outstanding. 
* Delayed: Block until next trigger can be issued. * @encoder: encoder pointer - * @params: kickoff time parameters * @async: true if this is an asynchronous commit */ -void dpu_encoder_prepare_for_kickoff(struct drm_encoder *encoder, - struct dpu_encoder_kickoff_params *params, bool async); +void dpu_encoder_prepare_for_kickoff(struct drm_encoder *encoder, bool async); /** * dpu_encoder_trigger_kickoff_pending - Clear the flush bits from previous diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h index 44e6f8b68e70..db94f3d3bea3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h @@ -144,8 +144,7 @@ struct dpu_encoder_phys_ops { int (*wait_for_commit_done)(struct dpu_encoder_phys *phys_enc); int (*wait_for_tx_complete)(struct dpu_encoder_phys *phys_enc); int (*wait_for_vblank)(struct dpu_encoder_phys *phys_enc); - void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc, - struct dpu_encoder_kickoff_params *params); + void (*prepare_for_kickoff)(struct dpu_encoder_phys *phys_enc); void (*handle_post_kickoff)(struct dpu_encoder_phys *phys_enc); void (*trigger_start)(struct dpu_encoder_phys *phys_enc); bool (*needs_single_flush)(struct dpu_encoder_phys *phys_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c index 99ab5ca9bed3..a399e1edd313 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c @@ -594,8 +594,7 @@ static void dpu_encoder_phys_cmd_get_hw_resources( } static void dpu_encoder_phys_cmd_prepare_for_kickoff( - struct dpu_encoder_phys *phys_enc, - struct dpu_encoder_kickoff_params *params) + struct dpu_encoder_phys *phys_enc) { struct dpu_encoder_phys_cmd *cmd_enc = to_dpu_encoder_phys_cmd(phys_enc); @@ -693,7 +692,7 @@ static int dpu_encoder_phys_cmd_wait_for_commit_done( /* required for both controllers */ if (!rc && cmd_enc->serialize_wait4pp) - dpu_encoder_phys_cmd_prepare_for_kickoff(phys_enc, NULL); + dpu_encoder_phys_cmd_prepare_for_kickoff(phys_enc); return rc; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c index acdab5b0db18..3c4eb470a82c 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c @@ -587,14 +587,13 @@ static int dpu_encoder_phys_vid_wait_for_vblank( } static void dpu_encoder_phys_vid_prepare_for_kickoff( - struct dpu_encoder_phys *phys_enc, - struct dpu_encoder_kickoff_params *params) + struct dpu_encoder_phys *phys_enc) { struct dpu_encoder_phys_vid *vid_enc; struct dpu_hw_ctl *ctl; int rc; - if (!phys_enc || !params) { + if (!phys_enc) { DPU_ERROR("invalid encoder/parameters\n"); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c index 0874f0a53bf9..f59fe1a9f4b9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c @@ -263,13 +263,13 @@ static const struct dpu_format dpu_format_map[] = { INTERLEAVED_RGB_FMT(RGB565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, + C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), INTERLEAVED_RGB_FMT(BGR565, 0, COLOR_5BIT, COLOR_6BIT, COLOR_5BIT, - C2_R_Cr, C0_G_Y, C1_B_Cb, 0, 3, + C1_B_Cb, C0_G_Y, C2_R_Cr, 0, 3, false, 2, 0, DPU_FETCH_LINEAR, 1), @@ 
-1137,36 +1137,3 @@ const struct msm_format *dpu_get_msm_format( return &fmt->base; return NULL; } - -uint32_t dpu_populate_formats( - const struct dpu_format_extended *format_list, - uint32_t *pixel_formats, - uint64_t *pixel_modifiers, - uint32_t pixel_formats_max) -{ - uint32_t i, fourcc_format; - - if (!format_list || !pixel_formats) - return 0; - - for (i = 0, fourcc_format = 0; - format_list->fourcc_format && i < pixel_formats_max; - ++format_list) { - /* verify if listed format is in dpu_format_map? */ - - /* optionally return modified formats */ - if (pixel_modifiers) { - /* assume same modifier for all fb planes */ - pixel_formats[i] = format_list->fourcc_format; - pixel_modifiers[i++] = format_list->modifier; - } else { - /* assume base formats grouped together */ - if (fourcc_format != format_list->fourcc_format) { - fourcc_format = format_list->fourcc_format; - pixel_formats[i++] = fourcc_format; - } - } - } - - return i; -} diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h index a54451d8d011..c02c81e7a667 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.h @@ -41,20 +41,6 @@ const struct msm_format *dpu_get_msm_format( const uint64_t modifiers); /** - * dpu_populate_formats - populate the given array with fourcc codes supported - * @format_list: pointer to list of possible formats - * @pixel_formats: array to populate with fourcc codes - * @pixel_modifiers: array to populate with drm modifiers, can be NULL - * @pixel_formats_max: length of pixel formats array - * Return: number of elements populated - */ -uint32_t dpu_populate_formats( - const struct dpu_format_extended *format_list, - uint32_t *pixel_formats, - uint64_t *pixel_modifiers, - uint32_t pixel_formats_max); - -/** * dpu_format_check_modified_format - validate format and buffers for * dpu non-standard, i.e. 
modified format * @kms: kms driver diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 512ac0834d2b..df6852cc98b9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -151,7 +151,9 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = { .id = DPU_SSPP_CSC_10BIT, \ .base = 0x1a00, .len = 0x100,}, \ .format_list = plane_formats_yuv, \ + .num_formats = ARRAY_SIZE(plane_formats_yuv), \ .virt_format_list = plane_formats, \ + .virt_num_formats = ARRAY_SIZE(plane_formats), \ } #define _DMA_SBLK(num, sdma_pri) \ @@ -163,7 +165,9 @@ static const struct dpu_sspp_blks_common sdm845_sspp_common = { .src_blk = {.name = STRCAT("sspp_src_", num), \ .id = DPU_SSPP_SRC, .base = 0x00, .len = 0x150,}, \ .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ + .virt_num_formats = ARRAY_SIZE(plane_formats), \ } static const struct dpu_sspp_sub_blks sdm845_vig_sblk_0 = _VIG_SBLK("0", 5); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h index 144358a3d0fb..a55653b2e466 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h @@ -252,17 +252,6 @@ struct dpu_pp_blk { }; /** - * struct dpu_format_extended - define dpu specific pixel format+modifier - * @fourcc_format: Base FOURCC pixel format code - * @modifier: 64-bit drm format modifier, same modifier must be applied to all - * framebuffer planes - */ -struct dpu_format_extended { - uint32_t fourcc_format; - uint64_t modifier; -}; - -/** * enum dpu_qos_lut_usage - define QoS LUT use cases */ enum dpu_qos_lut_usage { @@ -348,7 +337,9 @@ struct dpu_sspp_blks_common { * @pcc_blk: * @igc_blk: * @format_list: Pointer to list of supported formats + * @num_formats: Number of supported formats * @virt_format_list: Pointer to list of supported formats for virtual planes + * @virt_num_formats: Number of supported formats for virtual planes */ struct dpu_sspp_sub_blks { const struct dpu_sspp_blks_common *common; @@ -366,8 +357,10 @@ struct dpu_sspp_sub_blks { struct dpu_pp_blk pcc_blk; struct dpu_pp_blk igc_blk; - const struct dpu_format_extended *format_list; - const struct dpu_format_extended *virt_format_list; + const u32 *format_list; + u32 num_formats; + const u32 *virt_format_list; + u32 virt_num_formats; }; /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h index 3c9f028628ef..d09730985951 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog_format.h @@ -12,157 +12,81 @@ #include "dpu_hw_mdss.h" -static const struct dpu_format_extended plane_formats[] = { - {DRM_FORMAT_ARGB8888, 0}, - {DRM_FORMAT_ABGR8888, 0}, - {DRM_FORMAT_RGBA8888, 0}, - {DRM_FORMAT_ABGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_BGRA8888, 0}, - {DRM_FORMAT_XRGB8888, 0}, - {DRM_FORMAT_RGBX8888, 0}, - {DRM_FORMAT_BGRX8888, 0}, - {DRM_FORMAT_XBGR8888, 0}, - {DRM_FORMAT_XBGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_RGB888, 0}, - {DRM_FORMAT_BGR888, 0}, - {DRM_FORMAT_RGB565, 0}, - {DRM_FORMAT_BGR565, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_BGR565, 0}, - {DRM_FORMAT_ARGB1555, 0}, - {DRM_FORMAT_ABGR1555, 0}, - {DRM_FORMAT_RGBA5551, 0}, - {DRM_FORMAT_BGRA5551, 0}, - {DRM_FORMAT_XRGB1555, 0}, - {DRM_FORMAT_XBGR1555, 0}, - 
{DRM_FORMAT_RGBX5551, 0}, - {DRM_FORMAT_BGRX5551, 0}, - {DRM_FORMAT_ARGB4444, 0}, - {DRM_FORMAT_ABGR4444, 0}, - {DRM_FORMAT_RGBA4444, 0}, - {DRM_FORMAT_BGRA4444, 0}, - {DRM_FORMAT_XRGB4444, 0}, - {DRM_FORMAT_XBGR4444, 0}, - {DRM_FORMAT_RGBX4444, 0}, - {DRM_FORMAT_BGRX4444, 0}, - {0, 0}, +static const uint32_t qcom_compressed_supported_formats[] = { + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_BGR565, }; -static const struct dpu_format_extended plane_formats_yuv[] = { - {DRM_FORMAT_ARGB8888, 0}, - {DRM_FORMAT_ABGR8888, 0}, - {DRM_FORMAT_RGBA8888, 0}, - {DRM_FORMAT_BGRX8888, 0}, - {DRM_FORMAT_ABGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_BGRA8888, 0}, - {DRM_FORMAT_XRGB8888, 0}, - {DRM_FORMAT_XBGR8888, 0}, - {DRM_FORMAT_RGBX8888, 0}, - {DRM_FORMAT_XBGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_RGB888, 0}, - {DRM_FORMAT_BGR888, 0}, - {DRM_FORMAT_RGB565, 0}, - {DRM_FORMAT_BGR565, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_BGR565, 0}, - {DRM_FORMAT_ARGB1555, 0}, - {DRM_FORMAT_ABGR1555, 0}, - {DRM_FORMAT_RGBA5551, 0}, - {DRM_FORMAT_BGRA5551, 0}, - {DRM_FORMAT_XRGB1555, 0}, - {DRM_FORMAT_XBGR1555, 0}, - {DRM_FORMAT_RGBX5551, 0}, - {DRM_FORMAT_BGRX5551, 0}, - {DRM_FORMAT_ARGB4444, 0}, - {DRM_FORMAT_ABGR4444, 0}, - {DRM_FORMAT_RGBA4444, 0}, - {DRM_FORMAT_BGRA4444, 0}, - {DRM_FORMAT_XRGB4444, 0}, - {DRM_FORMAT_XBGR4444, 0}, - {DRM_FORMAT_RGBX4444, 0}, - {DRM_FORMAT_BGRX4444, 0}, - - {DRM_FORMAT_NV12, 0}, - {DRM_FORMAT_NV12, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_NV21, 0}, - {DRM_FORMAT_NV16, 0}, - {DRM_FORMAT_NV61, 0}, - {DRM_FORMAT_VYUY, 0}, - {DRM_FORMAT_UYVY, 0}, - {DRM_FORMAT_YUYV, 0}, - {DRM_FORMAT_YVYU, 0}, - {DRM_FORMAT_YUV420, 0}, - {DRM_FORMAT_YVU420, 0}, - {0, 0}, -}; - -static const struct dpu_format_extended cursor_formats[] = { - {DRM_FORMAT_ARGB8888, 0}, - {DRM_FORMAT_ABGR8888, 0}, - {DRM_FORMAT_RGBA8888, 0}, - {DRM_FORMAT_BGRA8888, 0}, - {DRM_FORMAT_XRGB8888, 0}, - {DRM_FORMAT_ARGB1555, 0}, - {DRM_FORMAT_ABGR1555, 0}, - {DRM_FORMAT_RGBA5551, 0}, - {DRM_FORMAT_BGRA5551, 0}, - {DRM_FORMAT_ARGB4444, 0}, - {DRM_FORMAT_ABGR4444, 0}, - {DRM_FORMAT_RGBA4444, 0}, - {DRM_FORMAT_BGRA4444, 0}, - {0, 0}, +static const uint32_t plane_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ABGR4444, + DRM_FORMAT_RGBA4444, + DRM_FORMAT_BGRA4444, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_BGRX4444, }; -static const struct dpu_format_extended wb2_formats[] = { - {DRM_FORMAT_RGB565, 0}, - {DRM_FORMAT_BGR565, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_RGB888, 0}, - {DRM_FORMAT_ARGB8888, 0}, - {DRM_FORMAT_RGBA8888, 0}, - {DRM_FORMAT_ABGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_XRGB8888, 0}, - {DRM_FORMAT_RGBX8888, 0}, - {DRM_FORMAT_XBGR8888, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_ARGB1555, 0}, - {DRM_FORMAT_RGBA5551, 0}, - {DRM_FORMAT_XRGB1555, 0}, - {DRM_FORMAT_RGBX5551, 0}, - {DRM_FORMAT_ARGB4444, 0}, - {DRM_FORMAT_RGBA4444, 0}, - {DRM_FORMAT_RGBX4444, 0}, - {DRM_FORMAT_XRGB4444, 0}, - - {DRM_FORMAT_BGR565, 0}, - {DRM_FORMAT_BGR888, 0}, - 
{DRM_FORMAT_ABGR8888, 0}, - {DRM_FORMAT_BGRA8888, 0}, - {DRM_FORMAT_BGRX8888, 0}, - {DRM_FORMAT_XBGR8888, 0}, - {DRM_FORMAT_ABGR1555, 0}, - {DRM_FORMAT_BGRA5551, 0}, - {DRM_FORMAT_XBGR1555, 0}, - {DRM_FORMAT_BGRX5551, 0}, - {DRM_FORMAT_ABGR4444, 0}, - {DRM_FORMAT_BGRA4444, 0}, - {DRM_FORMAT_BGRX4444, 0}, - {DRM_FORMAT_XBGR4444, 0}, - - {DRM_FORMAT_YUV420, 0}, - {DRM_FORMAT_NV12, 0}, - {DRM_FORMAT_NV12, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_NV16, 0}, - {DRM_FORMAT_YUYV, 0}, - - {0, 0}, -}; +static const uint32_t plane_formats_yuv[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_RGBA8888, + DRM_FORMAT_BGRX8888, + DRM_FORMAT_BGRA8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_RGBX8888, + DRM_FORMAT_RGB888, + DRM_FORMAT_BGR888, + DRM_FORMAT_RGB565, + DRM_FORMAT_BGR565, + DRM_FORMAT_ARGB1555, + DRM_FORMAT_ABGR1555, + DRM_FORMAT_RGBA5551, + DRM_FORMAT_BGRA5551, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_XBGR1555, + DRM_FORMAT_RGBX5551, + DRM_FORMAT_BGRX5551, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_ABGR4444, + DRM_FORMAT_RGBA4444, + DRM_FORMAT_BGRA4444, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_XBGR4444, + DRM_FORMAT_RGBX4444, + DRM_FORMAT_BGRX4444, -static const struct dpu_format_extended rgb_10bit_formats[] = { - {DRM_FORMAT_BGRA1010102, 0}, - {DRM_FORMAT_BGRX1010102, 0}, - {DRM_FORMAT_RGBA1010102, 0}, - {DRM_FORMAT_RGBX1010102, 0}, - {DRM_FORMAT_ABGR2101010, 0}, - {DRM_FORMAT_ABGR2101010, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_XBGR2101010, 0}, - {DRM_FORMAT_XBGR2101010, DRM_FORMAT_MOD_QCOM_COMPRESSED}, - {DRM_FORMAT_ARGB2101010, 0}, - {DRM_FORMAT_XRGB2101010, 0}, + DRM_FORMAT_NV12, + DRM_FORMAT_NV21, + DRM_FORMAT_NV16, + DRM_FORMAT_NV61, + DRM_FORMAT_VYUY, + DRM_FORMAT_UYVY, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, + DRM_FORMAT_YVU420, }; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c index c0b7f0049365..8a28a03ac6a9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c @@ -170,10 +170,6 @@ /** * AD4 interrupt status bit definitions */ -#define DPU_INTR_BRIGHTPR_UPDATED BIT(4) -#define DPU_INTR_DARKENH_UPDATED BIT(3) -#define DPU_INTR_STREN_OUTROI_UPDATED BIT(2) -#define DPU_INTR_STREN_INROI_UPDATED BIT(1) #define DPU_INTR_BACKLIGHT_UPDATED BIT(0) /** * struct dpu_intr_reg - array of DPU register sets @@ -782,18 +778,6 @@ static int dpu_hw_intr_irqidx_lookup(enum dpu_intr_type intr_type, return -EINVAL; } -static void dpu_hw_intr_set_mask(struct dpu_hw_intr *intr, uint32_t reg_off, - uint32_t mask) -{ - if (!intr) - return; - - DPU_REG_WRITE(&intr->hw, reg_off, mask); - - /* ensure register writes go through */ - wmb(); -} - static void dpu_hw_intr_dispatch_irq(struct dpu_hw_intr *intr, void (*cbfunc)(void *, int), void *arg) @@ -1004,18 +988,6 @@ static int dpu_hw_intr_disable_irqs(struct dpu_hw_intr *intr) return 0; } -static int dpu_hw_intr_get_valid_interrupts(struct dpu_hw_intr *intr, - uint32_t *mask) -{ - if (!intr || !mask) - return -EINVAL; - - *mask = IRQ_SOURCE_MDP | IRQ_SOURCE_DSI0 | IRQ_SOURCE_DSI1 - | IRQ_SOURCE_HDMI | IRQ_SOURCE_EDP; - - return 0; -} - static void dpu_hw_intr_get_interrupt_statuses(struct dpu_hw_intr *intr) { int i; @@ -1065,19 +1037,6 @@ static void dpu_hw_intr_clear_intr_status_nolock(struct dpu_hw_intr *intr, wmb(); } -static void dpu_hw_intr_clear_interrupt_status(struct dpu_hw_intr *intr, - int irq_idx) -{ - unsigned long irq_flags; - - if (!intr) - return; - - 
spin_lock_irqsave(&intr->irq_lock, irq_flags); - dpu_hw_intr_clear_intr_status_nolock(intr, irq_idx); - spin_unlock_irqrestore(&intr->irq_lock, irq_flags); -} - static u32 dpu_hw_intr_get_interrupt_status(struct dpu_hw_intr *intr, int irq_idx, bool clear) { @@ -1113,16 +1072,13 @@ static u32 dpu_hw_intr_get_interrupt_status(struct dpu_hw_intr *intr, static void __setup_intr_ops(struct dpu_hw_intr_ops *ops) { - ops->set_mask = dpu_hw_intr_set_mask; ops->irq_idx_lookup = dpu_hw_intr_irqidx_lookup; ops->enable_irq = dpu_hw_intr_enable_irq; ops->disable_irq = dpu_hw_intr_disable_irq; ops->dispatch_irqs = dpu_hw_intr_dispatch_irq; ops->clear_all_irqs = dpu_hw_intr_clear_irqs; ops->disable_all_irqs = dpu_hw_intr_disable_irqs; - ops->get_valid_interrupts = dpu_hw_intr_get_valid_interrupts; ops->get_interrupt_statuses = dpu_hw_intr_get_interrupt_statuses; - ops->clear_interrupt_status = dpu_hw_intr_clear_interrupt_status; ops->clear_intr_status_nolock = dpu_hw_intr_clear_intr_status_nolock; ops->get_interrupt_status = dpu_hw_intr_get_interrupt_status; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h index 61e4cba36562..4d7a1c727ce2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.h @@ -20,13 +20,6 @@ #include "dpu_hw_util.h" #include "dpu_hw_mdss.h" -#define IRQ_SOURCE_MDP BIT(0) -#define IRQ_SOURCE_DSI0 BIT(4) -#define IRQ_SOURCE_DSI1 BIT(5) -#define IRQ_SOURCE_HDMI BIT(8) -#define IRQ_SOURCE_EDP BIT(12) -#define IRQ_SOURCE_MHL BIT(16) - /** * dpu_intr_type - HW Interrupt Type * @DPU_IRQ_TYPE_WB_ROT_COMP: WB rotator done @@ -96,18 +89,6 @@ struct dpu_hw_intr; */ struct dpu_hw_intr_ops { /** - * set_mask - Programs the given interrupt register with the - * given interrupt mask. Register value will get overwritten. - * @intr: HW interrupt handle - * @reg_off: MDSS HW register offset - * @irqmask: IRQ mask value - */ - void (*set_mask)( - struct dpu_hw_intr *intr, - uint32_t reg, - uint32_t irqmask); - - /** * irq_idx_lookup - Lookup IRQ index on the HW interrupt type * Used for all irq related ops * @intr_type: Interrupt type defined in dpu_intr_type @@ -177,16 +158,6 @@ struct dpu_hw_intr_ops { struct dpu_hw_intr *intr); /** - * clear_interrupt_status - Clears HW interrupt status based on given - * lookup IRQ index. - * @intr: HW interrupt handle - * @irq_idx: Lookup irq index return from irq_idx_lookup - */ - void (*clear_interrupt_status)( - struct dpu_hw_intr *intr, - int irq_idx); - - /** * clear_intr_status_nolock() - clears the HW interrupts without lock * @intr: HW interrupt handle * @irq_idx: Lookup irq index return from irq_idx_lookup @@ -206,21 +177,6 @@ struct dpu_hw_intr_ops { struct dpu_hw_intr *intr, int irq_idx, bool clear); - - /** - * get_valid_interrupts - Gets a mask of all valid interrupt sources - * within DPU. These are actually status bits - * within interrupt registers that specify the - * source of the interrupt in IRQs. For example, - * valid interrupt sources can be MDP, DSI, - * HDMI etc. 
- * @intr: HW interrupt handle - * @mask: Returning the interrupt source MASK - * @return: 0 for success, otherwise failure - */ - int (*get_valid_interrupts)( - struct dpu_hw_intr *intr, - uint32_t *mask); }; /** diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h index 68c54d2c9677..1ab8d4a889f7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_mdss.h @@ -258,12 +258,6 @@ enum dpu_vbif { VBIF_NRT = VBIF_1 }; -enum dpu_iommu_domain { - DPU_IOMMU_DOMAIN_UNSECURE, - DPU_IOMMU_DOMAIN_SECURE, - DPU_IOMMU_DOMAIN_MAX -}; - /** * DPU HW,Component order color map */ @@ -358,7 +352,6 @@ enum dpu_3d_blend_mode { * @alpha_enable: whether the format has an alpha channel * @num_planes: number of planes (including meta data planes) * @fetch_mode: linear, tiled, or ubwc hw fetch behavior - * @is_yuv: is format a yuv variant * @flag: usage bit flags * @tile_width: format tile width * @tile_height: format tile height diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h index 321fc64ddd0e..efe70c508ee0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h @@ -18,7 +18,6 @@ #include "dpu_hw_mdss.h" #define REG_MASK(n) ((BIT(n)) - 1) -struct dpu_format_extended; /* * This is the common struct maintained by each sub block diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index 4d67b3c96702..885bf88afa3e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -405,35 +405,38 @@ static void dpu_kms_wait_for_commit_done(struct msm_kms *kms, } } -static void _dpu_kms_initialize_dsi(struct drm_device *dev, +static int _dpu_kms_initialize_dsi(struct drm_device *dev, struct msm_drm_private *priv, struct dpu_kms *dpu_kms) { struct drm_encoder *encoder = NULL; - int i, rc; + int i, rc = 0; + + if (!(priv->dsi[0] || priv->dsi[1])) + return rc; /*TODO: Support two independent DSI connectors */ encoder = dpu_encoder_init(dev, DRM_MODE_ENCODER_DSI); - if (IS_ERR_OR_NULL(encoder)) { + if (IS_ERR(encoder)) { DPU_ERROR("encoder init failed for dsi display\n"); - return; + return PTR_ERR(encoder); } priv->encoders[priv->num_encoders++] = encoder; for (i = 0; i < ARRAY_SIZE(priv->dsi); i++) { - if (!priv->dsi[i]) { - DPU_DEBUG("invalid msm_dsi for ctrl %d\n", i); - return; - } + if (!priv->dsi[i]) + continue; rc = msm_dsi_modeset_init(priv->dsi[i], dev, encoder); if (rc) { DPU_ERROR("modeset_init failed for dsi[%d], rc = %d\n", i, rc); - continue; + break; } } + + return rc; } /** @@ -444,16 +447,16 @@ static void _dpu_kms_initialize_dsi(struct drm_device *dev, * @dpu_kms: Pointer to dpu kms structure * Returns: Zero on success */ -static void _dpu_kms_setup_displays(struct drm_device *dev, +static int _dpu_kms_setup_displays(struct drm_device *dev, struct msm_drm_private *priv, struct dpu_kms *dpu_kms) { - _dpu_kms_initialize_dsi(dev, priv, dpu_kms); - /** * Extend this function to initialize other * types of displays */ + + return _dpu_kms_initialize_dsi(dev, priv, dpu_kms); } static void _dpu_kms_drm_obj_destroy(struct dpu_kms *dpu_kms) @@ -516,7 +519,9 @@ static int _dpu_kms_drm_obj_init(struct dpu_kms *dpu_kms) * Create encoder and query display drivers to create * bridges and connectors */ - _dpu_kms_setup_displays(dev, priv, dpu_kms); + ret = _dpu_kms_setup_displays(dev, priv, dpu_kms); + if (ret) + goto fail; max_crtc_count = 
min(catalog->mixer_count, priv->num_encoders); @@ -627,6 +632,10 @@ static void _dpu_kms_hw_destroy(struct dpu_kms *dpu_kms) devm_iounmap(&dpu_kms->pdev->dev, dpu_kms->vbif[VBIF_RT]); dpu_kms->vbif[VBIF_RT] = NULL; + if (dpu_kms->hw_mdp) + dpu_hw_mdp_destroy(dpu_kms->hw_mdp); + dpu_kms->hw_mdp = NULL; + if (dpu_kms->mmio) devm_iounmap(&dpu_kms->pdev->dev, dpu_kms->mmio); dpu_kms->mmio = NULL; @@ -877,8 +886,7 @@ static int dpu_kms_hw_init(struct msm_kms *kms) goto power_error; } - rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mmio, - dpu_kms->dev); + rc = dpu_rm_init(&dpu_kms->rm, dpu_kms->catalog, dpu_kms->mmio); if (rc) { DPU_ERROR("rm init failed: %d\n", rc); goto power_error; @@ -886,11 +894,10 @@ static int dpu_kms_hw_init(struct msm_kms *kms) dpu_kms->rm_init = true; - dpu_kms->hw_mdp = dpu_rm_get_mdp(&dpu_kms->rm); - if (IS_ERR_OR_NULL(dpu_kms->hw_mdp)) { + dpu_kms->hw_mdp = dpu_hw_mdptop_init(MDP_TOP, dpu_kms->mmio, + dpu_kms->catalog); + if (IS_ERR(dpu_kms->hw_mdp)) { rc = PTR_ERR(dpu_kms->hw_mdp); - if (!dpu_kms->hw_mdp) - rc = -EINVAL; DPU_ERROR("failed to get hw_mdp: %d\n", rc); dpu_kms->hw_mdp = NULL; goto power_error; @@ -926,16 +933,6 @@ static int dpu_kms_hw_init(struct msm_kms *kms) goto hw_intr_init_err; } - /* - * _dpu_kms_drm_obj_init should create the DRM related objects - * i.e. CRTCs, planes, encoders, connectors and so forth - */ - rc = _dpu_kms_drm_obj_init(dpu_kms); - if (rc) { - DPU_ERROR("modeset init failed: %d\n", rc); - goto drm_obj_init_err; - } - dev->mode_config.min_width = 0; dev->mode_config.min_height = 0; @@ -952,6 +949,16 @@ static int dpu_kms_hw_init(struct msm_kms *kms) */ dev->mode_config.allow_fb_modifiers = true; + /* + * _dpu_kms_drm_obj_init should create the DRM related objects + * i.e. CRTCs, planes, encoders, connectors and so forth + */ + rc = _dpu_kms_drm_obj_init(dpu_kms); + if (rc) { + DPU_ERROR("modeset init failed: %d\n", rc); + goto drm_obj_init_err; + } + dpu_vbif_init_memtypes(dpu_kms); pm_runtime_put_sync(&dpu_kms->pdev->dev); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c index cb307a2abf06..7316b4ab1b85 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_mdss.c @@ -23,11 +23,14 @@ struct dpu_mdss { struct dpu_irq_controller irq_controller; }; -static irqreturn_t dpu_mdss_irq(int irq, void *arg) +static void dpu_mdss_irq(struct irq_desc *desc) { - struct dpu_mdss *dpu_mdss = arg; + struct dpu_mdss *dpu_mdss = irq_desc_get_handler_data(desc); + struct irq_chip *chip = irq_desc_get_chip(desc); u32 interrupts; + chained_irq_enter(chip, desc); + interrupts = readl_relaxed(dpu_mdss->mmio + HW_INTR_STATUS); while (interrupts) { @@ -39,20 +42,20 @@ static irqreturn_t dpu_mdss_irq(int irq, void *arg) hwirq); if (mapping == 0) { DRM_ERROR("couldn't find irq mapping for %lu\n", hwirq); - return IRQ_NONE; + break; } rc = generic_handle_irq(mapping); if (rc < 0) { DRM_ERROR("handle irq fail: irq=%lu mapping=%u rc=%d\n", hwirq, mapping, rc); - return IRQ_NONE; + break; } interrupts &= ~(1 << hwirq); } - return IRQ_HANDLED; + chained_irq_exit(chip, desc); } static void dpu_mdss_irq_mask(struct irq_data *irqd) @@ -83,16 +86,16 @@ static struct irq_chip dpu_mdss_irq_chip = { .irq_unmask = dpu_mdss_irq_unmask, }; +static struct lock_class_key dpu_mdss_lock_key, dpu_mdss_request_key; + static int dpu_mdss_irqdomain_map(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq) { struct dpu_mdss *dpu_mdss = domain->host_data; - int ret; + 
irq_set_lockdep_class(irq, &dpu_mdss_lock_key, &dpu_mdss_request_key); irq_set_chip_and_handler(irq, &dpu_mdss_irq_chip, handle_level_irq); - ret = irq_set_chip_data(irq, dpu_mdss); - - return ret; + return irq_set_chip_data(irq, dpu_mdss); } static const struct irq_domain_ops dpu_mdss_irqdomain_ops = { @@ -159,11 +162,13 @@ static void dpu_mdss_destroy(struct drm_device *dev) struct msm_drm_private *priv = dev->dev_private; struct dpu_mdss *dpu_mdss = to_dpu_mdss(priv->mdss); struct dss_module_power *mp = &dpu_mdss->mp; + int irq; pm_runtime_suspend(dev->dev); pm_runtime_disable(dev->dev); _dpu_mdss_irq_domain_fini(dpu_mdss); - free_irq(platform_get_irq(pdev, 0), dpu_mdss); + irq = platform_get_irq(pdev, 0); + irq_set_chained_handler_and_data(irq, NULL, NULL); msm_dss_put_clk(mp->clk_config, mp->num_clk); devm_kfree(&pdev->dev, mp->clk_config); @@ -187,6 +192,7 @@ int dpu_mdss_init(struct drm_device *dev) struct dpu_mdss *dpu_mdss; struct dss_module_power *mp; int ret = 0; + int irq; dpu_mdss = devm_kzalloc(dev->dev, sizeof(*dpu_mdss), GFP_KERNEL); if (!dpu_mdss) @@ -219,12 +225,12 @@ int dpu_mdss_init(struct drm_device *dev) if (ret) goto irq_domain_error; - ret = request_irq(platform_get_irq(pdev, 0), - dpu_mdss_irq, 0, "dpu_mdss_isr", dpu_mdss); - if (ret) { - DPU_ERROR("failed to init irq: %d\n", ret); + irq = platform_get_irq(pdev, 0); + if (irq < 0) goto irq_error; - } + + irq_set_chained_handler_and_data(irq, dpu_mdss_irq, + dpu_mdss); pm_runtime_enable(dev->dev); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index fd75870eb17f..b01183b309b9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -95,8 +95,6 @@ struct dpu_plane { enum dpu_sspp pipe; uint32_t features; /* capabilities from catalog */ - uint32_t nformats; - uint32_t formats[64]; struct dpu_hw_pipe *pipe_hw; struct dpu_hw_pipe_cfg pipe_cfg; @@ -121,6 +119,12 @@ struct dpu_plane { bool debugfs_default_scale; }; +static const uint64_t supported_format_modifiers[] = { + DRM_FORMAT_MOD_QCOM_COMPRESSED, + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID +}; + #define to_dpu_plane(x) container_of(x, struct dpu_plane, base) static struct dpu_kms *_dpu_plane_get_kms(struct drm_plane *plane) @@ -365,19 +369,6 @@ static void _dpu_plane_set_qos_ctrl(struct drm_plane *plane, &pdpu->pipe_qos_cfg); } -static void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) -{ - struct dpu_plane *pdpu = to_dpu_plane(plane); - struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); - - if (!pdpu->is_rt_pipe) - return; - - pm_runtime_get_sync(&dpu_kms->pdev->dev); - _dpu_plane_set_qos_ctrl(plane, enable, DPU_PLANE_QOS_PANIC_CTRL); - pm_runtime_put_sync(&dpu_kms->pdev->dev); -} - /** * _dpu_plane_set_ot_limit - set OT limit for the given plane * @plane: Pointer to drm plane @@ -1248,6 +1239,19 @@ static void dpu_plane_reset(struct drm_plane *plane) } #ifdef CONFIG_DEBUG_FS +static void dpu_plane_danger_signal_ctrl(struct drm_plane *plane, bool enable) +{ + struct dpu_plane *pdpu = to_dpu_plane(plane); + struct dpu_kms *dpu_kms = _dpu_plane_get_kms(plane); + + if (!pdpu->is_rt_pipe) + return; + + pm_runtime_get_sync(&dpu_kms->pdev->dev); + _dpu_plane_set_qos_ctrl(plane, enable, DPU_PLANE_QOS_PANIC_CTRL); + pm_runtime_put_sync(&dpu_kms->pdev->dev); +} + static ssize_t _dpu_plane_danger_read(struct file *file, char __user *buff, size_t count, loff_t *ppos) { @@ -1410,6 +1414,23 @@ static void dpu_plane_early_unregister(struct drm_plane 
*plane) debugfs_remove_recursive(pdpu->debugfs_root); } +static bool dpu_plane_format_mod_supported(struct drm_plane *plane, + uint32_t format, uint64_t modifier) +{ + if (modifier == DRM_FORMAT_MOD_LINEAR) + return true; + + if (modifier == DRM_FORMAT_MOD_QCOM_COMPRESSED) { + int i; + for (i = 0; i < ARRAY_SIZE(qcom_compressed_supported_formats); i++) { + if (format == qcom_compressed_supported_formats[i]) + return true; + } + } + + return false; +} + static const struct drm_plane_funcs dpu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -1419,6 +1440,7 @@ static const struct drm_plane_funcs dpu_plane_funcs = { .atomic_destroy_state = dpu_plane_destroy_state, .late_register = dpu_plane_late_register, .early_unregister = dpu_plane_early_unregister, + .format_mod_supported = dpu_plane_format_mod_supported, }; static const struct drm_plane_helper_funcs dpu_plane_helper_funcs = { @@ -1444,11 +1466,12 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, unsigned long possible_crtcs, u32 master_plane_id) { struct drm_plane *plane = NULL, *master_plane = NULL; - const struct dpu_format_extended *format_list; + const uint32_t *format_list; struct dpu_plane *pdpu; struct msm_drm_private *priv = dev->dev_private; struct dpu_kms *kms = to_dpu_kms(priv->kms); int zpos_max = DPU_ZPOS_MAX; + uint32_t num_formats; int ret = -EINVAL; /* create and zero local structure */ @@ -1491,24 +1514,18 @@ struct drm_plane *dpu_plane_init(struct drm_device *dev, goto clean_sspp; } - if (!master_plane_id) - format_list = pdpu->pipe_sblk->format_list; - else + if (pdpu->is_virtual) { format_list = pdpu->pipe_sblk->virt_format_list; - - pdpu->nformats = dpu_populate_formats(format_list, - pdpu->formats, - 0, - ARRAY_SIZE(pdpu->formats)); - - if (!pdpu->nformats) { - DPU_ERROR("[%u]no valid formats for plane\n", pipe); - goto clean_sspp; + num_formats = pdpu->pipe_sblk->virt_num_formats; + } + else { + format_list = pdpu->pipe_sblk->format_list; + num_formats = pdpu->pipe_sblk->num_formats; } ret = drm_universal_plane_init(dev, plane, 0xff, &dpu_plane_funcs, - pdpu->formats, pdpu->nformats, - NULL, type, NULL); + format_list, num_formats, + supported_format_modifiers, type, NULL); if (ret) goto clean_sspp; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h index 7fed0b627708..0e6063acd041 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.h @@ -28,23 +28,18 @@ /** * struct dpu_plane_state: Define dpu extension of drm plane state object * @base: base drm plane state object - * @property_state: Local storage for msm_prop properties - * @property_values: cached plane property values * @aspace: pointer to address space for input/output buffers - * @input_fence: dereferenced input fence pointer * @stage: assigned by crtc blender * @multirect_index: index of the rectangle of SSPP * @multirect_mode: parallel or time multiplex multirect mode * @pending: whether the current update is still pending * @scaler3_cfg: configuration data for scaler3 * @pixel_ext: configuration data for pixel extensions - * @scaler_check_state: indicates status of user provided pixel extension data * @cdp_cfg: CDP configuration */ struct dpu_plane_state { struct drm_plane_state base; struct msm_gem_address_space *aspace; - void *input_fence; enum dpu_stage stage; uint32_t multirect_index; uint32_t multirect_mode; @@ -107,12 +102,6 @@ void dpu_plane_restore(struct drm_plane *plane); 
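The .format_mod_supported hook added above pairs with the supported_format_modifiers array handed to drm_universal_plane_init(): the array must end with DRM_FORMAT_MOD_INVALID, and once dev->mode_config.allow_fb_modifiers is set the core advertises the accepted format/modifier pairs to userspace via the IN_FORMATS plane property. A minimal sketch of the same pattern, with illustrative example_* names rather than the driver's:

#include <drm/drm_fourcc.h>
#include <drm/drm_plane.h>

static const uint64_t example_modifiers[] = {
	DRM_FORMAT_MOD_LINEAR,
	DRM_FORMAT_MOD_INVALID,		/* sentinel required by the core */
};

static bool example_format_mod_supported(struct drm_plane *plane,
					 uint32_t format, uint64_t modifier)
{
	/* Accept only the linear layout for every advertised format;
	 * a vendor modifier would be checked against a whitelist here,
	 * as dpu_plane_format_mod_supported() does above. */
	return modifier == DRM_FORMAT_MOD_LINEAR;
}

Wiring example_modifiers and a drm_plane_funcs containing the hook into drm_universal_plane_init() is all the core needs to validate framebuffer creation against the list.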
void dpu_plane_flush(struct drm_plane *plane); /** - * dpu_plane_kickoff - final plane operations before commit kickoff - * @plane: Pointer to drm plane structure - */ -void dpu_plane_kickoff(struct drm_plane *plane); - -/** * dpu_plane_set_error: enable/disable error condition * @plane: pointer to drm_plane structure */ @@ -147,14 +136,6 @@ int dpu_plane_validate_multirect_v2(struct dpu_multirect_plane_states *plane); void dpu_plane_clear_multirect(const struct drm_plane_state *drm_state); /** - * dpu_plane_wait_input_fence - wait for input fence object - * @plane: Pointer to DRM plane object - * @wait_ms: Wait timeout value - * Returns: Zero on success - */ -int dpu_plane_wait_input_fence(struct drm_plane *plane, uint32_t wait_ms); - -/** * dpu_plane_color_fill - enables color fill on plane * @plane: Pointer to DRM plane object * @color: RGB fill color value, [23..16] Blue, [15..8] Green, [7..0] Red @@ -164,12 +145,4 @@ int dpu_plane_wait_input_fence(struct drm_plane *plane, uint32_t wait_ms); int dpu_plane_color_fill(struct drm_plane *plane, uint32_t color, uint32_t alpha); -/** - * dpu_plane_set_revalidate - sets revalidate flag which forces a full - * validation of the plane properties in the next atomic check - * @plane: Pointer to DRM plane object - * @enable: Boolean to set/unset the flag - */ -void dpu_plane_set_revalidate(struct drm_plane *plane, bool enable); - #endif /* _DPU_PLANE_H_ */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c index bdb117709674..037d9f4187f9 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.c @@ -21,8 +21,8 @@ #include "dpu_encoder.h" #include "dpu_trace.h" -#define RESERVED_BY_OTHER(h, r) \ - ((h)->rsvp && ((h)->rsvp->enc_id != (r)->enc_id)) +#define RESERVED_BY_OTHER(h, r) \ + ((h)->enc_id && (h)->enc_id != r) /** * struct dpu_rm_requirements - Reservation requirements parameter bundle @@ -34,90 +34,21 @@ struct dpu_rm_requirements { struct dpu_encoder_hw_resources hw_res; }; -/** - * struct dpu_rm_rsvp - Use Case Reservation tagging structure - * Used to tag HW blocks as reserved by a CRTC->Encoder->Connector chain - * By using as a tag, rather than lists of pointers to HW blocks used - * we can avoid some list management since we don't know how many blocks - * of each type a given use case may require. - * @list: List head for list of all reservations - * @seq: Global RSVP sequence number for debugging, especially for - * differentiating differenct allocations for same encoder. - * @enc_id: Reservations are tracked by Encoder DRM object ID. - * CRTCs may be connected to multiple Encoders. - * An encoder or connector id identifies the display path. - */ -struct dpu_rm_rsvp { - struct list_head list; - uint32_t seq; - uint32_t enc_id; -}; /** * struct dpu_rm_hw_blk - hardware block tracking list member * @list: List head for list of all hardware blocks tracking items - * @rsvp: Pointer to use case reservation if reserved by a client - * @rsvp_nxt: Temporary pointer used during reservation to the incoming - * request. Will be swapped into rsvp if proposal is accepted - * @type: Type of hardware block this structure tracks * @id: Hardware ID number, within it's own space, ie. 
LM_X - * @catalog: Pointer to the hardware catalog entry for this block + * @enc_id: Encoder id to which this blk is binded * @hw: Pointer to the hardware register access object for this block */ struct dpu_rm_hw_blk { struct list_head list; - struct dpu_rm_rsvp *rsvp; - struct dpu_rm_rsvp *rsvp_nxt; - enum dpu_hw_blk_type type; uint32_t id; + uint32_t enc_id; struct dpu_hw_blk *hw; }; -/** - * dpu_rm_dbg_rsvp_stage - enum of steps in making reservation for event logging - */ -enum dpu_rm_dbg_rsvp_stage { - DPU_RM_STAGE_BEGIN, - DPU_RM_STAGE_AFTER_CLEAR, - DPU_RM_STAGE_AFTER_RSVPNEXT, - DPU_RM_STAGE_FINAL -}; - -static void _dpu_rm_print_rsvps( - struct dpu_rm *rm, - enum dpu_rm_dbg_rsvp_stage stage) -{ - struct dpu_rm_rsvp *rsvp; - struct dpu_rm_hw_blk *blk; - enum dpu_hw_blk_type type; - - DPU_DEBUG("%d\n", stage); - - list_for_each_entry(rsvp, &rm->rsvps, list) { - DRM_DEBUG_KMS("%d rsvp[s%ue%u]\n", stage, rsvp->seq, - rsvp->enc_id); - } - - for (type = 0; type < DPU_HW_BLK_MAX; type++) { - list_for_each_entry(blk, &rm->hw_blks[type], list) { - if (!blk->rsvp && !blk->rsvp_nxt) - continue; - - DRM_DEBUG_KMS("%d rsvp[s%ue%u->s%ue%u] %d %d\n", stage, - (blk->rsvp) ? blk->rsvp->seq : 0, - (blk->rsvp) ? blk->rsvp->enc_id : 0, - (blk->rsvp_nxt) ? blk->rsvp_nxt->seq : 0, - (blk->rsvp_nxt) ? blk->rsvp_nxt->enc_id : 0, - blk->type, blk->id); - } - } -} - -struct dpu_hw_mdp *dpu_rm_get_mdp(struct dpu_rm *rm) -{ - return rm->hw_mdp; -} - void dpu_rm_init_hw_iter( struct dpu_rm_hw_iter *iter, uint32_t enc_id, @@ -148,15 +79,7 @@ static bool _dpu_rm_get_hw_locked(struct dpu_rm *rm, struct dpu_rm_hw_iter *i) i->blk = list_prepare_entry(i->blk, blk_list, list); list_for_each_entry_continue(i->blk, blk_list, list) { - struct dpu_rm_rsvp *rsvp = i->blk->rsvp; - - if (i->blk->type != i->type) { - DPU_ERROR("found incorrect block type %d on %d list\n", - i->blk->type, i->type); - return false; - } - - if ((i->enc_id == 0) || (rsvp && rsvp->enc_id == i->enc_id)) { + if (i->enc_id == i->blk->enc_id) { i->hw = i->blk->hw; DPU_DEBUG("found type %d id %d for enc %d\n", i->type, i->blk->id, i->enc_id); @@ -208,34 +131,18 @@ static void _dpu_rm_hw_destroy(enum dpu_hw_blk_type type, void *hw) int dpu_rm_destroy(struct dpu_rm *rm) { - - struct dpu_rm_rsvp *rsvp_cur, *rsvp_nxt; struct dpu_rm_hw_blk *hw_cur, *hw_nxt; enum dpu_hw_blk_type type; - if (!rm) { - DPU_ERROR("invalid rm\n"); - return -EINVAL; - } - - list_for_each_entry_safe(rsvp_cur, rsvp_nxt, &rm->rsvps, list) { - list_del(&rsvp_cur->list); - kfree(rsvp_cur); - } - - for (type = 0; type < DPU_HW_BLK_MAX; type++) { list_for_each_entry_safe(hw_cur, hw_nxt, &rm->hw_blks[type], list) { list_del(&hw_cur->list); - _dpu_rm_hw_destroy(hw_cur->type, hw_cur->hw); + _dpu_rm_hw_destroy(type, hw_cur->hw); kfree(hw_cur); } } - dpu_hw_mdp_destroy(rm->hw_mdp); - rm->hw_mdp = NULL; - mutex_destroy(&rm->rm_lock); return 0; @@ -250,11 +157,8 @@ static int _dpu_rm_hw_blk_create( void *hw_catalog_info) { struct dpu_rm_hw_blk *blk; - struct dpu_hw_mdp *hw_mdp; void *hw; - hw_mdp = rm->hw_mdp; - switch (type) { case DPU_HW_BLK_LM: hw = dpu_hw_lm_init(id, mmio, cat); @@ -290,9 +194,9 @@ static int _dpu_rm_hw_blk_create( return -ENOMEM; } - blk->type = type; blk->id = id; blk->hw = hw; + blk->enc_id = 0; list_add_tail(&blk->list, &rm->hw_blks[type]); return 0; @@ -300,13 +204,12 @@ static int _dpu_rm_hw_blk_create( int dpu_rm_init(struct dpu_rm *rm, struct dpu_mdss_cfg *cat, - void __iomem *mmio, - struct drm_device *dev) + void __iomem *mmio) { int rc, i; enum dpu_hw_blk_type 
type; - if (!rm || !cat || !mmio || !dev) { + if (!rm || !cat || !mmio) { DPU_ERROR("invalid kms\n"); return -EINVAL; } @@ -316,21 +219,9 @@ int dpu_rm_init(struct dpu_rm *rm, mutex_init(&rm->rm_lock); - INIT_LIST_HEAD(&rm->rsvps); for (type = 0; type < DPU_HW_BLK_MAX; type++) INIT_LIST_HEAD(&rm->hw_blks[type]); - rm->dev = dev; - - /* Some of the sub-blocks require an mdptop to be created */ - rm->hw_mdp = dpu_hw_mdptop_init(MDP_TOP, mmio, cat); - if (IS_ERR_OR_NULL(rm->hw_mdp)) { - rc = PTR_ERR(rm->hw_mdp); - rm->hw_mdp = NULL; - DPU_ERROR("failed: mdp hw not available\n"); - goto fail; - } - /* Interrogate HW catalog and create tracking items for hw blocks */ for (i = 0; i < cat->mixer_count; i++) { struct dpu_lm_cfg *lm = &cat->mixer[i]; @@ -410,7 +301,7 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top) * proposed use case requirements, incl. hardwired dependent blocks like * pingpong * @rm: dpu resource manager handle - * @rsvp: reservation currently being created + * @enc_id: encoder id requesting for allocation * @reqs: proposed use case requirements * @lm: proposed layer mixer, function checks if lm, and all other hardwired * blocks connected to the lm (pp) is available and appropriate @@ -422,7 +313,7 @@ static bool _dpu_rm_needs_split_display(const struct msm_display_topology *top) */ static bool _dpu_rm_check_lm_and_get_connected_blks( struct dpu_rm *rm, - struct dpu_rm_rsvp *rsvp, + uint32_t enc_id, struct dpu_rm_requirements *reqs, struct dpu_rm_hw_blk *lm, struct dpu_rm_hw_blk **pp, @@ -449,7 +340,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks( } /* Already reserved? */ - if (RESERVED_BY_OTHER(lm, rsvp)) { + if (RESERVED_BY_OTHER(lm, enc_id)) { DPU_DEBUG("lm %d already reserved\n", lm_cfg->id); return false; } @@ -467,7 +358,7 @@ static bool _dpu_rm_check_lm_and_get_connected_blks( return false; } - if (RESERVED_BY_OTHER(*pp, rsvp)) { + if (RESERVED_BY_OTHER(*pp, enc_id)) { DPU_DEBUG("lm %d pp %d already reserved\n", lm->id, (*pp)->id); return false; @@ -476,10 +367,8 @@ static bool _dpu_rm_check_lm_and_get_connected_blks( return true; } -static int _dpu_rm_reserve_lms( - struct dpu_rm *rm, - struct dpu_rm_rsvp *rsvp, - struct dpu_rm_requirements *reqs) +static int _dpu_rm_reserve_lms(struct dpu_rm *rm, uint32_t enc_id, + struct dpu_rm_requirements *reqs) { struct dpu_rm_hw_blk *lm[MAX_BLOCKS]; @@ -504,7 +393,7 @@ static int _dpu_rm_reserve_lms( lm[lm_count] = iter_i.blk; if (!_dpu_rm_check_lm_and_get_connected_blks( - rm, rsvp, reqs, lm[lm_count], + rm, enc_id, reqs, lm[lm_count], &pp[lm_count], NULL)) continue; @@ -519,7 +408,7 @@ static int _dpu_rm_reserve_lms( continue; if (!_dpu_rm_check_lm_and_get_connected_blks( - rm, rsvp, reqs, iter_j.blk, + rm, enc_id, reqs, iter_j.blk, &pp[lm_count], iter_i.blk)) continue; @@ -537,11 +426,10 @@ static int _dpu_rm_reserve_lms( if (!lm[i]) break; - lm[i]->rsvp_nxt = rsvp; - pp[i]->rsvp_nxt = rsvp; + lm[i]->enc_id = enc_id; + pp[i]->enc_id = enc_id; - trace_dpu_rm_reserve_lms(lm[i]->id, lm[i]->type, rsvp->enc_id, - pp[i]->id); + trace_dpu_rm_reserve_lms(lm[i]->id, enc_id, pp[i]->id); } return rc; @@ -549,7 +437,7 @@ static int _dpu_rm_reserve_lms( static int _dpu_rm_reserve_ctls( struct dpu_rm *rm, - struct dpu_rm_rsvp *rsvp, + uint32_t enc_id, const struct msm_display_topology *top) { struct dpu_rm_hw_blk *ctls[MAX_BLOCKS]; @@ -570,7 +458,7 @@ static int _dpu_rm_reserve_ctls( unsigned long features = ctl->caps->features; bool has_split_display; - if (RESERVED_BY_OTHER(iter.blk, rsvp)) + if 
(RESERVED_BY_OTHER(iter.blk, enc_id)) continue; has_split_display = BIT(DPU_CTL_SPLIT_DISPLAY) & features; @@ -591,9 +479,8 @@ static int _dpu_rm_reserve_ctls( return -ENAVAIL; for (i = 0; i < ARRAY_SIZE(ctls) && i < num_ctls; i++) { - ctls[i]->rsvp_nxt = rsvp; - trace_dpu_rm_reserve_ctls(ctls[i]->id, ctls[i]->type, - rsvp->enc_id); + ctls[i]->enc_id = enc_id; + trace_dpu_rm_reserve_ctls(ctls[i]->id, enc_id); } return 0; @@ -601,7 +488,7 @@ static int _dpu_rm_reserve_ctls( static int _dpu_rm_reserve_intf( struct dpu_rm *rm, - struct dpu_rm_rsvp *rsvp, + uint32_t enc_id, uint32_t id, enum dpu_hw_blk_type type) { @@ -614,14 +501,13 @@ static int _dpu_rm_reserve_intf( if (iter.blk->id != id) continue; - if (RESERVED_BY_OTHER(iter.blk, rsvp)) { + if (RESERVED_BY_OTHER(iter.blk, enc_id)) { DPU_ERROR("type %d id %d already reserved\n", type, id); return -ENAVAIL; } - iter.blk->rsvp_nxt = rsvp; - trace_dpu_rm_reserve_intf(iter.blk->id, iter.blk->type, - rsvp->enc_id); + iter.blk->enc_id = enc_id; + trace_dpu_rm_reserve_intf(iter.blk->id, enc_id); break; } @@ -636,7 +522,7 @@ static int _dpu_rm_reserve_intf( static int _dpu_rm_reserve_intf_related_hw( struct dpu_rm *rm, - struct dpu_rm_rsvp *rsvp, + uint32_t enc_id, struct dpu_encoder_hw_resources *hw_res) { int i, ret = 0; @@ -646,7 +532,7 @@ static int _dpu_rm_reserve_intf_related_hw( if (hw_res->intfs[i] == INTF_MODE_NONE) continue; id = i + INTF_0; - ret = _dpu_rm_reserve_intf(rm, rsvp, id, + ret = _dpu_rm_reserve_intf(rm, enc_id, id, DPU_HW_BLK_INTF); if (ret) return ret; @@ -655,33 +541,27 @@ static int _dpu_rm_reserve_intf_related_hw( return ret; } -static int _dpu_rm_make_next_rsvp( +static int _dpu_rm_make_reservation( struct dpu_rm *rm, struct drm_encoder *enc, struct drm_crtc_state *crtc_state, - struct dpu_rm_rsvp *rsvp, struct dpu_rm_requirements *reqs) { int ret; - /* Create reservation info, tag reserved blocks with it as we go */ - rsvp->seq = ++rm->rsvp_next_seq; - rsvp->enc_id = enc->base.id; - list_add_tail(&rsvp->list, &rm->rsvps); - - ret = _dpu_rm_reserve_lms(rm, rsvp, reqs); + ret = _dpu_rm_reserve_lms(rm, enc->base.id, reqs); if (ret) { DPU_ERROR("unable to find appropriate mixers\n"); return ret; } - ret = _dpu_rm_reserve_ctls(rm, rsvp, &reqs->topology); + ret = _dpu_rm_reserve_ctls(rm, enc->base.id, &reqs->topology); if (ret) { DPU_ERROR("unable to find appropriate CTL\n"); return ret; } - ret = _dpu_rm_reserve_intf_related_hw(rm, rsvp, &reqs->hw_res); + ret = _dpu_rm_reserve_intf_related_hw(rm, enc->base.id, &reqs->hw_res); if (ret) return ret; @@ -706,108 +586,31 @@ static int _dpu_rm_populate_requirements( return 0; } -static struct dpu_rm_rsvp *_dpu_rm_get_rsvp( - struct dpu_rm *rm, - struct drm_encoder *enc) +static void _dpu_rm_release_reservation(struct dpu_rm *rm, uint32_t enc_id) { - struct dpu_rm_rsvp *i; - - if (!rm || !enc) { - DPU_ERROR("invalid params\n"); - return NULL; - } - - if (list_empty(&rm->rsvps)) - return NULL; - - list_for_each_entry(i, &rm->rsvps, list) - if (i->enc_id == enc->base.id) - return i; - - return NULL; -} - -/** - * _dpu_rm_release_rsvp - release resources and release a reservation - * @rm: KMS handle - * @rsvp: RSVP pointer to release and release resources for - */ -static void _dpu_rm_release_rsvp(struct dpu_rm *rm, struct dpu_rm_rsvp *rsvp) -{ - struct dpu_rm_rsvp *rsvp_c, *rsvp_n; struct dpu_rm_hw_blk *blk; enum dpu_hw_blk_type type; - if (!rsvp) - return; - - DPU_DEBUG("rel rsvp %d enc %d\n", rsvp->seq, rsvp->enc_id); - - list_for_each_entry_safe(rsvp_c, rsvp_n, &rm->rsvps, list) 
{ - if (rsvp == rsvp_c) { - list_del(&rsvp_c->list); - break; - } - } - for (type = 0; type < DPU_HW_BLK_MAX; type++) { list_for_each_entry(blk, &rm->hw_blks[type], list) { - if (blk->rsvp == rsvp) { - blk->rsvp = NULL; - DPU_DEBUG("rel rsvp %d enc %d %d %d\n", - rsvp->seq, rsvp->enc_id, - blk->type, blk->id); - } - if (blk->rsvp_nxt == rsvp) { - blk->rsvp_nxt = NULL; - DPU_DEBUG("rel rsvp_nxt %d enc %d %d %d\n", - rsvp->seq, rsvp->enc_id, - blk->type, blk->id); + if (blk->enc_id == enc_id) { + blk->enc_id = 0; + DPU_DEBUG("rel enc %d %d %d\n", enc_id, + type, blk->id); } } } - - kfree(rsvp); } void dpu_rm_release(struct dpu_rm *rm, struct drm_encoder *enc) { - struct dpu_rm_rsvp *rsvp; - - if (!rm || !enc) { - DPU_ERROR("invalid params\n"); - return; - } - mutex_lock(&rm->rm_lock); - rsvp = _dpu_rm_get_rsvp(rm, enc); - if (!rsvp) { - DPU_ERROR("failed to find rsvp for enc %d\n", enc->base.id); - goto end; - } + _dpu_rm_release_reservation(rm, enc->base.id); - _dpu_rm_release_rsvp(rm, rsvp); -end: mutex_unlock(&rm->rm_lock); } -static void _dpu_rm_commit_rsvp(struct dpu_rm *rm, struct dpu_rm_rsvp *rsvp) -{ - struct dpu_rm_hw_blk *blk; - enum dpu_hw_blk_type type; - - /* Swap next rsvp to be the active */ - for (type = 0; type < DPU_HW_BLK_MAX; type++) { - list_for_each_entry(blk, &rm->hw_blks[type], list) { - if (blk->rsvp_nxt) { - blk->rsvp = blk->rsvp_nxt; - blk->rsvp_nxt = NULL; - } - } - } -} - int dpu_rm_reserve( struct dpu_rm *rm, struct drm_encoder *enc, @@ -815,7 +618,6 @@ int dpu_rm_reserve( struct msm_display_topology topology, bool test_only) { - struct dpu_rm_rsvp *rsvp_cur, *rsvp_nxt; struct dpu_rm_requirements reqs; int ret; @@ -828,8 +630,6 @@ int dpu_rm_reserve( mutex_lock(&rm->rm_lock); - _dpu_rm_print_rsvps(rm, DPU_RM_STAGE_BEGIN); - ret = _dpu_rm_populate_requirements(rm, enc, crtc_state, &reqs, topology); if (ret) { @@ -837,50 +637,17 @@ int dpu_rm_reserve( goto end; } - /* - * We only support one active reservation per-hw-block. But to implement - * transactional semantics for test-only, and for allowing failure while - * modifying your existing reservation, over the course of this - * function we can have two reservations: - * Current: Existing reservation - * Next: Proposed reservation. The proposed reservation may fail, or may - * be discarded if in test-only mode. - * If reservation is successful, and we're not in test-only, then we - * replace the current with the next. - */ - rsvp_nxt = kzalloc(sizeof(*rsvp_nxt), GFP_KERNEL); - if (!rsvp_nxt) { - ret = -ENOMEM; - goto end; - } - - rsvp_cur = _dpu_rm_get_rsvp(rm, enc); - - /* Check the proposed reservation, store it in hw's "next" field */ - ret = _dpu_rm_make_next_rsvp(rm, enc, crtc_state, rsvp_nxt, &reqs); - - _dpu_rm_print_rsvps(rm, DPU_RM_STAGE_AFTER_RSVPNEXT); - + ret = _dpu_rm_make_reservation(rm, enc, crtc_state, &reqs); if (ret) { DPU_ERROR("failed to reserve hw resources: %d\n", ret); - _dpu_rm_release_rsvp(rm, rsvp_nxt); + _dpu_rm_release_reservation(rm, enc->base.id); } else if (test_only) { - /* - * Normally, if test_only, test the reservation and then undo - * However, if the user requests LOCK, then keep the reservation - * made during the atomic_check phase. 
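The transactional rsvp/rsvp_nxt machinery removed here reduces to a single ownership tag: a block is free when enc_id is zero, owned when it carries an encoder id, and a failed or test-only reservation is undone through the same release path. A minimal sketch of that lifecycle, with the block struct trimmed to the fields the scheme needs (names are illustrative):

#include <linux/list.h>
#include <linux/types.h>

struct example_blk {
	struct list_head list;
	uint32_t id;
	uint32_t enc_id;	/* 0 = free, otherwise owning encoder */
};

static bool example_blk_reserve(struct example_blk *blk, uint32_t enc_id)
{
	if (blk->enc_id && blk->enc_id != enc_id)
		return false;	/* what RESERVED_BY_OTHER() tests */
	blk->enc_id = enc_id;
	return true;
}

static void example_blk_release(struct example_blk *blk, uint32_t enc_id)
{
	if (blk->enc_id == enc_id)
		blk->enc_id = 0;	/* back to the free pool */
}

Since undoing a reservation is just zeroing the tags, test_only no longer needs a separate "next" reservation object to discard.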
- */ - DPU_DEBUG("test_only: discard test rsvp[s%de%d]\n", - rsvp_nxt->seq, rsvp_nxt->enc_id); - _dpu_rm_release_rsvp(rm, rsvp_nxt); - } else { - _dpu_rm_release_rsvp(rm, rsvp_cur); - - _dpu_rm_commit_rsvp(rm, rsvp_nxt); + /* test_only: test the reservation and then undo */ + DPU_DEBUG("test_only: discard test [enc: %d]\n", + enc->base.id); + _dpu_rm_release_reservation(rm, enc->base.id); } - _dpu_rm_print_rsvps(rm, DPU_RM_STAGE_FINAL); - end: mutex_unlock(&rm->rm_lock); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h index b8273bd23801..381611fc5877 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_rm.h @@ -22,22 +22,14 @@ /** * struct dpu_rm - DPU dynamic hardware resource manager - * @dev: device handle for event logging purposes - * @rsvps: list of hardware reservations by each crtc->encoder->connector * @hw_blks: array of lists of hardware resources present in the system, one * list per type of hardware block - * @hw_mdp: hardware object for mdp_top * @lm_max_width: cached layer mixer maximum width - * @rsvp_next_seq: sequence number for next reservation for debugging purposes * @rm_lock: resource manager mutex */ struct dpu_rm { - struct drm_device *dev; - struct list_head rsvps; struct list_head hw_blks[DPU_HW_BLK_MAX]; - struct dpu_hw_mdp *hw_mdp; uint32_t lm_max_width; - uint32_t rsvp_next_seq; struct mutex rm_lock; }; @@ -67,13 +59,11 @@ struct dpu_rm_hw_iter { * @rm: DPU Resource Manager handle * @cat: Pointer to hardware catalog * @mmio: mapped register io address of MDP - * @dev: device handle for event logging purposes * @Return: 0 on Success otherwise -ERROR */ int dpu_rm_init(struct dpu_rm *rm, struct dpu_mdss_cfg *cat, - void __iomem *mmio, - struct drm_device *dev); + void __iomem *mmio); /** * dpu_rm_destroy - Free all memory allocated by dpu_rm_init @@ -112,14 +102,6 @@ int dpu_rm_reserve(struct dpu_rm *rm, void dpu_rm_release(struct dpu_rm *rm, struct drm_encoder *enc); /** - * dpu_rm_get_mdp - Retrieve HW block for MDP TOP. - * This is never reserved, and is usable by any display. 
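With dpu_rm_get_mdp() removed, the resource manager no longer owns mdp_top at all: dpu_kms calls dpu_hw_mdptop_init() itself and tears the block down in _dpu_kms_hw_destroy(). That init function returns either a valid pointer or an ERR_PTR()-encoded errno, never NULL, which is why the dpu_kms_hw_init() hunk could drop IS_ERR_OR_NULL(). A short sketch of that calling convention (example_init() is assumed to follow the same contract):

#include <linux/err.h>

struct example_hw;
struct example_hw *example_init(void);	/* valid ptr or ERR_PTR(), never NULL */

static int example_caller(void)
{
	struct example_hw *hw = example_init();

	if (IS_ERR(hw))
		return PTR_ERR(hw);	/* negative errno, no NULL check needed */

	return 0;
}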
- * @rm: DPU Resource Manager handle - * @Return: Pointer to hw block or NULL - */ -struct dpu_hw_mdp *dpu_rm_get_mdp(struct dpu_rm *rm); - -/** * dpu_rm_init_hw_iter - setup given iterator for new iteration over hw list * using dpu_rm_get_hw * @iter: iter object to initialize @@ -144,12 +126,4 @@ void dpu_rm_init_hw_iter( * @Return: true on match found, false on no match found */ bool dpu_rm_get_hw(struct dpu_rm *rm, struct dpu_rm_hw_iter *iter); - -/** - * dpu_rm_check_property_topctl - validate property bitmask before it is set - * @val: user's proposed topology control bitmask - * @Return: 0 on success or error - */ -int dpu_rm_check_property_topctl(uint64_t val); - #endif /* __DPU_RM_H__ */ diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h index c78b521ceda1..8bb46090bd16 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_trace.h @@ -831,48 +831,42 @@ TRACE_EVENT(dpu_plane_disable, ); DECLARE_EVENT_CLASS(dpu_rm_iter_template, - TP_PROTO(uint32_t id, enum dpu_hw_blk_type type, uint32_t enc_id), - TP_ARGS(id, type, enc_id), + TP_PROTO(uint32_t id, uint32_t enc_id), + TP_ARGS(id, enc_id), TP_STRUCT__entry( __field( uint32_t, id ) - __field( enum dpu_hw_blk_type, type ) __field( uint32_t, enc_id ) ), TP_fast_assign( __entry->id = id; - __entry->type = type; __entry->enc_id = enc_id; ), - TP_printk("id:%d type:%d enc_id:%u", __entry->id, __entry->type, - __entry->enc_id) + TP_printk("id:%d enc_id:%u", __entry->id, __entry->enc_id) ); DEFINE_EVENT(dpu_rm_iter_template, dpu_rm_reserve_intf, - TP_PROTO(uint32_t id, enum dpu_hw_blk_type type, uint32_t enc_id), - TP_ARGS(id, type, enc_id) + TP_PROTO(uint32_t id, uint32_t enc_id), + TP_ARGS(id, enc_id) ); DEFINE_EVENT(dpu_rm_iter_template, dpu_rm_reserve_ctls, - TP_PROTO(uint32_t id, enum dpu_hw_blk_type type, uint32_t enc_id), - TP_ARGS(id, type, enc_id) + TP_PROTO(uint32_t id, uint32_t enc_id), + TP_ARGS(id, enc_id) ); TRACE_EVENT(dpu_rm_reserve_lms, - TP_PROTO(uint32_t id, enum dpu_hw_blk_type type, uint32_t enc_id, - uint32_t pp_id), - TP_ARGS(id, type, enc_id, pp_id), + TP_PROTO(uint32_t id, uint32_t enc_id, uint32_t pp_id), + TP_ARGS(id, enc_id, pp_id), TP_STRUCT__entry( __field( uint32_t, id ) - __field( enum dpu_hw_blk_type, type ) __field( uint32_t, enc_id ) __field( uint32_t, pp_id ) ), TP_fast_assign( __entry->id = id; - __entry->type = type; __entry->enc_id = enc_id; __entry->pp_id = pp_id; ), - TP_printk("id:%d type:%d enc_id:%u pp_id:%u", __entry->id, - __entry->type, __entry->enc_id, __entry->pp_id) + TP_printk("id:%d enc_id:%u pp_id:%u", __entry->id, + __entry->enc_id, __entry->pp_id) ); TRACE_EVENT(dpu_vbif_wait_xin_halt_fail, diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 8747fb32a106..0bdd93648761 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -207,62 +207,44 @@ u32 msm_readl(const void __iomem *addr) return val; } -struct vblank_event { - struct list_head node; +struct msm_vblank_work { + struct work_struct work; int crtc_id; bool enable; + struct msm_drm_private *priv; }; -static void vblank_ctrl_worker(struct kthread_work *work) +static void vblank_ctrl_worker(struct work_struct *work) { - struct msm_vblank_ctrl *vbl_ctrl = container_of(work, - struct msm_vblank_ctrl, work); - struct msm_drm_private *priv = container_of(vbl_ctrl, - struct msm_drm_private, vblank_ctrl); + struct msm_vblank_work *vbl_work = container_of(work, + struct msm_vblank_work, work); + struct 
msm_drm_private *priv = vbl_work->priv; struct msm_kms *kms = priv->kms; - struct vblank_event *vbl_ev, *tmp; - unsigned long flags; - - spin_lock_irqsave(&vbl_ctrl->lock, flags); - list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node) { - list_del(&vbl_ev->node); - spin_unlock_irqrestore(&vbl_ctrl->lock, flags); - - if (vbl_ev->enable) - kms->funcs->enable_vblank(kms, - priv->crtcs[vbl_ev->crtc_id]); - else - kms->funcs->disable_vblank(kms, - priv->crtcs[vbl_ev->crtc_id]); - - kfree(vbl_ev); - spin_lock_irqsave(&vbl_ctrl->lock, flags); - } + if (vbl_work->enable) + kms->funcs->enable_vblank(kms, priv->crtcs[vbl_work->crtc_id]); + else + kms->funcs->disable_vblank(kms, priv->crtcs[vbl_work->crtc_id]); - spin_unlock_irqrestore(&vbl_ctrl->lock, flags); + kfree(vbl_work); } static int vblank_ctrl_queue_work(struct msm_drm_private *priv, int crtc_id, bool enable) { - struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl; - struct vblank_event *vbl_ev; - unsigned long flags; + struct msm_vblank_work *vbl_work; - vbl_ev = kzalloc(sizeof(*vbl_ev), GFP_ATOMIC); - if (!vbl_ev) + vbl_work = kzalloc(sizeof(*vbl_work), GFP_ATOMIC); + if (!vbl_work) return -ENOMEM; - vbl_ev->crtc_id = crtc_id; - vbl_ev->enable = enable; + INIT_WORK(&vbl_work->work, vblank_ctrl_worker); - spin_lock_irqsave(&vbl_ctrl->lock, flags); - list_add_tail(&vbl_ev->node, &vbl_ctrl->event_list); - spin_unlock_irqrestore(&vbl_ctrl->lock, flags); + vbl_work->crtc_id = crtc_id; + vbl_work->enable = enable; + vbl_work->priv = priv; - kthread_queue_work(&priv->disp_thread[crtc_id].worker, - &vbl_ctrl->work); + queue_work(priv->wq, &vbl_work->work); return 0; } @@ -274,31 +256,20 @@ static int msm_drm_uninit(struct device *dev) struct msm_drm_private *priv = ddev->dev_private; struct msm_kms *kms = priv->kms; struct msm_mdss *mdss = priv->mdss; - struct msm_vblank_ctrl *vbl_ctrl = &priv->vblank_ctrl; - struct vblank_event *vbl_ev, *tmp; int i; /* We must cancel and cleanup any pending vblank enable/disable * work before drm_irq_uninstall() to avoid work re-enabling an * irq after uninstall has disabled it. 
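Instead of queueing events onto a shared, spinlock-protected list drained by a single kthread work item, each vblank enable/disable request above now carries its own work_struct; ordering falls out of the ordered workqueue, and teardown becomes flush_workqueue() plus destroy_workqueue(). A condensed sketch of the one-shot work pattern (the actual enable/disable call is elided):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct example_vbl_work {
	struct work_struct work;
	int crtc_id;
	bool enable;
};

static void example_vbl_worker(struct work_struct *work)
{
	struct example_vbl_work *w =
		container_of(work, struct example_vbl_work, work);

	/* enable or disable vblank for w->crtc_id here */

	kfree(w);	/* one-shot item frees itself */
}

static int example_vbl_queue(struct workqueue_struct *wq,
			     int crtc_id, bool enable)
{
	/* GFP_ATOMIC: callers may hold spinlocks, as in the driver */
	struct example_vbl_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

	if (!w)
		return -ENOMEM;

	INIT_WORK(&w->work, example_vbl_worker);
	w->crtc_id = crtc_id;
	w->enable = enable;
	queue_work(wq, &w->work);	/* ordered wq preserves FIFO order */

	return 0;
}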
*/ - kthread_flush_work(&vbl_ctrl->work); - list_for_each_entry_safe(vbl_ev, tmp, &vbl_ctrl->event_list, node) { - list_del(&vbl_ev->node); - kfree(vbl_ev); - } - /* clean up display commit/event worker threads */ - for (i = 0; i < priv->num_crtcs; i++) { - if (priv->disp_thread[i].thread) { - kthread_flush_worker(&priv->disp_thread[i].worker); - kthread_stop(priv->disp_thread[i].thread); - priv->disp_thread[i].thread = NULL; - } + flush_workqueue(priv->wq); + destroy_workqueue(priv->wq); + /* clean up event worker threads */ + for (i = 0; i < priv->num_crtcs; i++) { if (priv->event_thread[i].thread) { - kthread_flush_worker(&priv->event_thread[i].worker); - kthread_stop(priv->event_thread[i].thread); + kthread_destroy_worker(&priv->event_thread[i].worker); priv->event_thread[i].thread = NULL; } } @@ -323,9 +294,6 @@ static int msm_drm_uninit(struct device *dev) drm_irq_uninstall(ddev); pm_runtime_put_sync(dev); - flush_workqueue(priv->wq); - destroy_workqueue(priv->wq); - if (kms && kms->funcs) kms->funcs->destroy(kms); @@ -490,9 +458,6 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) priv->wq = alloc_ordered_workqueue("msm", 0); INIT_LIST_HEAD(&priv->inactive_list); - INIT_LIST_HEAD(&priv->vblank_ctrl.event_list); - kthread_init_work(&priv->vblank_ctrl.work, vblank_ctrl_worker); - spin_lock_init(&priv->vblank_ctrl.lock); drm_mode_config_init(ddev); @@ -554,27 +519,6 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) */ param.sched_priority = 16; for (i = 0; i < priv->num_crtcs; i++) { - - /* initialize display thread */ - priv->disp_thread[i].crtc_id = priv->crtcs[i]->base.id; - kthread_init_worker(&priv->disp_thread[i].worker); - priv->disp_thread[i].dev = ddev; - priv->disp_thread[i].thread = - kthread_run(kthread_worker_fn, - &priv->disp_thread[i].worker, - "crtc_commit:%d", priv->disp_thread[i].crtc_id); - if (IS_ERR(priv->disp_thread[i].thread)) { - DRM_DEV_ERROR(dev, "failed to create crtc_commit kthread\n"); - priv->disp_thread[i].thread = NULL; - goto err_msm_uninit; - } - - ret = sched_setscheduler(priv->disp_thread[i].thread, - SCHED_FIFO, ¶m); - if (ret) - dev_warn(dev, "disp_thread set priority failed: %d\n", - ret); - /* initialize event thread */ priv->event_thread[i].crtc_id = priv->crtcs[i]->base.id; kthread_init_worker(&priv->event_thread[i].worker); @@ -589,13 +533,6 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) goto err_msm_uninit; } - /** - * event thread should also run at same priority as disp_thread - * because it is handling frame_done events. A lower priority - * event thread and higher priority disp_thread can causes - * frame_pending counters beyond 2. This can lead to commit - * failure at crtc commit level. 
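The removed comment explained why the event threads were promoted to SCHED_FIFO alongside the now-deleted commit threads; the priority bump itself survives, since frame_done handling remains latency sensitive. A condensed sketch of starting a kthread worker at realtime priority, as the init loop here does (priority value is illustrative; failing to raise it is non-fatal):

#include <linux/err.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>

static int example_start_worker(struct kthread_worker *worker)
{
	struct sched_param param = { .sched_priority = 16 };
	struct task_struct *task;

	kthread_init_worker(worker);
	task = kthread_run(kthread_worker_fn, worker, "example_event");
	if (IS_ERR(task))
		return PTR_ERR(task);

	/* best effort: keep the default policy if the request fails */
	if (sched_setscheduler(task, SCHED_FIFO, &param))
		pr_warn("example: failed to set SCHED_FIFO\n");

	return 0;
}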
- */ ret = sched_setscheduler(priv->event_thread[i].thread, SCHED_FIFO, ¶m); if (ret) @@ -914,8 +851,12 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, ret = -EINVAL; break; } - ret = copy_from_user(msm_obj->name, - u64_to_user_ptr(args->value), args->len); + if (copy_from_user(msm_obj->name, u64_to_user_ptr(args->value), + args->len)) { + msm_obj->name[0] = '\0'; + ret = -EFAULT; + break; + } msm_obj->name[args->len] = '\0'; for (i = 0; i < args->len; i++) { if (!isprint(msm_obj->name[i])) { @@ -931,8 +872,9 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data, } args->len = strlen(msm_obj->name); if (args->value) { - ret = copy_to_user(u64_to_user_ptr(args->value), - msm_obj->name, args->len); + if (copy_to_user(u64_to_user_ptr(args->value), + msm_obj->name, args->len)) + ret = -EFAULT; } break; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 4e0c6c2f9a86..c56dade2c1dc 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -77,12 +77,6 @@ enum msm_mdp_plane_property { PLANE_PROP_MAX_NUM }; -struct msm_vblank_ctrl { - struct kthread_work work; - struct list_head event_list; - spinlock_t lock; -}; - #define MSM_GPU_MAX_RINGS 4 #define MAX_H_TILES_PER_DISPLAY 2 @@ -126,7 +120,7 @@ struct msm_display_topology { /** * struct msm_display_info - defines display properties - * @intf_type: DRM_MODE_CONNECTOR_ display type + * @intf_type: DRM_MODE_ENCODER_ type * @capabilities: Bitmask of display flags * @num_of_h_tiles: Number of horizontal tiles in case of split interface * @h_tile_instance: Controller instance used per tile. Number of elements is @@ -199,7 +193,6 @@ struct msm_drm_private { unsigned int num_crtcs; struct drm_crtc *crtcs[MAX_CRTCS]; - struct msm_drm_thread disp_thread[MAX_CRTCS]; struct msm_drm_thread event_thread[MAX_CRTCS]; unsigned int num_encoders; @@ -228,7 +221,6 @@ struct msm_drm_private { struct notifier_block vmap_notifier; struct shrinker shrinker; - struct msm_vblank_ctrl vblank_ctrl; struct drm_atomic_state *pm_state; }; @@ -250,7 +242,8 @@ void msm_gem_purge_vma(struct msm_gem_address_space *aspace, void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma); int msm_gem_map_vma(struct msm_gem_address_space *aspace, - struct msm_gem_vma *vma, struct sg_table *sgt, int npages); + struct msm_gem_vma *vma, int prot, + struct sg_table *sgt, int npages); void msm_gem_close_vma(struct msm_gem_address_space *aspace, struct msm_gem_vma *vma); @@ -333,6 +326,7 @@ void msm_gem_kernel_put(struct drm_gem_object *bo, struct drm_gem_object *msm_gem_import(struct drm_device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); +__printf(2, 3) void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...); int msm_framebuffer_prepare(struct drm_framebuffer *fb, @@ -396,12 +390,14 @@ void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); void msm_rd_debugfs_cleanup(struct msm_drm_private *priv); +__printf(3, 4) void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char *fmt, ...); int msm_perf_debugfs_init(struct drm_minor *minor); void msm_perf_debugfs_cleanup(struct msm_drm_private *priv); #else static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; } +__printf(3, 4) static inline void msm_rd_dump_submit(struct msm_rd_state *rd, struct msm_gem_submit *submit, const char 
*fmt, ...) {} static inline void msm_rd_debugfs_cleanup(struct msm_drm_private *priv) {} diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 20c979a7fc9c..18ca651ab942 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -391,6 +391,10 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; struct page **pages; + int prot = IOMMU_READ; + + if (!(msm_obj->flags & MSM_BO_GPU_READONLY)) + prot |= IOMMU_WRITE; WARN_ON(!mutex_is_locked(&msm_obj->lock)); @@ -405,8 +409,8 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, if (IS_ERR(pages)) return PTR_ERR(pages); - return msm_gem_map_vma(aspace, vma, msm_obj->sgt, - obj->size >> PAGE_SHIFT); + return msm_gem_map_vma(aspace, vma, prot, + msm_obj->sgt, obj->size >> PAGE_SHIFT); } /* get iova and pin it. Should have a matching put */ diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c index 557360788084..49c04829cf34 100644 --- a/drivers/gpu/drm/msm/msm_gem_vma.c +++ b/drivers/gpu/drm/msm/msm_gem_vma.c @@ -68,7 +68,8 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace, int msm_gem_map_vma(struct msm_gem_address_space *aspace, - struct msm_gem_vma *vma, struct sg_table *sgt, int npages) + struct msm_gem_vma *vma, int prot, + struct sg_table *sgt, int npages) { unsigned size = npages << PAGE_SHIFT; int ret = 0; @@ -86,7 +87,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace, if (aspace->mmu) ret = aspace->mmu->funcs->map(aspace->mmu, vma->iova, sgt, - size, IOMMU_READ | IOMMU_WRITE); + size, prot); if (ret) vma->mapped = false; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 5f3eff304355..10babd18e286 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -900,7 +900,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, } /* Get Interrupt: */ - gpu->irq = platform_get_irq_byname(pdev, config->irqname); + gpu->irq = platform_get_irq(pdev, 0); if (gpu->irq < 0) { ret = gpu->irq; DRM_DEV_ERROR(drm->dev, "failed to get irq: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index efb49bb64191..ca17086f72c9 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -31,7 +31,6 @@ struct msm_gpu_state; struct msm_gpu_config { const char *ioname; - const char *irqname; uint64_t va_start; uint64_t va_end; unsigned int nr_rings; @@ -63,7 +62,7 @@ struct msm_gpu_funcs { struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); -#ifdef CONFIG_DEBUG_FS +#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP) /* show GPU status in debugfs: */ void (*show)(struct msm_gpu *gpu, struct msm_gpu_state *state, struct drm_printer *p); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index 90e9d0a48dc0..d21172933d92 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -115,7 +115,9 @@ static void rd_write(struct msm_rd_state *rd, const void *buf, int sz) char *fptr = &fifo->buf[fifo->head]; int n; - wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0); + wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0 || !rd->open); + if (!rd->open) + return; /* Note that smp_load_acquire() is not strictly required * as CIRC_SPACE_TO_END() does not access the tail more @@ -213,7 +215,10 @@ out: static int rd_release(struct 
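/*
 * The msm_rd changes above and below form a wake pairing: rd_write() now
 * sleeps until there is FIFO space *or* the node has been closed, so
 * rd_release() below must flip rd->open before waking all sleepers, or a
 * blocked writer could sleep forever. The shape of the pattern, condensed
 * from msm_rd.c:
 *
 *	// writer side
 *	wait_event(rd->fifo_event, circ_space(&rd->fifo) > 0 || !rd->open);
 *	if (!rd->open)
 *		return;
 *
 *	// release side
 *	rd->open = false;
 *	wake_up_all(&rd->fifo_event);
 */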
inode *inode, struct file *file) { struct msm_rd_state *rd = inode->i_private; + rd->open = false; + wake_up_all(&rd->fifo_event); + return 0; } diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild index b17843dd050d..581404e6544d 100644 --- a/drivers/gpu/drm/nouveau/Kbuild +++ b/drivers/gpu/drm/nouveau/Kbuild @@ -30,6 +30,8 @@ nouveau-y += nouveau_vga.o # DRM - memory management nouveau-y += nouveau_bo.o nouveau-y += nouveau_gem.o +nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_svm.o +nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_dmem.o nouveau-y += nouveau_mem.o nouveau-y += nouveau_prime.o nouveau-y += nouveau_sgdma.o diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 432c440223bb..00cd9ab8948d 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -71,3 +71,15 @@ config DRM_NOUVEAU_BACKLIGHT help Say Y here if you want to control the backlight of your display (e.g. a laptop panel). + +config DRM_NOUVEAU_SVM + bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support" + depends on ARCH_HAS_HMM + depends on DRM_NOUVEAU + depends on STAGING + select HMM_MIRROR + select DEVICE_PRIVATE + default n + help + Say Y here if you want to enable experimental support for + Shared Virtual Memory (SVM). diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c index 2c569e264df3..f22f01020625 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c +++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c @@ -40,6 +40,7 @@ #include "nvreg.h" #include "nouveau_fbcon.h" #include "disp.h" +#include "nouveau_dma.h" #include <subdev/bios/pll.h> #include <subdev/clk.h> @@ -1077,12 +1078,223 @@ nouveau_crtc_set_config(struct drm_mode_set *set, return ret; } +struct nv04_page_flip_state { + struct list_head head; + struct drm_pending_vblank_event *event; + struct drm_crtc *crtc; + int bpp, pitch; + u64 offset; +}; + +static int +nv04_finish_page_flip(struct nouveau_channel *chan, + struct nv04_page_flip_state *ps) +{ + struct nouveau_fence_chan *fctx = chan->fence; + struct nouveau_drm *drm = chan->drm; + struct drm_device *dev = drm->dev; + struct nv04_page_flip_state *s; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + + if (list_empty(&fctx->flip)) { + NV_ERROR(drm, "unexpected pageflip\n"); + spin_unlock_irqrestore(&dev->event_lock, flags); + return -EINVAL; + } + + s = list_first_entry(&fctx->flip, struct nv04_page_flip_state, head); + if (s->event) { + drm_crtc_arm_vblank_event(s->crtc, s->event); + } else { + /* Give up ownership of vblank for page-flipped crtc */ + drm_crtc_vblank_put(s->crtc); + } + + list_del(&s->head); + if (ps) + *ps = *s; + kfree(s); + + spin_unlock_irqrestore(&dev->event_lock, flags); + return 0; +} + +int +nv04_flip_complete(struct nvif_notify *notify) +{ + struct nouveau_cli *cli = (void *)notify->object->client; + struct nouveau_drm *drm = cli->drm; + struct nouveau_channel *chan = drm->channel; + struct nv04_page_flip_state state; + + if (!nv04_finish_page_flip(chan, &state)) { + nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc), + state.offset + state.crtc->y * + state.pitch + state.crtc->x * + state.bpp / 8); + } + + return NVIF_NOTIFY_KEEP; +} + +static int +nv04_page_flip_emit(struct nouveau_channel *chan, + struct nouveau_bo *old_bo, + struct nouveau_bo *new_bo, + struct nv04_page_flip_state *s, + struct nouveau_fence **pfence) +{ + struct nouveau_fence_chan *fctx = chan->fence; + struct nouveau_drm *drm = chan->drm; + struct 
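/*
 * The nv04 flip machinery moved in here from nouveau_display.c keeps its
 * old protocol: nv04_page_flip_emit() queues the flip state on
 * chan->fence->flip under dev->event_lock and kicks an NV_SW_PAGE_FLIP
 * software method; when the notifier fires, nv04_flip_complete() above
 * pops the oldest entry and repoints the CRTC at the new scanout offset:
 *
 *	base = s.offset + crtc->y * s.pitch + crtc->x * s.bpp / 8;
 *	nv_set_crtc_base(dev, drm_crtc_index(s.crtc), base);
 */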
drm_device *dev = drm->dev; + unsigned long flags; + int ret; + + /* Queue it to the pending list */ + spin_lock_irqsave(&dev->event_lock, flags); + list_add_tail(&s->head, &fctx->flip); + spin_unlock_irqrestore(&dev->event_lock, flags); + + /* Synchronize with the old framebuffer */ + ret = nouveau_fence_sync(old_bo, chan, false, false); + if (ret) + goto fail; + + /* Emit the pageflip */ + ret = RING_SPACE(chan, 2); + if (ret) + goto fail; + + BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1); + OUT_RING (chan, 0x00000000); + FIRE_RING (chan); + + ret = nouveau_fence_new(chan, false, pfence); + if (ret) + goto fail; + + return 0; +fail: + spin_lock_irqsave(&dev->event_lock, flags); + list_del(&s->head); + spin_unlock_irqrestore(&dev->event_lock, flags); + return ret; +} + +static int +nv04_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, u32 flags, + struct drm_modeset_acquire_ctx *ctx) +{ + const int swap_interval = (flags & DRM_MODE_PAGE_FLIP_ASYNC) ? 0 : 1; + struct drm_device *dev = crtc->dev; + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->primary->fb)->nvbo; + struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo; + struct nv04_page_flip_state *s; + struct nouveau_channel *chan; + struct nouveau_cli *cli; + struct nouveau_fence *fence; + struct nv04_display *dispnv04 = nv04_display(dev); + int head = nouveau_crtc(crtc)->index; + int ret; + + chan = drm->channel; + if (!chan) + return -ENODEV; + cli = (void *)chan->user.client; + + s = kzalloc(sizeof(*s), GFP_KERNEL); + if (!s) + return -ENOMEM; + + if (new_bo != old_bo) { + ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true); + if (ret) + goto fail_free; + } + + mutex_lock(&cli->mutex); + ret = ttm_bo_reserve(&new_bo->bo, true, false, NULL); + if (ret) + goto fail_unpin; + + /* synchronise rendering channel with the kernel's channel */ + ret = nouveau_fence_sync(new_bo, chan, false, true); + if (ret) { + ttm_bo_unreserve(&new_bo->bo); + goto fail_unpin; + } + + if (new_bo != old_bo) { + ttm_bo_unreserve(&new_bo->bo); + + ret = ttm_bo_reserve(&old_bo->bo, true, false, NULL); + if (ret) + goto fail_unpin; + } + + /* Initialize a page flip struct */ + *s = (struct nv04_page_flip_state) + { { }, event, crtc, fb->format->cpp[0] * 8, fb->pitches[0], + new_bo->bo.offset }; + + /* Keep vblanks on during flip, for the target crtc of this flip */ + drm_crtc_vblank_get(crtc); + + /* Emit a page flip */ + if (swap_interval) { + ret = RING_SPACE(chan, 8); + if (ret) + goto fail_unreserve; + + BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1); + OUT_RING (chan, 0); + BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1); + OUT_RING (chan, head); + BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1); + OUT_RING (chan, 0); + BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1); + OUT_RING (chan, 0); + } + + nouveau_bo_ref(new_bo, &dispnv04->image[head]); + + ret = nv04_page_flip_emit(chan, old_bo, new_bo, s, &fence); + if (ret) + goto fail_unreserve; + mutex_unlock(&cli->mutex); + + /* Update the crtc struct and cleanup */ + crtc->primary->fb = fb; + + nouveau_bo_fence(old_bo, fence, false); + ttm_bo_unreserve(&old_bo->bo); + if (old_bo != new_bo) + nouveau_bo_unpin(old_bo); + nouveau_fence_unref(&fence); + return 0; + +fail_unreserve: + drm_crtc_vblank_put(crtc); + ttm_bo_unreserve(&old_bo->bo); +fail_unpin: + mutex_unlock(&cli->mutex); + if (old_bo != new_bo) + nouveau_bo_unpin(new_bo); +fail_free: + kfree(s); + return ret; +} + static const struct drm_crtc_funcs 
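/*
 * nv04_crtc_page_flip() above preserves the ordering rules of the common
 * implementation it replaces: pin the new buffer, synchronise the kernel
 * channel with rendering on it, emit the flip, then fence the old buffer
 * before unpinning so its VRAM cannot be reused while scanout may still be
 * reading from it. Condensed, with error handling elided:
 *
 *	nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true);
 *	nouveau_fence_sync(new_bo, chan, false, true);	// wait for render
 *	nv04_page_flip_emit(chan, old_bo, new_bo, s, &fence);
 *	nouveau_bo_fence(old_bo, fence, false);		// guard old scanout
 *	nouveau_bo_unpin(old_bo);
 */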
nv04_crtc_funcs = { .cursor_set = nv04_crtc_cursor_set, .cursor_move = nv04_crtc_cursor_move, .gamma_set = nv_crtc_gamma_set, .set_config = nouveau_crtc_set_config, - .page_flip = nouveau_crtc_page_flip, + .page_flip = nv04_crtc_page_flip, .destroy = nv_crtc_destroy, }; diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 1727d399833c..5713bacaee80 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -30,6 +30,160 @@ #include "hw.h" #include "nouveau_encoder.h" #include "nouveau_connector.h" +#include "nouveau_bo.h" + +#include <nvif/if0004.h> + +static void +nv04_display_fini(struct drm_device *dev, bool suspend) +{ + struct nv04_display *disp = nv04_display(dev); + struct drm_crtc *crtc; + + /* Disable flip completion events. */ + nvif_notify_put(&disp->flip); + + /* Disable vblank interrupts. */ + NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); + if (nv_two_heads(dev)) + NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); + + if (!suspend) + return; + + /* Un-pin FB and cursors so they'll be evicted to system memory. */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_framebuffer *nouveau_fb; + + nouveau_fb = nouveau_framebuffer(crtc->primary->fb); + if (!nouveau_fb || !nouveau_fb->nvbo) + continue; + + nouveau_bo_unpin(nouveau_fb->nvbo); + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + if (nv_crtc->cursor.nvbo) { + if (nv_crtc->cursor.set_offset) + nouveau_bo_unmap(nv_crtc->cursor.nvbo); + nouveau_bo_unpin(nv_crtc->cursor.nvbo); + } + } +} + +static int +nv04_display_init(struct drm_device *dev, bool resume, bool runtime) +{ + struct nv04_display *disp = nv04_display(dev); + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_encoder *encoder; + struct drm_crtc *crtc; + int ret; + + /* meh.. modeset apparently doesn't setup all the regs and depends + * on pre-existing state, for now load the state of the card *before* + * nouveau was loaded, and then do a modeset. + * + * best thing to do probably is to make save/restore routines not + * save/restore "pre-load" state, but more general so we can save + * on suspend too. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + nv_crtc->save(&nv_crtc->base); + } + + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_save(&encoder->base.base); + + /* Enable flip completion events. */ + nvif_notify_get(&disp->flip); + + if (!resume) + return 0; + + /* Re-pin FB/cursors. */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_framebuffer *nouveau_fb; + + nouveau_fb = nouveau_framebuffer(crtc->primary->fb); + if (!nouveau_fb || !nouveau_fb->nvbo) + continue; + + ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM, true); + if (ret) + NV_ERROR(drm, "Could not pin framebuffer\n"); + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + if (!nv_crtc->cursor.nvbo) + continue; + + ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, true); + if (!ret && nv_crtc->cursor.set_offset) + ret = nouveau_bo_map(nv_crtc->cursor.nvbo); + if (ret) + NV_ERROR(drm, "Could not pin/map cursor.\n"); + } + + /* Force CLUT to get re-loaded during modeset. 
*/ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + + nv_crtc->lut.depth = 0; + } + + /* This should ensure we don't hit a locking problem when someone + * wakes us up via a connector. We should never go into suspend + * while the display is on anyways. + */ + if (runtime) + return 0; + + /* Restore mode. */ + drm_helper_resume_force_mode(dev); + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); + + if (!nv_crtc->cursor.nvbo) + continue; + + if (nv_crtc->cursor.set_offset) + nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset); + nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x, + nv_crtc->cursor_saved_y); + } + + return 0; +} + +static void +nv04_display_destroy(struct drm_device *dev) +{ + struct nv04_display *disp = nv04_display(dev); + struct nouveau_drm *drm = nouveau_drm(dev); + struct nouveau_encoder *encoder; + struct nouveau_crtc *nv_crtc; + + /* Restore state */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) + encoder->enc_restore(&encoder->base.base); + + list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head) + nv_crtc->restore(&nv_crtc->base); + + nouveau_hw_save_vga_fonts(dev, 0); + + nvif_notify_fini(&disp->flip); + + nouveau_display(dev)->priv = NULL; + kfree(disp); + + nvif_object_unmap(&drm->client.device.object); +} int nv04_display_create(struct drm_device *dev) @@ -58,6 +212,13 @@ nv04_display_create(struct drm_device *dev) /* Pre-nv50 doesn't support atomic, so don't expose the ioctls */ dev->driver->driver_features &= ~DRIVER_ATOMIC; + /* Request page flip completion event. */ + if (drm->nvsw.client) { + nvif_notify_init(&drm->nvsw, nv04_flip_complete, + false, NV04_NVSW_NTFY_UEVENT, + NULL, 0, 0, &disp->flip); + } + nouveau_hw_save_vga_fonts(dev, 1); nv04_crtc_create(dev, 0); @@ -121,58 +282,3 @@ nv04_display_create(struct drm_device *dev) return 0; } - -void -nv04_display_destroy(struct drm_device *dev) -{ - struct nv04_display *disp = nv04_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_encoder *encoder; - struct nouveau_crtc *nv_crtc; - - /* Restore state */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) - encoder->enc_restore(&encoder->base.base); - - list_for_each_entry(nv_crtc, &dev->mode_config.crtc_list, base.head) - nv_crtc->restore(&nv_crtc->base); - - nouveau_hw_save_vga_fonts(dev, 0); - - nouveau_display(dev)->priv = NULL; - kfree(disp); - - nvif_object_unmap(&drm->client.device.object); -} - -int -nv04_display_init(struct drm_device *dev) -{ - struct nouveau_encoder *encoder; - struct nouveau_crtc *crtc; - - /* meh.. modeset apparently doesn't setup all the regs and depends - * on pre-existing state, for now load the state of the card *before* - * nouveau was loaded, and then do a modeset. - * - * best thing to do probably is to make save/restore routines not - * save/restore "pre-load" state, but more general so we can save - * on suspend too. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) - crtc->save(&crtc->base); - - list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head) - encoder->enc_save(&encoder->base.base); - - return 0; -} - -void -nv04_display_fini(struct drm_device *dev) -{ - /* disable vblank interrupts */ - NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0); - if (nv_two_heads(dev)) - NVWriteCRTC(dev, 1, NV_PCRTC_INTR_EN_0, 0); -} diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h index f74f1f2b186e..c6ed20a09f4a 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.h +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h @@ -82,6 +82,7 @@ struct nv04_display { uint32_t saved_vga_font[4][16384]; uint32_t dac_users[4]; struct nouveau_bo *image[2]; + struct nvif_notify flip; }; static inline struct nv04_display * @@ -92,9 +93,6 @@ nv04_display(struct drm_device *dev) /* nv04_display.c */ int nv04_display_create(struct drm_device *); -void nv04_display_destroy(struct drm_device *); -int nv04_display_init(struct drm_device *); -void nv04_display_fini(struct drm_device *); /* nv04_crtc.c */ int nv04_crtc_create(struct drm_device *, int index); @@ -176,4 +174,5 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table, ); } +int nv04_flip_complete(struct nvif_notify *); #endif diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h index a194990d2b0d..b5fae5ab3fa8 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/atom.h +++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h @@ -116,6 +116,12 @@ struct nv50_head_atom { u8 depth:4; } or; + /* Currently only used for MST */ + struct { + int pbn; + u8 tu:6; + } dp; + union nv50_head_atom_mask { struct { bool olut:1; diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c index c25e0ebe3c92..27ea3f34706d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/core.c +++ b/drivers/gpu/drm/nouveau/dispnv50/core.c @@ -42,7 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore) int version; int (*new)(struct nouveau_drm *, s32, struct nv50_core **); } cores[] = { - { TU104_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, + { TU102_DISP_CORE_CHANNEL_DMA, 0, corec57d_new }, { GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new }, { GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, { GP100_DISP_CORE_CHANNEL_DMA, 0, core917d_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c index cb6e4d2b1b45..121c24a18f11 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c @@ -31,7 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw) int version; int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **); } curses[] = { - { TU104_DISP_CURSOR, 0, cursc37a_new }, + { TU102_DISP_CURSOR, 0, cursc37a_new }, { GV100_DISP_CURSOR, 0, cursc37a_new }, { GK104_DISP_CURSOR, 0, curs907a_new }, { GF110_DISP_CURSOR, 0, curs907a_new }, diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 2e8a5fd9b262..4b1650f51955 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -659,8 +659,6 @@ struct nv50_mstc { struct drm_display_mode *native; struct edid *edid; - - int pbn; }; struct nv50_msto { @@ -765,18 +763,26 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, struct drm_connector *connector = conn_state->connector; struct nv50_mstc *mstc = 
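/*
 * Moving pbn/tu from nv50_mstc into nv50_head_atom (see atom.h above) ties
 * the MST bandwidth figures to the atomic state they were computed for. As
 * the code below shows, a duplicated state keeps its stored values instead
 * of recalculating them, since the connector's bpc may have changed after
 * the state was duplicated:
 *
 *	if (!state->duplicated)
 *		asyh->dp.pbn = drm_dp_calc_pbn_mode(adjusted_mode->clock,
 *						    bpc * 3);	// bpp = 3 * bpc
 */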
nv50_mstc(connector); struct nv50_mstm *mstm = mstc->mstm; - int bpp = connector->display_info.bpc * 3; + struct nv50_head_atom *asyh = nv50_head_atom(crtc_state); int slots; - mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, - bpp); + /* When restoring duplicated states, we need to make sure that the + * bw remains the same and avoid recalculating it, as the connector's + * bpc may have changed after the state was duplicated + */ + if (!state->duplicated) + asyh->dp.pbn = + drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, + connector->display_info.bpc * 3); - if (drm_atomic_crtc_needs_modeset(crtc_state) && - !drm_connector_is_unregistered(connector)) { + if (drm_atomic_crtc_needs_modeset(crtc_state)) { slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, - mstc->port, mstc->pbn); + mstc->port, + asyh->dp.pbn); if (slots < 0) return slots; + + asyh->dp.tu = slots; } return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state, @@ -787,13 +793,13 @@ static void nv50_msto_enable(struct drm_encoder *encoder) { struct nv50_head *head = nv50_head(encoder->crtc); + struct nv50_head_atom *armh = nv50_head_atom(head->base.base.state); struct nv50_msto *msto = nv50_msto(encoder); struct nv50_mstc *mstc = NULL; struct nv50_mstm *mstm = NULL; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; u8 proto, depth; - int slots; bool r; drm_connector_list_iter_begin(encoder->dev, &conn_iter); @@ -809,9 +815,10 @@ nv50_msto_enable(struct drm_encoder *encoder) if (WARN_ON(!mstc)) return; - slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn); - r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, slots); - WARN_ON(!r); + r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, armh->dp.pbn, + armh->dp.tu); + if (!r) + DRM_DEBUG_KMS("Failed to allocate VCPI\n"); if (!mstm->links++) nv50_outp_acquire(mstm->outp); @@ -828,8 +835,7 @@ nv50_msto_enable(struct drm_encoder *encoder) default: depth = 0x6; break; } - mstm->outp->update(mstm->outp, head->base.index, - nv50_head_atom(head->base.base.state), proto, depth); + mstm->outp->update(mstm->outp, head->base.index, armh, proto, depth); msto->head = head; msto->mstc = mstc; @@ -2215,8 +2221,8 @@ nv50_disp_func = { * Init *****************************************************************************/ -void -nv50_display_fini(struct drm_device *dev) +static void +nv50_display_fini(struct drm_device *dev, bool suspend) { struct nouveau_encoder *nv_encoder; struct drm_encoder *encoder; @@ -2237,8 +2243,8 @@ nv50_display_fini(struct drm_device *dev) } } -int -nv50_display_init(struct drm_device *dev) +static int +nv50_display_init(struct drm_device *dev, bool resume, bool runtime) { struct nv50_core *core = nv50_disp(dev)->core; struct drm_encoder *encoder; @@ -2264,7 +2270,7 @@ nv50_display_init(struct drm_device *dev) return 0; } -void +static void nv50_display_destroy(struct drm_device *dev) { struct nv50_disp *disp = nv50_disp(dev); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index ac97ebce5b35..2e7a0c347ddb 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -413,6 +413,7 @@ nv50_head_atomic_duplicate_state(struct drm_crtc *crtc) asyh->ovly = armh->ovly; asyh->dither = armh->dither; asyh->procamp = armh->procamp; + asyh->dp = armh->dp; asyh->clr.mask = 0; asyh->set.mask = 0; return &asyh->state; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c index 
bc9eeaf212ae..a1ac153d5e98 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c @@ -31,7 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw) int version; int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *); } wimms[] = { - { TU104_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, + { TU102_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, { GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init }, {} }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index ba9eea2ff16b..b95181027b31 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -626,7 +626,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index, int (*new)(struct nouveau_drm *, enum drm_plane_type, int, s32, struct nv50_wndw **); } wndws[] = { - { TU104_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, + { TU102_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new }, { GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new }, {} }; diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index 1d82cbf70cf4..7d556a1c92fa 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -54,6 +54,9 @@ #define VOLTA_USERMODE_A 0x0000c361 +#define MAXWELL_FAULT_BUFFER_A /* clb069.h */ 0x0000b069 +#define VOLTA_FAULT_BUFFER_A /* clb069.h */ 0x0000c369 + #define NV03_CHANNEL_DMA /* cl506b.h */ 0x0000006b #define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e #define NV17_CHANNEL_DMA /* cl506b.h */ 0x0000176e @@ -84,7 +87,7 @@ #define GP100_DISP /* cl5070.h */ 0x00009770 #define GP102_DISP /* cl5070.h */ 0x00009870 #define GV100_DISP /* cl5070.h */ 0x0000c370 -#define TU104_DISP /* cl5070.h */ 0x0000c570 +#define TU102_DISP /* cl5070.h */ 0x0000c570 #define NV31_MPEG 0x00003174 #define G82_MPEG 0x00008274 @@ -97,7 +100,7 @@ #define GF110_DISP_CURSOR /* cl507a.h */ 0x0000907a #define GK104_DISP_CURSOR /* cl507a.h */ 0x0000917a #define GV100_DISP_CURSOR /* cl507a.h */ 0x0000c37a -#define TU104_DISP_CURSOR /* cl507a.h */ 0x0000c57a +#define TU102_DISP_CURSOR /* cl507a.h */ 0x0000c57a #define NV50_DISP_OVERLAY /* cl507b.h */ 0x0000507b #define G82_DISP_OVERLAY /* cl507b.h */ 0x0000827b @@ -106,7 +109,7 @@ #define GK104_DISP_OVERLAY /* cl507b.h */ 0x0000917b #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c37b -#define TU104_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b +#define TU102_DISP_WINDOW_IMM_CHANNEL_DMA /* clc37b.h */ 0x0000c57b #define NV50_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000507c #define G82_DISP_BASE_CHANNEL_DMA /* cl507c.h */ 0x0000827c @@ -129,7 +132,7 @@ #define GP100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000977d #define GP102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000987d #define GV100_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c37d -#define TU104_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d +#define TU102_DISP_CORE_CHANNEL_DMA /* cl507d.h */ 0x0000c57d #define NV50_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000507e #define G82_DISP_OVERLAY_CHANNEL_DMA /* cl507e.h */ 0x0000827e @@ -139,7 +142,7 @@ #define GK104_DISP_OVERLAY_CONTROL_DMA /* cl507e.h */ 0x0000917e #define GV100_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c37e -#define TU104_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e +#define TU102_DISP_WINDOW_CHANNEL_DMA /* clc37e.h */ 0x0000c57e #define NV50_TESLA 0x00005097 #define G82_TESLA 0x00008297 diff --git 
a/drivers/gpu/drm/nouveau/include/nvif/clb069.h b/drivers/gpu/drm/nouveau/include/nvif/clb069.h new file mode 100644 index 000000000000..eef5d0227bab --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/clb069.h @@ -0,0 +1,12 @@ +#ifndef __NVIF_CLB069_H__ +#define __NVIF_CLB069_H__ +struct nvif_clb069_v0 { + __u8 version; + __u8 pad01[3]; + __u32 entries; + __u32 get; + __u32 put; +}; + +#define NVB069_V0_NTFY_FAULT 0x00 +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h index 2928ecd989ad..d6dd40f21eed 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h @@ -3,7 +3,8 @@ struct nvif_vmm_v0 { __u8 version; __u8 page_nr; - __u8 pad02[6]; + __u8 managed; + __u8 pad03[5]; __u64 addr; __u64 size; __u8 data[]; @@ -14,6 +15,9 @@ struct nvif_vmm_v0 { #define NVIF_VMM_V0_PUT 0x02 #define NVIF_VMM_V0_MAP 0x03 #define NVIF_VMM_V0_UNMAP 0x04 +#define NVIF_VMM_V0_PFNMAP 0x05 +#define NVIF_VMM_V0_PFNCLR 0x06 +#define NVIF_VMM_V0_MTHD(i) ((i) + 0x80) struct nvif_vmm_page_v0 { __u8 version; @@ -61,4 +65,28 @@ struct nvif_vmm_unmap_v0 { __u8 pad01[7]; __u64 addr; }; + +struct nvif_vmm_pfnmap_v0 { + __u8 version; + __u8 page; + __u8 pad02[6]; + __u64 addr; + __u64 size; +#define NVIF_VMM_PFNMAP_V0_ADDR 0xfffffffffffff000ULL +#define NVIF_VMM_PFNMAP_V0_ADDR_SHIFT 12 +#define NVIF_VMM_PFNMAP_V0_APER 0x00000000000000f0ULL +#define NVIF_VMM_PFNMAP_V0_HOST 0x0000000000000000ULL +#define NVIF_VMM_PFNMAP_V0_VRAM 0x0000000000000010ULL +#define NVIF_VMM_PFNMAP_V0_W 0x0000000000000002ULL +#define NVIF_VMM_PFNMAP_V0_V 0x0000000000000001ULL +#define NVIF_VMM_PFNMAP_V0_NONE 0x0000000000000000ULL + __u64 phys[]; +}; + +struct nvif_vmm_pfnclr_v0 { + __u8 version; + __u8 pad01[7]; + __u64 addr; + __u64 size; +}; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h index 1d9c637859f3..4cabd613a280 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h +++ b/drivers/gpu/drm/nouveau/include/nvif/ifc00d.h @@ -6,6 +6,12 @@ struct gp100_vmm_vn { /* nvif_vmm_vX ... */ }; +struct gp100_vmm_v0 { + /* nvif_vmm_vX ... */ + __u8 version; + __u8 fault_replay; +}; + struct gp100_vmm_map_vn { /* nvif_vmm_map_vX ... 
*/ }; @@ -18,4 +24,19 @@ struct gp100_vmm_map_v0 { __u8 priv; __u8 kind; }; + +#define GP100_VMM_VN_FAULT_REPLAY NVIF_VMM_V0_MTHD(0x00) +#define GP100_VMM_VN_FAULT_CANCEL NVIF_VMM_V0_MTHD(0x01) + +struct gp100_vmm_fault_replay_vn { +}; + +struct gp100_vmm_fault_cancel_v0 { + __u8 version; + __u8 hub; + __u8 gpc; + __u8 client; + __u8 pad04[4]; + __u64 inst; +}; #endif diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h index c5db8a2e82df..79bf85d2f43a 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h +++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h @@ -30,8 +30,8 @@ struct nvif_vmm { int page_nr; }; -int nvif_vmm_init(struct nvif_mmu *, s32 oclass, u64 addr, u64 size, - void *argv, u32 argc, struct nvif_vmm *); +int nvif_vmm_init(struct nvif_mmu *, s32 oclass, bool managed, u64 addr, + u64 size, void *argv, u32 argc, struct nvif_vmm *); void nvif_vmm_fini(struct nvif_vmm *); int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse, u8 page, u8 align, u64 size, struct nvif_vma *); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h index 72e4dc1f0236..642492344196 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h @@ -28,6 +28,7 @@ enum nvkm_devidx { NVKM_SUBDEV_ICCSENSE, NVKM_SUBDEV_THERM, NVKM_SUBDEV_CLK, + NVKM_SUBDEV_GSP, NVKM_SUBDEV_SECBOOT, NVKM_ENGINE_BSP, @@ -137,6 +138,7 @@ struct nvkm_device { struct nvkm_fb *fb; struct nvkm_fuse *fuse; struct nvkm_gpio *gpio; + struct nvkm_gsp *gsp; struct nvkm_i2c *i2c; struct nvkm_subdev *ibus; struct nvkm_iccsense *iccsense; @@ -209,6 +211,7 @@ struct nvkm_device_chip { int (*fb )(struct nvkm_device *, int idx, struct nvkm_fb **); int (*fuse )(struct nvkm_device *, int idx, struct nvkm_fuse **); int (*gpio )(struct nvkm_device *, int idx, struct nvkm_gpio **); + int (*gsp )(struct nvkm_device *, int idx, struct nvkm_gsp **); int (*i2c )(struct nvkm_device *, int idx, struct nvkm_i2c **); int (*ibus )(struct nvkm_device *, int idx, struct nvkm_subdev **); int (*iccsense)(struct nvkm_device *, int idx, struct nvkm_iccsense **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h index 86abe76023c2..5f3650692e4d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h @@ -11,5 +11,5 @@ int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **); int gv100_ce_new(struct nvkm_device *, int, struct nvkm_engine **); -int tu104_ce_new(struct nvkm_device *, int, struct nvkm_engine **); +int tu102_ce_new(struct nvkm_device *, int, struct nvkm_engine **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h index 5ca86e178bb9..3026b22d44fb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h @@ -36,5 +36,5 @@ int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **); -int tu104_disp_new(struct nvkm_device *, int, struct nvkm_disp **); +int 
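/*
 * The NVIF_VMM_V0_PFNMAP ABI introduced in if000c.h above packs one GPU
 * page per 64-bit entry: the page address lives in bits 63:12, with
 * aperture, writability and validity flags in the low bits. A hedged
 * sketch of encoding one writable system-memory page (variable names
 * hypothetical):
 *
 *	u64 pfn = (addr & NVIF_VMM_PFNMAP_V0_ADDR) |
 *		  NVIF_VMM_PFNMAP_V0_HOST |
 *		  NVIF_VMM_PFNMAP_V0_W |
 *		  NVIF_VMM_PFNMAP_V0_V;
 */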
tu102_disp_new(struct nvkm_device *, int, struct nvkm_disp **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h index 3b2b685778eb..b7fc04dd1628 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h @@ -74,5 +74,5 @@ int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gp10b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); int gv100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); -int tu104_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); +int tu102_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h index ba1518ff8b66..1e924c7f7ba7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h @@ -10,6 +10,9 @@ struct nvkm_gr { u64 nvkm_gr_units(struct nvkm_gr *); int nvkm_gr_tlb_flush(struct nvkm_gr *); +int nvkm_gr_ctxsw_pause(struct nvkm_device *); +int nvkm_gr_ctxsw_resume(struct nvkm_device *); +u32 nvkm_gr_ctxsw_inst(struct nvkm_device *); int nv04_gr_new(struct nvkm_device *, int, struct nvkm_gr **); int nv10_gr_new(struct nvkm_device *, int, struct nvkm_gr **); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h index fe716859d4a9..b72a4844c5f7 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h @@ -6,6 +6,8 @@ struct nvkm_nvdec { struct nvkm_engine engine; + u32 addr; + struct nvkm_falcon *falcon; }; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h index f7d89822b905..c93ad332461a 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h @@ -5,10 +5,13 @@ struct nvkm_sec2 { struct nvkm_engine engine; + u32 addr; + struct nvkm_falcon *falcon; struct nvkm_msgqueue *queue; struct work_struct work; }; int gp102_sec2_new(struct nvkm_device *, int, struct nvkm_sec2 **); +int tu102_sec2_new(struct nvkm_device *, int, struct nvkm_sec2 **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h index fd9d713b611c..da14486317ca 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h @@ -29,5 +29,5 @@ int gf100_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gk20a_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gm107_bar_new(struct nvkm_device *, int, struct nvkm_bar **); int gm20b_bar_new(struct nvkm_device *, int, struct nvkm_bar **); -int tu104_bar_new(struct nvkm_device *, int, struct nvkm_bar **); +int tu102_bar_new(struct nvkm_device *, int, struct nvkm_bar **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h index 1b71812a790b..8ba982c2fdfb 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h @@ -31,5 +31,5 @@ int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); int gm200_devinit_new(struct 
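/*
 * The wholesale tu104 -> tu102 renames through these headers apparently
 * correct an early guess at the Turing chipset id; no functional change is
 * intended. More substantive are the new nvkm_gr_ctxsw_*() hooks in gr.h
 * above, which the SVM fault-handling code presumably uses as a strict
 * bracket around servicing a fault (sketch, not code from this commit):
 *
 *	nvkm_gr_ctxsw_pause(device);
 *	inst = nvkm_gr_ctxsw_inst(device);	// channel on the engine
 *	// ... resolve the fault against `inst` ...
 *	nvkm_gr_ctxsw_resume(device);
 */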
nvkm_device *, int, struct nvkm_devinit **); int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); -int tu104_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); +int tu102_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h index 127f48066026..97322f95b3ee 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h @@ -13,6 +13,8 @@ struct nvkm_fault { struct nvkm_event event; struct nvkm_notify nrpfb; + + struct nvkm_device_oclass user; }; struct nvkm_fault_data { @@ -30,5 +32,5 @@ struct nvkm_fault_data { int gp100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); int gv100_fault_new(struct nvkm_device *, int, struct nvkm_fault **); -int tu104_fault_new(struct nvkm_device *, int, struct nvkm_fault **); +int tu102_fault_new(struct nvkm_device *, int, struct nvkm_fault **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h new file mode 100644 index 000000000000..4c672a5c4cd5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -0,0 +1,14 @@ +#ifndef __NVKM_GSP_H__ +#define __NVKM_GSP_H__ +#define nvkm_gsp(p) container_of((p), struct nvkm_gsp, subdev) +#include <core/subdev.h> + +struct nvkm_gsp { + struct nvkm_subdev subdev; + u32 addr; + + struct nvkm_falcon *falcon; +}; + +int gv100_gsp_new(struct nvkm_device *, int, struct nvkm_gsp **); +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h index b66dedd8abb6..e38f4958dea2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h @@ -31,5 +31,5 @@ int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **); int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **); -int tu104_mc_new(struct nvkm_device *, int, struct nvkm_mc **); +int tu102_mc_new(struct nvkm_device *, int, struct nvkm_mc **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 0a0e064f22e5..28ade86f74c5 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -17,6 +17,7 @@ struct nvkm_vma { bool part:1; /* Region was split from an allocated region by map(). */ bool user:1; /* Region user-allocated. */ bool busy:1; /* Region busy (for temporarily preventing user access). */ + bool mapped:1; /* Region contains valid pages. */ struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ struct nvkm_tags *tags; /* Compression tag reference. 
*/ }; @@ -44,6 +45,8 @@ struct nvkm_vmm { dma_addr_t null; void *nullp; + + bool replay; }; int nvkm_vmm_new(struct nvkm_device *, u64 addr, u64 size, void *argv, u32 argc, @@ -63,6 +66,7 @@ struct nvkm_vmm_map { struct nvkm_mm_node *mem; struct scatterlist *sgl; dma_addr_t *dma; + u64 *pfn; u64 off; const struct nvkm_vmm_page *page; @@ -130,5 +134,5 @@ int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); int gv100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); -int tu104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); +int tu102_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h index f7d3eb647e2e..2904e67d79d2 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h @@ -9,6 +9,7 @@ struct nvkm_top { struct list_head device; }; +u32 nvkm_top_addr(struct nvkm_device *, enum nvkm_devidx); u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx); u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs); u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h index 8a0f85f5fc1a..6a765682fbfa 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h @@ -38,6 +38,7 @@ int nvkm_volt_set_id(struct nvkm_volt *, u8 id, u8 min_id, u8 temp, int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gf100_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gf117_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index b06cdac8f3a2..c3fd5dd39ed9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -214,6 +214,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) WARN_ON(1); break; } + break; case NOUVEAU_GETPARAM_FB_SIZE: getparam->value = drm->gem.vram_available; break; @@ -338,7 +339,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) goto done; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(chan->ntfy, &cli->vmm, &chan->ntfy_vma); + ret = nouveau_vma_new(chan->ntfy, chan->chan->vmm, + &chan->ntfy_vma); if (ret) goto done; } diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c index 5f5be6368aed..c7a94c94dbf3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_backlight.c +++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c @@ -253,6 +253,9 @@ nouveau_backlight_init(struct drm_connector *connector) case NV_DEVICE_INFO_V0_FERMI: case NV_DEVICE_INFO_V0_KEPLER: case NV_DEVICE_INFO_V0_MAXWELL: + case NV_DEVICE_INFO_V0_PASCAL: + case NV_DEVICE_INFO_V0_VOLTA: + case NV_DEVICE_INFO_V0_TURING: ret = nv50_backlight_init(nv_encoder, &props, &ops); break; default: diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 73eff52036d2..34a998012bf6 100644 --- 
a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -194,7 +194,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, struct nouveau_drm *drm = cli->drm; struct nouveau_bo *nvbo; struct nvif_mmu *mmu = &cli->mmu; - struct nvif_vmm *vmm = &cli->vmm.vmm; + struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm; size_t acc_size; int type = ttm_bo_type_device; int ret, i, pi = -1; @@ -1434,7 +1434,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg) if (drm->client.mem->oclass < NVIF_CLASS_MEM_NV50 || !mem->kind) /* untiled */ break; - /* fallthrough, tiled memory */ + /* fall through - tiled memory */ case TTM_PL_VRAM: reg->bus.offset = reg->start << PAGE_SHIFT; reg->bus.base = device->func->resource_addr(device, 1); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 668afbc29c3e..282fd90b65e1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -42,6 +42,7 @@ #include "nouveau_fence.h" #include "nouveau_abi16.h" #include "nouveau_vmm.h" +#include "nouveau_svm.h" MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM"); int nouveau_vram_pushbuf; @@ -95,6 +96,10 @@ nouveau_channel_del(struct nouveau_channel **pchan) if (chan->fence) nouveau_fence(chan->drm)->context_del(chan); + + if (cli) + nouveau_svmm_part(chan->vmm->svmm, chan->inst); + nvif_object_fini(&chan->nvsw); nvif_object_fini(&chan->gart); nvif_object_fini(&chan->vram); @@ -130,6 +135,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->device = device; chan->drm = drm; + chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm; atomic_set(&chan->killed, 0); /* allocate memory for dma push buffer */ @@ -157,7 +163,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, chan->push.addr = chan->push.buffer->bo.offset; if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(chan->push.buffer, &cli->vmm, + ret = nouveau_vma_new(chan->push.buffer, chan->vmm, &chan->push.vma); if (ret) { nouveau_channel_del(pchan); @@ -172,7 +178,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else if (chan->push.buffer->bo.mem.mem_type == TTM_PL_VRAM) { if (device->info.family == NV_DEVICE_INFO_V0_TNT) { @@ -202,7 +208,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device, args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } } @@ -220,7 +226,6 @@ static int nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, u64 runlist, bool priv, struct nouveau_channel **pchan) { - struct nouveau_cli *cli = (void *)device->object.client; static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A, VOLTA_CHANNEL_GPFIFO_A, PASCAL_CHANNEL_GPFIFO_A, @@ -255,7 +260,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.volta.ilength = 0x02000; args.volta.ioffset = 0x10000 + chan->push.addr; args.volta.runlist = runlist; - args.volta.vmm = nvif_handle(&cli->vmm.vmm.object); + args.volta.vmm = nvif_handle(&chan->vmm->vmm.object); args.volta.priv = priv; size = sizeof(args.volta); } else @@ -264,7 +269,7 @@ nouveau_channel_ind(struct 
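/*
 * Every cli->vmm.vmm reference in the channel code becomes chan->vmm->vmm:
 * the channel latches its address space once, in nouveau_channel_prep()
 * above, preferring the SVM-managed VMM when the client has one:
 *
 *	chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
 *
 * After that, the push buffer mapping, the DMA objects and the GPFIFO
 * arguments below all consistently name the same VMM whether or not SVM
 * is active.
 */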
nouveau_drm *drm, struct nvif_device *device, args.kepler.ilength = 0x02000; args.kepler.ioffset = 0x10000 + chan->push.addr; args.kepler.runlist = runlist; - args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object); + args.kepler.vmm = nvif_handle(&chan->vmm->vmm.object); args.kepler.priv = priv; size = sizeof(args.kepler); } else @@ -272,14 +277,14 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device, args.fermi.version = 0; args.fermi.ilength = 0x02000; args.fermi.ioffset = 0x10000 + chan->push.addr; - args.fermi.vmm = nvif_handle(&cli->vmm.vmm.object); + args.fermi.vmm = nvif_handle(&chan->vmm->vmm.object); size = sizeof(args.fermi); } else { args.nv50.version = 0; args.nv50.ilength = 0x02000; args.nv50.ioffset = 0x10000 + chan->push.addr; args.nv50.pushbuf = nvif_handle(&chan->push.ctxdma); - args.nv50.vmm = nvif_handle(&cli->vmm.vmm.object); + args.nv50.vmm = nvif_handle(&chan->vmm->vmm.object); size = sizeof(args.nv50); } @@ -350,7 +355,6 @@ static int nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) { struct nvif_device *device = chan->device; - struct nouveau_cli *cli = (void *)chan->user.client; struct nouveau_drm *drm = chan->drm; struct nv_dma_v0 args = {}; int ret, i; @@ -376,7 +380,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else { args.target = NV_DMA_V0_TARGET_VRAM; args.access = NV_DMA_V0_ACCESS_RDWR; @@ -393,7 +397,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_VM; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } else if (chan->drm->agp.bridge) { args.target = NV_DMA_V0_TARGET_AGP; @@ -405,7 +409,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart) args.target = NV_DMA_V0_TARGET_VM; args.access = NV_DMA_V0_ACCESS_RDWR; args.start = 0; - args.limit = cli->vmm.vmm.limit - 1; + args.limit = chan->vmm->vmm.limit - 1; } ret = nvif_object_init(&chan->user, gart, NV_DMA_IN_MEMORY, @@ -495,6 +499,10 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device, nouveau_channel_del(pchan); } + ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst); + if (ret) + nouveau_channel_del(pchan); + done: cli->base.super = super; return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h index 28418f4e5748..93814d1d31e4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.h +++ b/drivers/gpu/drm/nouveau/nouveau_chan.h @@ -8,6 +8,7 @@ struct nvif_device; struct nouveau_channel { struct nvif_device *device; struct nouveau_drm *drm; + struct nouveau_vmm *vmm; int chid; u64 inst; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 56b6ac1b8edd..55c0fa451163 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -32,18 +32,13 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> -#include <nvif/class.h> - #include "nouveau_fbcon.h" -#include "dispnv04/hw.h" #include "nouveau_crtc.h" -#include "nouveau_dma.h" #include "nouveau_gem.h" #include "nouveau_connector.h" #include "nv50_display.h" -#include "nouveau_fence.h" - +#include <nvif/class.h> #include <nvif/cl0046.h> #include <nvif/event.h> @@ -412,15 +407,14 @@ 
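/*
 * nouveau_display_init() and the disp->init/fini hooks below gain
 * resume/runtime/suspend flags, so the framebuffer and cursor pin/unpin
 * work that used to live in the common suspend/resume paths (deleted
 * further down) can move into the per-generation hooks:
 *
 *	int  (*init)(struct drm_device *, bool resume, bool runtime);
 *	void (*fini)(struct drm_device *, bool suspend);
 */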
nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val, #endif int -nouveau_display_init(struct drm_device *dev) +nouveau_display_init(struct drm_device *dev, bool resume, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); struct drm_connector *connector; struct drm_connector_list_iter conn_iter; int ret; - ret = disp->init(dev); + ret = disp->init(dev, resume, runtime); if (ret) return ret; @@ -437,8 +431,6 @@ nouveau_display_init(struct drm_device *dev) } drm_connector_list_iter_end(&conn_iter); - /* enable flip completion events */ - nvif_notify_get(&drm->flip); return ret; } @@ -457,9 +449,6 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) drm_helper_force_disable_all(dev); } - /* disable flip completion events */ - nvif_notify_put(&drm->flip); - /* disable hotplug interrupts */ drm_connector_list_iter_begin(dev, &conn_iter); nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) { @@ -472,7 +461,7 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime) cancel_work_sync(&drm->hpd_work); drm_kms_helper_poll_disable(dev); - disp->fini(dev); + disp->fini(dev, suspend); } static void @@ -625,7 +614,6 @@ int nouveau_display_suspend(struct drm_device *dev, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - struct drm_crtc *crtc; if (drm_drv_uses_atomic_modeset(dev)) { if (!runtime) { @@ -636,32 +624,9 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime) return ret; } } - - nouveau_display_fini(dev, true, runtime); - return 0; } nouveau_display_fini(dev, true, runtime); - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_framebuffer *nouveau_fb; - - nouveau_fb = nouveau_framebuffer(crtc->primary->fb); - if (!nouveau_fb || !nouveau_fb->nvbo) - continue; - - nouveau_bo_unpin(nouveau_fb->nvbo); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - if (nv_crtc->cursor.nvbo) { - if (nv_crtc->cursor.set_offset) - nouveau_bo_unmap(nv_crtc->cursor.nvbo); - nouveau_bo_unpin(nv_crtc->cursor.nvbo); - } - } - return 0; } @@ -669,275 +634,16 @@ void nouveau_display_resume(struct drm_device *dev, bool runtime) { struct nouveau_display *disp = nouveau_display(dev); - struct nouveau_drm *drm = nouveau_drm(dev); - struct drm_crtc *crtc; - int ret; + + nouveau_display_init(dev, true, runtime); if (drm_drv_uses_atomic_modeset(dev)) { - nouveau_display_init(dev); if (disp->suspend) { drm_atomic_helper_resume(dev, disp->suspend); disp->suspend = NULL; } return; } - - /* re-pin fb/cursors */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_framebuffer *nouveau_fb; - - nouveau_fb = nouveau_framebuffer(crtc->primary->fb); - if (!nouveau_fb || !nouveau_fb->nvbo) - continue; - - ret = nouveau_bo_pin(nouveau_fb->nvbo, TTM_PL_FLAG_VRAM, true); - if (ret) - NV_ERROR(drm, "Could not pin framebuffer\n"); - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - if (!nv_crtc->cursor.nvbo) - continue; - - ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, true); - if (!ret && nv_crtc->cursor.set_offset) - ret = nouveau_bo_map(nv_crtc->cursor.nvbo); - if (ret) - NV_ERROR(drm, "Could not pin/map cursor.\n"); - } - - nouveau_display_init(dev); - - /* Force CLUT to get re-loaded during modeset */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, 
head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - - nv_crtc->lut.depth = 0; - } - - /* This should ensure we don't hit a locking problem when someone - * wakes us up via a connector. We should never go into suspend - * while the display is on anyways. - */ - if (runtime) - return; - - drm_helper_resume_force_mode(dev); - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - - if (!nv_crtc->cursor.nvbo) - continue; - - if (nv_crtc->cursor.set_offset) - nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.nvbo->bo.offset); - nv_crtc->cursor.set_pos(nv_crtc, nv_crtc->cursor_saved_x, - nv_crtc->cursor_saved_y); - } -} - -static int -nouveau_page_flip_emit(struct nouveau_channel *chan, - struct nouveau_bo *old_bo, - struct nouveau_bo *new_bo, - struct nouveau_page_flip_state *s, - struct nouveau_fence **pfence) -{ - struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_drm *drm = chan->drm; - struct drm_device *dev = drm->dev; - unsigned long flags; - int ret; - - /* Queue it to the pending list */ - spin_lock_irqsave(&dev->event_lock, flags); - list_add_tail(&s->head, &fctx->flip); - spin_unlock_irqrestore(&dev->event_lock, flags); - - /* Synchronize with the old framebuffer */ - ret = nouveau_fence_sync(old_bo, chan, false, false); - if (ret) - goto fail; - - /* Emit the pageflip */ - ret = RING_SPACE(chan, 2); - if (ret) - goto fail; - - BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1); - OUT_RING (chan, 0x00000000); - FIRE_RING (chan); - - ret = nouveau_fence_new(chan, false, pfence); - if (ret) - goto fail; - - return 0; -fail: - spin_lock_irqsave(&dev->event_lock, flags); - list_del(&s->head); - spin_unlock_irqrestore(&dev->event_lock, flags); - return ret; -} - -int -nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, u32 flags, - struct drm_modeset_acquire_ctx *ctx) -{ - const int swap_interval = (flags & DRM_MODE_PAGE_FLIP_ASYNC) ? 
0 : 1; - struct drm_device *dev = crtc->dev; - struct nouveau_drm *drm = nouveau_drm(dev); - struct nouveau_bo *old_bo = nouveau_framebuffer(crtc->primary->fb)->nvbo; - struct nouveau_bo *new_bo = nouveau_framebuffer(fb)->nvbo; - struct nouveau_page_flip_state *s; - struct nouveau_channel *chan; - struct nouveau_cli *cli; - struct nouveau_fence *fence; - struct nv04_display *dispnv04 = nv04_display(dev); - int head = nouveau_crtc(crtc)->index; - int ret; - - chan = drm->channel; - if (!chan) - return -ENODEV; - cli = (void *)chan->user.client; - - s = kzalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - - if (new_bo != old_bo) { - ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM, true); - if (ret) - goto fail_free; - } - - mutex_lock(&cli->mutex); - ret = ttm_bo_reserve(&new_bo->bo, true, false, NULL); - if (ret) - goto fail_unpin; - - /* synchronise rendering channel with the kernel's channel */ - ret = nouveau_fence_sync(new_bo, chan, false, true); - if (ret) { - ttm_bo_unreserve(&new_bo->bo); - goto fail_unpin; - } - - if (new_bo != old_bo) { - ttm_bo_unreserve(&new_bo->bo); - - ret = ttm_bo_reserve(&old_bo->bo, true, false, NULL); - if (ret) - goto fail_unpin; - } - - /* Initialize a page flip struct */ - *s = (struct nouveau_page_flip_state) - { { }, event, crtc, fb->format->cpp[0] * 8, fb->pitches[0], - new_bo->bo.offset }; - - /* Keep vblanks on during flip, for the target crtc of this flip */ - drm_crtc_vblank_get(crtc); - - /* Emit a page flip */ - if (swap_interval) { - ret = RING_SPACE(chan, 8); - if (ret) - goto fail_unreserve; - - BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1); - OUT_RING (chan, 0); - BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1); - OUT_RING (chan, head); - BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1); - OUT_RING (chan, 0); - BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1); - OUT_RING (chan, 0); - } - - nouveau_bo_ref(new_bo, &dispnv04->image[head]); - - ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence); - if (ret) - goto fail_unreserve; - mutex_unlock(&cli->mutex); - - /* Update the crtc struct and cleanup */ - crtc->primary->fb = fb; - - nouveau_bo_fence(old_bo, fence, false); - ttm_bo_unreserve(&old_bo->bo); - if (old_bo != new_bo) - nouveau_bo_unpin(old_bo); - nouveau_fence_unref(&fence); - return 0; - -fail_unreserve: - drm_crtc_vblank_put(crtc); - ttm_bo_unreserve(&old_bo->bo); -fail_unpin: - mutex_unlock(&cli->mutex); - if (old_bo != new_bo) - nouveau_bo_unpin(new_bo); -fail_free: - kfree(s); - return ret; -} - -int -nouveau_finish_page_flip(struct nouveau_channel *chan, - struct nouveau_page_flip_state *ps) -{ - struct nouveau_fence_chan *fctx = chan->fence; - struct nouveau_drm *drm = chan->drm; - struct drm_device *dev = drm->dev; - struct nouveau_page_flip_state *s; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); - - if (list_empty(&fctx->flip)) { - NV_ERROR(drm, "unexpected pageflip\n"); - spin_unlock_irqrestore(&dev->event_lock, flags); - return -EINVAL; - } - - s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head); - if (s->event) { - drm_crtc_arm_vblank_event(s->crtc, s->event); - } else { - /* Give up ownership of vblank for page-flipped crtc */ - drm_crtc_vblank_put(s->crtc); - } - - list_del(&s->head); - if (ps) - *ps = *s; - kfree(s); - - spin_unlock_irqrestore(&dev->event_lock, flags); - return 0; -} - -int -nouveau_flip_complete(struct nvif_notify *notify) -{ - struct nouveau_drm *drm = container_of(notify, typeof(*drm), flip); - struct nouveau_channel *chan = drm->channel; - struct 
nouveau_page_flip_state state; - - if (!nouveau_finish_page_flip(chan, &state)) { - nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc), - state.offset + state.crtc->y * - state.pitch + state.crtc->x * - state.bpp / 8); - } - - return NVIF_NOTIFY_KEEP; } int diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h index eb77e41c2d4e..311e175f0513 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.h +++ b/drivers/gpu/drm/nouveau/nouveau_display.h @@ -25,19 +25,11 @@ int nouveau_framebuffer_new(struct drm_device *, const struct drm_mode_fb_cmd2 *, struct nouveau_bo *, struct nouveau_framebuffer **); -struct nouveau_page_flip_state { - struct list_head head; - struct drm_pending_vblank_event *event; - struct drm_crtc *crtc; - int bpp, pitch; - u64 offset; -}; - struct nouveau_display { void *priv; void (*dtor)(struct drm_device *); - int (*init)(struct drm_device *); - void (*fini)(struct drm_device *); + int (*init)(struct drm_device *, bool resume, bool runtime); + void (*fini)(struct drm_device *, bool suspend); struct nvif_disp disp; @@ -61,7 +53,7 @@ nouveau_display(struct drm_device *dev) int nouveau_display_create(struct drm_device *dev); void nouveau_display_destroy(struct drm_device *dev); -int nouveau_display_init(struct drm_device *dev); +int nouveau_display_init(struct drm_device *dev, bool resume, bool runtime); void nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime); int nouveau_display_suspend(struct drm_device *dev, bool runtime); void nouveau_display_resume(struct drm_device *dev, bool runtime); @@ -71,13 +63,6 @@ bool nouveau_display_scanoutpos(struct drm_device *, unsigned int, bool, int *, int *, ktime_t *, ktime_t *, const struct drm_display_mode *); -int nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, - struct drm_pending_vblank_event *event, - uint32_t page_flip_flags, - struct drm_modeset_acquire_ctx *ctx); -int nouveau_finish_page_flip(struct nouveau_channel *, - struct nouveau_page_flip_state *); - int nouveau_display_dumb_create(struct drm_file *, struct drm_device *, struct drm_mode_create_dumb *args); int nouveau_display_dumb_map_offset(struct drm_file *, struct drm_device *, diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c new file mode 100644 index 000000000000..8be7a83ced9b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -0,0 +1,887 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nouveau_dmem.h"
+#include "nouveau_drv.h"
+#include "nouveau_chan.h"
+#include "nouveau_dma.h"
+#include "nouveau_mem.h"
+#include "nouveau_bo.h"
+
+#include <nvif/class.h>
+#include <nvif/object.h>
+#include <nvif/if500b.h>
+#include <nvif/if900b.h>
+
+#include <linux/sched/mm.h>
+#include <linux/hmm.h>
+
+/*
+ * FIXME: this is ugly. Right now we are using TTM to allocate VRAM and we
+ * pin it in VRAM while it is in use. We likely want to overhaul memory
+ * management for nouveau to be more page-like (not necessarily with system
+ * page size but a bigger page size) at the lowest level, and have some shim
+ * layer on top that would provide the same functionality as TTM.
+ */
+#define DMEM_CHUNK_SIZE (2UL << 20)
+#define DMEM_CHUNK_NPAGES (DMEM_CHUNK_SIZE >> PAGE_SHIFT)
+
+struct nouveau_migrate;
+
+enum nouveau_aper {
+	NOUVEAU_APER_VIRT,
+	NOUVEAU_APER_VRAM,
+	NOUVEAU_APER_HOST,
+};
+
+typedef int (*nouveau_migrate_copy_t)(struct nouveau_drm *drm, u64 npages,
+				      enum nouveau_aper, u64 dst_addr,
+				      enum nouveau_aper, u64 src_addr);
+
+struct nouveau_dmem_chunk {
+	struct list_head list;
+	struct nouveau_bo *bo;
+	struct nouveau_drm *drm;
+	unsigned long pfn_first;
+	unsigned long callocated;
+	unsigned long bitmap[BITS_TO_LONGS(DMEM_CHUNK_NPAGES)];
+	spinlock_t lock;
+};
+
+struct nouveau_dmem_migrate {
+	nouveau_migrate_copy_t copy_func;
+	struct nouveau_channel *chan;
+};
+
+struct nouveau_dmem {
+	struct hmm_devmem *devmem;
+	struct nouveau_dmem_migrate migrate;
+	struct list_head chunk_free;
+	struct list_head chunk_full;
+	struct list_head chunk_empty;
+	struct mutex mutex;
+};
+
+struct nouveau_dmem_fault {
+	struct nouveau_drm *drm;
+	struct nouveau_fence *fence;
+	dma_addr_t *dma;
+	unsigned long npages;
+};
+
+struct nouveau_migrate {
+	struct vm_area_struct *vma;
+	struct nouveau_drm *drm;
+	struct nouveau_fence *fence;
+	unsigned long npages;
+	dma_addr_t *dma;
+	unsigned long dma_nr;
+};
+
+static void
+nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page)
+{
+	struct nouveau_dmem_chunk *chunk;
+	struct nouveau_drm *drm;
+	unsigned long idx;
+
+	chunk = (void *)hmm_devmem_page_get_drvdata(page);
+	idx = page_to_pfn(page) - chunk->pfn_first;
+	drm = chunk->drm;
+
+	/*
+	 * FIXME:
+	 *
+	 * This is really a bad example; we need to overhaul nouveau memory
+	 * management to be more page-focused and allow a lighter locking
+	 * scheme to be used in the process.
+	 */
+	spin_lock(&chunk->lock);
+	clear_bit(idx, chunk->bitmap);
+	WARN_ON(!chunk->callocated);
+	chunk->callocated--;
+	/*
+	 * FIXME: when chunk->callocated reaches 0 we should add the chunk to
+	 * a reclaim list so that it can be freed in case of memory pressure.
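+	 *
+	 * A minimal sketch of such a hook, assuming a hypothetical
+	 * "chunk_reclaim" list (it does not exist in this patch):
+	 *
+	 *	if (!chunk->callocated)
+	 *		list_move_tail(&chunk->list, &drm->dmem->chunk_reclaim);
+	 *
+	 * a shrinker could then release the backing nouveau_bo of any
+	 * chunk that stays unused.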
+ */ + spin_unlock(&chunk->lock); +} + +static void +nouveau_dmem_fault_alloc_and_copy(struct vm_area_struct *vma, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_dmem_fault *fault = private; + struct nouveau_drm *drm = fault->drm; + struct device *dev = drm->dev->dev; + unsigned long addr, i, npages = 0; + nouveau_migrate_copy_t copy; + int ret; + + + /* First allocate new memory */ + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct page *dpage, *spage; + + dst_pfns[i] = 0; + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + dpage = hmm_vma_alloc_locked_page(vma, addr); + if (!dpage) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + continue; + } + + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED; + npages++; + } + + /* Allocate storage for DMA addresses, so we can unmap later. */ + fault->dma = kmalloc(sizeof(*fault->dma) * npages, GFP_KERNEL); + if (!fault->dma) + goto error; + + /* Copy things over */ + copy = drm->dmem->migrate.copy_func; + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct nouveau_dmem_chunk *chunk; + struct page *spage, *dpage; + u64 src_addr, dst_addr; + + dpage = migrate_pfn_to_page(dst_pfns[i]); + if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + + fault->dma[fault->npages] = + dma_map_page_attrs(dev, dpage, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, fault->dma[fault->npages])) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + + dst_addr = fault->dma[fault->npages++]; + + chunk = (void *)hmm_devmem_page_get_drvdata(spage); + src_addr = page_to_pfn(spage) - chunk->pfn_first; + src_addr = (src_addr << PAGE_SHIFT) + chunk->bo->bo.offset; + + ret = copy(drm, 1, NOUVEAU_APER_HOST, dst_addr, + NOUVEAU_APER_VRAM, src_addr); + if (ret) { + dst_pfns[i] = MIGRATE_PFN_ERROR; + __free_page(dpage); + continue; + } + } + + nouveau_fence_new(drm->dmem->migrate.chan, false, &fault->fence); + + return; + +error: + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { + struct page *page; + + if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + page = migrate_pfn_to_page(dst_pfns[i]); + dst_pfns[i] = MIGRATE_PFN_ERROR; + if (page == NULL) + continue; + + __free_page(page); + } +} + +void nouveau_dmem_fault_finalize_and_map(struct vm_area_struct *vma, + const unsigned long *src_pfns, + const unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_dmem_fault *fault = private; + struct nouveau_drm *drm = fault->drm; + + if (fault->fence) { + nouveau_fence_wait(fault->fence, true, false); + nouveau_fence_unref(&fault->fence); + } else { + /* + * FIXME wait for channel to be IDLE before calling finalizing + * the hmem object below (nouveau_migrate_hmem_fini()). 
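+ *
+ * One plausible way to do that with an existing helper would be a
+ * call along the lines of (untested sketch):
+ *
+ *	nouveau_channel_idle(drm->dmem->migrate.chan);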
+ */ + } + + while (fault->npages--) { + dma_unmap_page(drm->dev->dev, fault->dma[fault->npages], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + kfree(fault->dma); +} + +static const struct migrate_vma_ops nouveau_dmem_fault_migrate_ops = { + .alloc_and_copy = nouveau_dmem_fault_alloc_and_copy, + .finalize_and_map = nouveau_dmem_fault_finalize_and_map, +}; + +static int +nouveau_dmem_fault(struct hmm_devmem *devmem, + struct vm_area_struct *vma, + unsigned long addr, + const struct page *page, + unsigned int flags, + pmd_t *pmdp) +{ + struct drm_device *drm_dev = dev_get_drvdata(devmem->device); + unsigned long src[1] = {0}, dst[1] = {0}; + struct nouveau_dmem_fault fault = {0}; + int ret; + + + + /* + * FIXME what we really want is to find some heuristic to migrate more + * than just one page on CPU fault. When such fault happens it is very + * likely that more surrounding page will CPU fault too. + */ + fault.drm = nouveau_drm(drm_dev); + ret = migrate_vma(&nouveau_dmem_fault_migrate_ops, vma, addr, + addr + PAGE_SIZE, src, dst, &fault); + if (ret) + return VM_FAULT_SIGBUS; + + if (dst[0] == MIGRATE_PFN_ERROR) + return VM_FAULT_SIGBUS; + + return 0; +} + +static const struct hmm_devmem_ops +nouveau_dmem_devmem_ops = { + .free = nouveau_dmem_free, + .fault = nouveau_dmem_fault, +}; + +static int +nouveau_dmem_chunk_alloc(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + int ret; + + if (drm->dmem == NULL) + return -EINVAL; + + mutex_lock(&drm->dmem->mutex); + chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, + struct nouveau_dmem_chunk, + list); + if (chunk == NULL) { + mutex_unlock(&drm->dmem->mutex); + return -ENOMEM; + } + + list_del(&chunk->list); + mutex_unlock(&drm->dmem->mutex); + + ret = nouveau_bo_new(&drm->client, DMEM_CHUNK_SIZE, 0, + TTM_PL_FLAG_VRAM, 0, 0, NULL, NULL, + &chunk->bo); + if (ret) + goto out; + + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + if (ret) { + nouveau_bo_ref(NULL, &chunk->bo); + goto out; + } + + bitmap_zero(chunk->bitmap, DMEM_CHUNK_NPAGES); + spin_lock_init(&chunk->lock); + +out: + mutex_lock(&drm->dmem->mutex); + if (chunk->bo) + list_add(&chunk->list, &drm->dmem->chunk_empty); + else + list_add_tail(&chunk->list, &drm->dmem->chunk_empty); + mutex_unlock(&drm->dmem->mutex); + + return ret; +} + +static struct nouveau_dmem_chunk * +nouveau_dmem_chunk_first_free_locked(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + + chunk = list_first_entry_or_null(&drm->dmem->chunk_free, + struct nouveau_dmem_chunk, + list); + if (chunk) + return chunk; + + chunk = list_first_entry_or_null(&drm->dmem->chunk_empty, + struct nouveau_dmem_chunk, + list); + if (chunk->bo) + return chunk; + + return NULL; +} + +static int +nouveau_dmem_pages_alloc(struct nouveau_drm *drm, + unsigned long npages, + unsigned long *pages) +{ + struct nouveau_dmem_chunk *chunk; + unsigned long c; + int ret; + + memset(pages, 0xff, npages * sizeof(*pages)); + + mutex_lock(&drm->dmem->mutex); + for (c = 0; c < npages;) { + unsigned long i; + + chunk = nouveau_dmem_chunk_first_free_locked(drm); + if (chunk == NULL) { + mutex_unlock(&drm->dmem->mutex); + ret = nouveau_dmem_chunk_alloc(drm); + if (ret) { + if (c) + break; + return ret; + } + continue; + } + + spin_lock(&chunk->lock); + i = find_first_zero_bit(chunk->bitmap, DMEM_CHUNK_NPAGES); + while (i < DMEM_CHUNK_NPAGES && c < npages) { + pages[c] = chunk->pfn_first + i; + set_bit(i, chunk->bitmap); + chunk->callocated++; + c++; + + i = find_next_zero_bit(chunk->bitmap, + DMEM_CHUNK_NPAGES, 
i); + } + spin_unlock(&chunk->lock); + } + mutex_unlock(&drm->dmem->mutex); + + return 0; +} + +static struct page * +nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm) +{ + unsigned long pfns[1]; + struct page *page; + int ret; + + /* FIXME stop all the miss-match API ... */ + ret = nouveau_dmem_pages_alloc(drm, 1, pfns); + if (ret) + return NULL; + + page = pfn_to_page(pfns[0]); + get_page(page); + lock_page(page); + return page; +} + +static void +nouveau_dmem_page_free_locked(struct nouveau_drm *drm, struct page *page) +{ + unlock_page(page); + put_page(page); +} + +void +nouveau_dmem_resume(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + int ret; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { + ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); + /* FIXME handle pin failure */ + WARN_ON(ret); + } + mutex_unlock(&drm->dmem->mutex); +} + +void +nouveau_dmem_suspend(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + list_for_each_entry (chunk, &drm->dmem->chunk_free, list) { + nouveau_bo_unpin(chunk->bo); + } + list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { + nouveau_bo_unpin(chunk->bo); + } + list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { + nouveau_bo_unpin(chunk->bo); + } + mutex_unlock(&drm->dmem->mutex); +} + +void +nouveau_dmem_fini(struct nouveau_drm *drm) +{ + struct nouveau_dmem_chunk *chunk, *tmp; + + if (drm->dmem == NULL) + return; + + mutex_lock(&drm->dmem->mutex); + + WARN_ON(!list_empty(&drm->dmem->chunk_free)); + WARN_ON(!list_empty(&drm->dmem->chunk_full)); + + list_for_each_entry_safe (chunk, tmp, &drm->dmem->chunk_empty, list) { + if (chunk->bo) { + nouveau_bo_unpin(chunk->bo); + nouveau_bo_ref(NULL, &chunk->bo); + } + list_del(&chunk->list); + kfree(chunk); + } + + mutex_unlock(&drm->dmem->mutex); +} + +static int +nvc0b5_migrate_copy(struct nouveau_drm *drm, u64 npages, + enum nouveau_aper dst_aper, u64 dst_addr, + enum nouveau_aper src_aper, u64 src_addr) +{ + struct nouveau_channel *chan = drm->dmem->migrate.chan; + u32 launch_dma = (1 << 9) /* MULTI_LINE_ENABLE. */ | + (1 << 8) /* DST_MEMORY_LAYOUT_PITCH. */ | + (1 << 7) /* SRC_MEMORY_LAYOUT_PITCH. */ | + (1 << 2) /* FLUSH_ENABLE_TRUE. */ | + (2 << 0) /* DATA_TRANSFER_TYPE_NON_PIPELINED. */; + int ret; + + ret = RING_SPACE(chan, 13); + if (ret) + return ret; + + if (src_aper != NOUVEAU_APER_VIRT) { + switch (src_aper) { + case NOUVEAU_APER_VRAM: + BEGIN_IMC0(chan, NvSubCopy, 0x0260, 0); + break; + case NOUVEAU_APER_HOST: + BEGIN_IMC0(chan, NvSubCopy, 0x0260, 1); + break; + default: + return -EINVAL; + } + launch_dma |= 0x00001000; /* SRC_TYPE_PHYSICAL. */ + } + + if (dst_aper != NOUVEAU_APER_VIRT) { + switch (dst_aper) { + case NOUVEAU_APER_VRAM: + BEGIN_IMC0(chan, NvSubCopy, 0x0264, 0); + break; + case NOUVEAU_APER_HOST: + BEGIN_IMC0(chan, NvSubCopy, 0x0264, 1); + break; + default: + return -EINVAL; + } + launch_dma |= 0x00002000; /* DST_TYPE_PHYSICAL. 
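+ *
+ * (Together with the base value above, the final LAUNCH_DMA word is
+ * DATA_TRANSFER_TYPE_NON_PIPELINED | FLUSH_ENABLE_TRUE |
+ * SRC/DST_MEMORY_LAYOUT_PITCH | MULTI_LINE_ENABLE, plus
+ * SRC/DST_TYPE_PHYSICAL for whichever aperture is not virtual.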
*/ + } + + BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8); + OUT_RING (chan, upper_32_bits(src_addr)); + OUT_RING (chan, lower_32_bits(src_addr)); + OUT_RING (chan, upper_32_bits(dst_addr)); + OUT_RING (chan, lower_32_bits(dst_addr)); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, PAGE_SIZE); + OUT_RING (chan, npages); + BEGIN_NVC0(chan, NvSubCopy, 0x0300, 1); + OUT_RING (chan, launch_dma); + return 0; +} + +static int +nouveau_dmem_migrate_init(struct nouveau_drm *drm) +{ + switch (drm->ttm.copy.oclass) { + case PASCAL_DMA_COPY_A: + case PASCAL_DMA_COPY_B: + case VOLTA_DMA_COPY_A: + case TURING_DMA_COPY_A: + drm->dmem->migrate.copy_func = nvc0b5_migrate_copy; + drm->dmem->migrate.chan = drm->ttm.chan; + return 0; + default: + break; + } + return -ENODEV; +} + +void +nouveau_dmem_init(struct nouveau_drm *drm) +{ + struct device *device = drm->dev->dev; + unsigned long i, size; + int ret; + + /* This only make sense on PASCAL or newer */ + if (drm->client.device.info.family < NV_DEVICE_INFO_V0_PASCAL) + return; + + if (!(drm->dmem = kzalloc(sizeof(*drm->dmem), GFP_KERNEL))) + return; + + mutex_init(&drm->dmem->mutex); + INIT_LIST_HEAD(&drm->dmem->chunk_free); + INIT_LIST_HEAD(&drm->dmem->chunk_full); + INIT_LIST_HEAD(&drm->dmem->chunk_empty); + + size = ALIGN(drm->client.device.info.ram_user, DMEM_CHUNK_SIZE); + + /* Initialize migration dma helpers before registering memory */ + ret = nouveau_dmem_migrate_init(drm); + if (ret) { + kfree(drm->dmem); + drm->dmem = NULL; + return; + } + + /* + * FIXME we need some kind of policy to decide how much VRAM we + * want to register with HMM. For now just register everything + * and latter if we want to do thing like over commit then we + * could revisit this. + */ + drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops, + device, size); + if (drm->dmem->devmem == NULL) { + kfree(drm->dmem); + drm->dmem = NULL; + return; + } + + for (i = 0; i < (size / DMEM_CHUNK_SIZE); ++i) { + struct nouveau_dmem_chunk *chunk; + struct page *page; + unsigned long j; + + chunk = kzalloc(sizeof(*chunk), GFP_KERNEL); + if (chunk == NULL) { + nouveau_dmem_fini(drm); + return; + } + + chunk->drm = drm; + chunk->pfn_first = drm->dmem->devmem->pfn_first; + chunk->pfn_first += (i * DMEM_CHUNK_NPAGES); + list_add_tail(&chunk->list, &drm->dmem->chunk_empty); + + page = pfn_to_page(chunk->pfn_first); + for (j = 0; j < DMEM_CHUNK_NPAGES; ++j, ++page) { + hmm_devmem_page_set_drvdata(page, (long)chunk); + } + } + + NV_INFO(drm, "DMEM: registered %ldMB of device memory\n", size >> 20); +} + +static void +nouveau_dmem_migrate_alloc_and_copy(struct vm_area_struct *vma, + const unsigned long *src_pfns, + unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_migrate *migrate = private; + struct nouveau_drm *drm = migrate->drm; + struct device *dev = drm->dev->dev; + unsigned long addr, i, npages = 0; + nouveau_migrate_copy_t copy; + int ret; + + /* First allocate new memory */ + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct page *dpage, *spage; + + dst_pfns[i] = 0; + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) + continue; + + dpage = nouveau_dmem_page_alloc_locked(drm); + if (!dpage) + continue; + + dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) | + MIGRATE_PFN_LOCKED | + MIGRATE_PFN_DEVICE; + npages++; + } + + if (!npages) + return; + + /* Allocate storage for DMA addresses, so we can unmap later. 
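+ *
+ * Note: sizeof(*migrate->dma) * npages is an unchecked
+ * multiplication; kmalloc_array(npages, sizeof(*migrate->dma),
+ * GFP_KERNEL) would be the overflow-safe equivalent.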
*/ + migrate->dma = kmalloc(sizeof(*migrate->dma) * npages, GFP_KERNEL); + if (!migrate->dma) + goto error; + + /* Copy things over */ + copy = drm->dmem->migrate.copy_func; + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) { + struct nouveau_dmem_chunk *chunk; + struct page *spage, *dpage; + u64 src_addr, dst_addr; + + dpage = migrate_pfn_to_page(dst_pfns[i]); + if (!dpage || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + chunk = (void *)hmm_devmem_page_get_drvdata(dpage); + dst_addr = page_to_pfn(dpage) - chunk->pfn_first; + dst_addr = (dst_addr << PAGE_SHIFT) + chunk->bo->bo.offset; + + spage = migrate_pfn_to_page(src_pfns[i]); + if (!spage || !(src_pfns[i] & MIGRATE_PFN_MIGRATE)) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + + migrate->dma[migrate->dma_nr] = + dma_map_page_attrs(dev, spage, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(dev, migrate->dma[migrate->dma_nr])) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + + src_addr = migrate->dma[migrate->dma_nr++]; + + ret = copy(drm, 1, NOUVEAU_APER_VRAM, dst_addr, + NOUVEAU_APER_HOST, src_addr); + if (ret) { + nouveau_dmem_page_free_locked(drm, dpage); + dst_pfns[i] = 0; + continue; + } + } + + nouveau_fence_new(drm->dmem->migrate.chan, false, &migrate->fence); + + return; + +error: + for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, ++i) { + struct page *page; + + if (!dst_pfns[i] || dst_pfns[i] == MIGRATE_PFN_ERROR) + continue; + + page = migrate_pfn_to_page(dst_pfns[i]); + dst_pfns[i] = MIGRATE_PFN_ERROR; + if (page == NULL) + continue; + + __free_page(page); + } +} + +void nouveau_dmem_migrate_finalize_and_map(struct vm_area_struct *vma, + const unsigned long *src_pfns, + const unsigned long *dst_pfns, + unsigned long start, + unsigned long end, + void *private) +{ + struct nouveau_migrate *migrate = private; + struct nouveau_drm *drm = migrate->drm; + + if (migrate->fence) { + nouveau_fence_wait(migrate->fence, true, false); + nouveau_fence_unref(&migrate->fence); + } else { + /* + * FIXME wait for channel to be IDLE before finalizing + * the hmem object below (nouveau_migrate_hmem_fini()) ? + */ + } + + while (migrate->dma_nr--) { + dma_unmap_page(drm->dev->dev, migrate->dma[migrate->dma_nr], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + } + kfree(migrate->dma); + + /* + * FIXME optimization: update GPU page table to point to newly + * migrated memory. 
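+ *
+ * i.e. rather than letting the GPU fault on the freshly migrated
+ * range, the new VRAM pages could be mapped immediately, presumably
+ * via the same NVIF_VMM_V0_PFNMAP method the SVM fault handler uses.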
+ */ +} + +static const struct migrate_vma_ops nouveau_dmem_migrate_ops = { + .alloc_and_copy = nouveau_dmem_migrate_alloc_and_copy, + .finalize_and_map = nouveau_dmem_migrate_finalize_and_map, +}; + +int +nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + unsigned long *src_pfns, *dst_pfns, npages; + struct nouveau_migrate migrate = {0}; + unsigned long i, c, max; + int ret = 0; + + npages = (end - start) >> PAGE_SHIFT; + max = min(SG_MAX_SINGLE_ALLOC, npages); + src_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (src_pfns == NULL) + return -ENOMEM; + dst_pfns = kzalloc(sizeof(long) * max, GFP_KERNEL); + if (dst_pfns == NULL) { + kfree(src_pfns); + return -ENOMEM; + } + + migrate.drm = drm; + migrate.vma = vma; + migrate.npages = npages; + for (i = 0; i < npages; i += c) { + unsigned long next; + + c = min(SG_MAX_SINGLE_ALLOC, npages); + next = start + (c << PAGE_SHIFT); + ret = migrate_vma(&nouveau_dmem_migrate_ops, vma, start, + next, src_pfns, dst_pfns, &migrate); + if (ret) + goto out; + start = next; + } + +out: + kfree(dst_pfns); + kfree(src_pfns); + return ret; +} + +static inline bool +nouveau_dmem_page(struct nouveau_drm *drm, struct page *page) +{ + if (!is_device_private_page(page)) + return false; + + if (drm->dmem->devmem != page->pgmap->data) + return false; + + return true; +} + +void +nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range) +{ + unsigned long i, npages; + + npages = (range->end - range->start) >> PAGE_SHIFT; + for (i = 0; i < npages; ++i) { + struct nouveau_dmem_chunk *chunk; + struct page *page; + uint64_t addr; + + page = hmm_pfn_to_page(range, range->pfns[i]); + if (page == NULL) + continue; + + if (!(range->pfns[i] & range->flags[HMM_PFN_DEVICE_PRIVATE])) { + continue; + } + + if (!nouveau_dmem_page(drm, page)) { + WARN(1, "Some unknown device memory !\n"); + range->pfns[i] = 0; + continue; + } + + chunk = (void *)hmm_devmem_page_get_drvdata(page); + addr = page_to_pfn(page) - chunk->pfn_first; + addr = (addr + chunk->bo->bo.mem.start) << PAGE_SHIFT; + + range->pfns[i] &= ((1UL << range->pfn_shift) - 1); + range->pfns[i] |= (addr >> PAGE_SHIFT) << range->pfn_shift; + } +} diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.h b/drivers/gpu/drm/nouveau/nouveau_dmem.h new file mode 100644 index 000000000000..9d97d756fb7d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.h @@ -0,0 +1,60 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __NOUVEAU_DMEM_H__ +#define __NOUVEAU_DMEM_H__ +#include <nvif/os.h> +struct drm_device; +struct drm_file; +struct nouveau_drm; +struct hmm_range; + +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) +void nouveau_dmem_init(struct nouveau_drm *); +void nouveau_dmem_fini(struct nouveau_drm *); +void nouveau_dmem_suspend(struct nouveau_drm *); +void nouveau_dmem_resume(struct nouveau_drm *); + +int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end); + +void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range); +#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +static inline void nouveau_dmem_init(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_fini(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_suspend(struct nouveau_drm *drm) {} +static inline void nouveau_dmem_resume(struct nouveau_drm *drm) {} + +static inline int nouveau_dmem_migrate_vma(struct nouveau_drm *drm, + struct vm_area_struct *vma, + unsigned long start, + unsigned long end) +{ + return 0; +} + +static inline void nouveau_dmem_convert_pfn(struct nouveau_drm *drm, + struct hmm_range *range) {} +#endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index f900e94592f8..5020265bfbd9 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -44,7 +44,6 @@ #include <nvif/class.h> #include <nvif/cl0002.h> #include <nvif/cla06f.h> -#include <nvif/if0004.h> #include "nouveau_drv.h" #include "nouveau_dma.h" @@ -63,6 +62,8 @@ #include "nouveau_usif.h" #include "nouveau_connector.h" #include "nouveau_platform.h" +#include "nouveau_svm.h" +#include "nouveau_dmem.h" MODULE_PARM_DESC(config, "option string to pass to driver core"); static char *nouveau_config; @@ -173,6 +174,7 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); + nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->vmm); nvif_mmu_fini(&cli->mmu); nvif_device_fini(&cli->device); @@ -283,19 +285,134 @@ done: } static void -nouveau_accel_fini(struct nouveau_drm *drm) +nouveau_accel_ce_fini(struct nouveau_drm *drm) +{ + nouveau_channel_idle(drm->cechan); + nvif_object_fini(&drm->ttm.copy); + nouveau_channel_del(&drm->cechan); +} + +static void +nouveau_accel_ce_init(struct nouveau_drm *drm) +{ + struct nvif_device *device = &drm->client.device; + int ret = 0; + + /* Allocate channel that has access to a (preferably async) copy + * engine, to use for TTM buffer moves. + */ + if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { + ret = nouveau_channel_new(drm, device, + nvif_fifo_runlist_ce(device), 0, + true, &drm->cechan); + } else + if (device->info.chipset >= 0xa3 && + device->info.chipset != 0xaa && + device->info.chipset != 0xac) { + /* Prior to Kepler, there's only a single runlist, so all + * engines can be accessed from any channel. + * + * We still want to use a separate channel though. 
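+ *
+ * (A separate channel still helps here: TTM buffer moves can then be
+ * queued without serialising against work submitted on the main GR
+ * channel, even though any channel could technically drive the copy.)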
+ */ + ret = nouveau_channel_new(drm, device, NvDmaFB, NvDmaTT, false, + &drm->cechan); + } + + if (ret) + NV_ERROR(drm, "failed to create ce channel, %d\n", ret); +} + +static void +nouveau_accel_gr_fini(struct nouveau_drm *drm) { nouveau_channel_idle(drm->channel); nvif_object_fini(&drm->ntfy); nvkm_gpuobj_del(&drm->notify); - nvif_notify_fini(&drm->flip); nvif_object_fini(&drm->nvsw); nouveau_channel_del(&drm->channel); +} - nouveau_channel_idle(drm->cechan); - nvif_object_fini(&drm->ttm.copy); - nouveau_channel_del(&drm->cechan); +static void +nouveau_accel_gr_init(struct nouveau_drm *drm) +{ + struct nvif_device *device = &drm->client.device; + u32 arg0, arg1; + int ret; + + /* Allocate channel that has access to the graphics engine. */ + if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { + arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); + arg1 = 1; + } else { + arg0 = NvDmaFB; + arg1 = NvDmaTT; + } + ret = nouveau_channel_new(drm, device, arg0, arg1, false, + &drm->channel); + if (ret) { + NV_ERROR(drm, "failed to create kernel channel, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + + /* A SW class is used on pre-NV50 HW to assist with handling the + * synchronisation of page flips, as well as to implement fences + * on TNT/TNT2 HW that lacks any kind of support in host. + */ + if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { + ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, + nouveau_abi16_swclass(drm), NULL, 0, + &drm->nvsw); + if (ret == 0) { + ret = RING_SPACE(drm->channel, 2); + if (ret == 0) { + BEGIN_NV04(drm->channel, NvSubSw, 0, 1); + OUT_RING (drm->channel, drm->nvsw.handle); + } + } + + if (ret) { + NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + } + + /* NvMemoryToMemoryFormat requires a notifier ctxdma for some reason, + * even if notification is never requested, so, allocate a ctxdma on + * any GPU where it's possible we'll end up using M2MF for BO moves. + */ + if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { + ret = nvkm_gpuobj_new(nvxx_device(device), 32, 0, false, NULL, + &drm->notify); + if (ret) { + NV_ERROR(drm, "failed to allocate notifier, %d\n", ret); + nouveau_accel_gr_fini(drm); + return; + } + + ret = nvif_object_init(&drm->channel->user, NvNotify0, + NV_DMA_IN_MEMORY, + &(struct nv_dma_v0) { + .target = NV_DMA_V0_TARGET_VRAM, + .access = NV_DMA_V0_ACCESS_RDWR, + .start = drm->notify->addr, + .limit = drm->notify->addr + 31 + }, sizeof(struct nv_dma_v0), + &drm->ntfy); + if (ret) { + nouveau_accel_gr_fini(drm); + return; + } + } +} + +static void +nouveau_accel_fini(struct nouveau_drm *drm) +{ + nouveau_accel_ce_fini(drm); + nouveau_accel_gr_fini(drm); if (drm->fence) nouveau_fence(drm)->dtor(drm); } @@ -305,23 +422,16 @@ nouveau_accel_init(struct nouveau_drm *drm) { struct nvif_device *device = &drm->client.device; struct nvif_sclass *sclass; - u32 arg0, arg1; int ret, i, n; if (nouveau_noaccel) return; + /* Initialise global support for channels, and synchronisation. */ ret = nouveau_channels_init(drm); if (ret) return; - if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) { - ret = nvif_user_init(device); - if (ret) - return; - } - - /* initialise synchronisation routines */ /*XXX: this is crap, but the fence/channel stuff is a little * backwards in some places. this will be fixed. 
*/ @@ -368,95 +478,18 @@ nouveau_accel_init(struct nouveau_drm *drm) return; } - if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) { - ret = nouveau_channel_new(drm, &drm->client.device, - nvif_fifo_runlist_ce(device), 0, - true, &drm->cechan); - if (ret) - NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - - arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR); - arg1 = 1; - } else - if (device->info.chipset >= 0xa3 && - device->info.chipset != 0xaa && - device->info.chipset != 0xac) { - ret = nouveau_channel_new(drm, &drm->client.device, - NvDmaFB, NvDmaTT, false, - &drm->cechan); + /* Volta requires access to a doorbell register for kickoff. */ + if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) { + ret = nvif_user_init(device); if (ret) - NV_ERROR(drm, "failed to create ce channel, %d\n", ret); - - arg0 = NvDmaFB; - arg1 = NvDmaTT; - } else { - arg0 = NvDmaFB; - arg1 = NvDmaTT; - } - - ret = nouveau_channel_new(drm, &drm->client.device, - arg0, arg1, false, &drm->channel); - if (ret) { - NV_ERROR(drm, "failed to create kernel channel, %d\n", ret); - nouveau_accel_fini(drm); - return; - } - - if (device->info.family < NV_DEVICE_INFO_V0_TESLA) { - ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW, - nouveau_abi16_swclass(drm), NULL, 0, - &drm->nvsw); - if (ret == 0) { - ret = RING_SPACE(drm->channel, 2); - if (ret == 0) { - BEGIN_NV04(drm->channel, NvSubSw, 0, 1); - OUT_RING (drm->channel, drm->nvsw.handle); - } - - ret = nvif_notify_init(&drm->nvsw, - nouveau_flip_complete, - false, NV04_NVSW_NTFY_UEVENT, - NULL, 0, 0, &drm->flip); - if (ret == 0) - ret = nvif_notify_get(&drm->flip); - if (ret) { - nouveau_accel_fini(drm); - return; - } - } - - if (ret) { - NV_ERROR(drm, "failed to allocate sw class, %d\n", ret); - nouveau_accel_fini(drm); - return; - } - } - - if (device->info.family < NV_DEVICE_INFO_V0_FERMI) { - ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0, - false, NULL, &drm->notify); - if (ret) { - NV_ERROR(drm, "failed to allocate notifier, %d\n", ret); - nouveau_accel_fini(drm); return; - } - - ret = nvif_object_init(&drm->channel->user, NvNotify0, - NV_DMA_IN_MEMORY, - &(struct nv_dma_v0) { - .target = NV_DMA_V0_TARGET_VRAM, - .access = NV_DMA_V0_ACCESS_RDWR, - .start = drm->notify->addr, - .limit = drm->notify->addr + 31 - }, sizeof(struct nv_dma_v0), - &drm->ntfy); - if (ret) { - nouveau_accel_fini(drm); - return; - } } + /* Allocate channels we need to support various functions. */ + nouveau_accel_gr_init(drm); + nouveau_accel_ce_init(drm); + /* Initialise accelerated TTM buffer moves. 
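+ *
+ * (nouveau_bo_move_init() selects the copy path from the classes
+ * exposed by the channels created above, which is presumably why it
+ * has to run after them.)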
*/ nouveau_bo_move_init(drm); } @@ -504,19 +537,22 @@ nouveau_drm_device_init(struct drm_device *dev) if (ret) goto fail_bios; + nouveau_accel_init(drm); + ret = nouveau_display_create(dev); if (ret) goto fail_dispctor; if (dev->mode_config.num_crtc) { - ret = nouveau_display_init(dev); + ret = nouveau_display_init(dev, false, false); if (ret) goto fail_dispinit; } nouveau_debugfs_init(drm); nouveau_hwmon_init(dev); - nouveau_accel_init(drm); + nouveau_svm_init(drm); + nouveau_dmem_init(drm); nouveau_fbcon_init(dev); nouveau_led_init(dev); @@ -534,6 +570,7 @@ nouveau_drm_device_init(struct drm_device *dev) fail_dispinit: nouveau_display_destroy(dev); fail_dispctor: + nouveau_accel_fini(drm); nouveau_bios_takedown(dev); fail_bios: nouveau_ttm_fini(drm); @@ -559,7 +596,8 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_led_fini(dev); nouveau_fbcon_fini(dev); - nouveau_accel_fini(drm); + nouveau_dmem_fini(drm); + nouveau_svm_fini(drm); nouveau_hwmon_fini(dev); nouveau_debugfs_fini(drm); @@ -567,6 +605,7 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_display_fini(dev, false, false); nouveau_display_destroy(dev); + nouveau_accel_fini(drm); nouveau_bios_takedown(dev); nouveau_ttm_fini(drm); @@ -704,6 +743,8 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime) struct nouveau_drm *drm = nouveau_drm(dev); int ret; + nouveau_svm_suspend(drm); + nouveau_dmem_suspend(drm); nouveau_led_suspend(dev); if (dev->mode_config.num_crtc) { @@ -780,7 +821,8 @@ nouveau_do_resume(struct drm_device *dev, bool runtime) } nouveau_led_resume(dev); - + nouveau_dmem_resume(drm); + nouveau_svm_resume(drm); return 0; } @@ -1000,6 +1042,8 @@ nouveau_ioctls[] = { DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index d20b9ba4b1c1..da847244479d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -96,6 +96,7 @@ struct nouveau_cli { struct nvif_device device; struct nvif_mmu mmu; struct nouveau_vmm vmm; + struct nouveau_vmm svm; const struct nvif_mclass *mem; struct list_head head; @@ -181,7 +182,6 @@ struct nouveau_drm { struct nouveau_fbdev *fbcon; struct nvif_object nvsw; struct nvif_object ntfy; - struct nvif_notify flip; /* nv10-nv40 tiling regions */ struct { @@ -210,6 +210,10 @@ struct nouveau_drm { bool have_disp_power_ref; struct dev_pm_domain vga_pm_domain; + + struct nouveau_svm *svm; + + struct nouveau_dmem *dmem; }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index d275418edd24..0d3cd4e05728 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -353,7 +353,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper, chan = 
nouveau_nofbaccel ? NULL : drm->channel; if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) { - ret = nouveau_vma_new(nvbo, &drm->client.vmm, &fb->vma); + ret = nouveau_vma_new(nvbo, chan->vmm, &fb->vma); if (ret) { NV_ERROR(drm, "failed to map fb into chan: %d\n", ret); chan = NULL; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index b999e6058046..ad27caeca0fd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -82,8 +82,6 @@ int nv50_fence_create(struct nouveau_drm *); int nv84_fence_create(struct nouveau_drm *); int nvc0_fence_create(struct nouveau_drm *); -int nouveau_flip_complete(struct nvif_notify *); - struct nv84_fence_chan { struct nouveau_fence_chan base; struct nouveau_vma *vma; diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index fb028e3b5f51..b4bda716564d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -68,10 +68,11 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; + struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm; struct nouveau_vma *vma; int ret; - if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); @@ -82,7 +83,7 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) if (ret < 0 && ret != -EACCES) goto out; - ret = nouveau_vma_new(nvbo, &cli->vmm, &vma); + ret = nouveau_vma_new(nvbo, vmm, &vma); pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); out: @@ -142,17 +143,18 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv) struct nouveau_bo *nvbo = nouveau_gem_object(gem); struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); struct device *dev = drm->dev->dev; + struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm; struct nouveau_vma *vma; int ret; - if (cli->vmm.vmm.object.oclass < NVIF_CLASS_VMM_NV50) + if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); if (ret) return; - vma = nouveau_vma_find(nvbo, &cli->vmm); + vma = nouveau_vma_find(nvbo, vmm); if (vma) { if (--vma->refs == 0) { ret = pm_runtime_get_sync(dev); @@ -219,6 +221,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, { struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_bo *nvbo = nouveau_gem_object(gem); + struct nouveau_vmm *vmm = cli->svm.cli ? 
&cli->svm : &cli->vmm; struct nouveau_vma *vma; if (is_power_of_2(nvbo->valid_domains)) @@ -228,8 +231,8 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem, else rep->domain = NOUVEAU_GEM_DOMAIN_VRAM; rep->offset = nvbo->bo.offset; - if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - vma = nouveau_vma_find(nvbo, &cli->vmm); + if (vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + vma = nouveau_vma_find(nvbo, vmm); if (!vma) return -EINVAL; @@ -321,7 +324,8 @@ struct validate_op { }; static void -validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, +validate_fini_no_ticket(struct validate_op *op, struct nouveau_channel *chan, + struct nouveau_fence *fence, struct drm_nouveau_gem_pushbuf_bo *pbbo) { struct nouveau_bo *nvbo; @@ -332,13 +336,11 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, b = &pbbo[nvbo->pbbo_index]; if (likely(fence)) { - struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev); - struct nouveau_vma *vma; - nouveau_bo_fence(nvbo, fence, !!b->write_domains); - if (drm->client.vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - vma = (void *)(unsigned long)b->user_priv; + if (chan->vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + struct nouveau_vma *vma = + (void *)(unsigned long)b->user_priv; nouveau_fence_unref(&vma->fence); dma_fence_get(&fence->base); vma->fence = fence; @@ -358,10 +360,11 @@ validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence, } static void -validate_fini(struct validate_op *op, struct nouveau_fence *fence, +validate_fini(struct validate_op *op, struct nouveau_channel *chan, + struct nouveau_fence *fence, struct drm_nouveau_gem_pushbuf_bo *pbbo) { - validate_fini_no_ticket(op, fence, pbbo); + validate_fini_no_ticket(op, chan, fence, pbbo); ww_acquire_fini(&op->ticket); } @@ -416,7 +419,7 @@ retry: list_splice_tail_init(&vram_list, &op->list); list_splice_tail_init(&gart_list, &op->list); list_splice_tail_init(&both_list, &op->list); - validate_fini_no_ticket(op, NULL, NULL); + validate_fini_no_ticket(op, chan, NULL, NULL); if (unlikely(ret == -EDEADLK)) { ret = ttm_bo_reserve_slowpath(&nvbo->bo, true, &op->ticket); @@ -430,8 +433,8 @@ retry: } } - if (cli->vmm.vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { - struct nouveau_vmm *vmm = &cli->vmm; + if (chan->vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) { + struct nouveau_vmm *vmm = chan->vmm; struct nouveau_vma *vma = nouveau_vma_find(nvbo, vmm); if (!vma) { NV_PRINTK(err, cli, "vma not found!\n"); @@ -471,7 +474,7 @@ retry: list_splice_tail(&gart_list, &op->list); list_splice_tail(&both_list, &op->list); if (ret) - validate_fini(op, NULL, NULL); + validate_fini(op, chan, NULL, NULL); return ret; } @@ -563,7 +566,7 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan, if (unlikely(ret < 0)) { if (ret != -ERESTARTSYS) NV_PRINTK(err, cli, "validating bo list\n"); - validate_fini(op, NULL, NULL); + validate_fini(op, chan, NULL, NULL); return ret; } *apply_relocs = ret; @@ -842,7 +845,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data, } out: - validate_fini(&op, fence, bo); + validate_fini(&op, chan, fence, bo); nouveau_fence_unref(&fence); out_prevalid: diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c new file mode 100644 index 000000000000..93ed43c413f0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -0,0 +1,835 @@ +/* + * Copyright 2018 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "nouveau_svm.h" +#include "nouveau_drv.h" +#include "nouveau_chan.h" +#include "nouveau_dmem.h" + +#include <nvif/notify.h> +#include <nvif/object.h> +#include <nvif/vmm.h> + +#include <nvif/class.h> +#include <nvif/clb069.h> +#include <nvif/ifc00d.h> + +#include <linux/sched/mm.h> +#include <linux/sort.h> +#include <linux/hmm.h> + +struct nouveau_svm { + struct nouveau_drm *drm; + struct mutex mutex; + struct list_head inst; + + struct nouveau_svm_fault_buffer { + int id; + struct nvif_object object; + u32 entries; + u32 getaddr; + u32 putaddr; + u32 get; + u32 put; + struct nvif_notify notify; + + struct nouveau_svm_fault { + u64 inst; + u64 addr; + u64 time; + u32 engine; + u8 gpc; + u8 hub; + u8 access; + u8 client; + u8 fault; + struct nouveau_svmm *svmm; + } **fault; + int fault_nr; + } buffer[1]; +}; + +#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a) +#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a) + +struct nouveau_ivmm { + struct nouveau_svmm *svmm; + u64 inst; + struct list_head head; +}; + +static struct nouveau_ivmm * +nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + list_for_each_entry(ivmm, &svm->inst, head) { + if (ivmm->inst == inst) + return ivmm; + } + return NULL; +} + +struct nouveau_svmm { + struct nouveau_vmm *vmm; + struct { + unsigned long start; + unsigned long limit; + } unmanaged; + + struct mutex mutex; + + struct mm_struct *mm; + struct hmm_mirror mirror; +}; + +#define SVMM_DBG(s,f,a...) \ + NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) +#define SVMM_ERR(s,f,a...) 
\ + NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) + +int +nouveau_svmm_bind(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_nouveau_svm_bind *args = data; + unsigned target, cmd, priority; + unsigned long addr, end, size; + struct mm_struct *mm; + + args->va_start &= PAGE_MASK; + args->va_end &= PAGE_MASK; + + /* Sanity check arguments */ + if (args->reserved0 || args->reserved1) + return -EINVAL; + if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK)) + return -EINVAL; + if (args->va_start >= args->va_end) + return -EINVAL; + if (!args->npages) + return -EINVAL; + + cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; + cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; + switch (cmd) { + case NOUVEAU_SVM_BIND_COMMAND__MIGRATE: + break; + default: + return -EINVAL; + } + + priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT; + priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK; + + /* FIXME support CPU target ie all target value < GPU_VRAM */ + target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT; + target &= NOUVEAU_SVM_BIND_TARGET_MASK; + switch (target) { + case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM: + break; + default: + return -EINVAL; + } + + /* + * FIXME: For now refuse non 0 stride, we need to change the migrate + * kernel function to handle stride to avoid to create a mess within + * each device driver. + */ + if (args->stride) + return -EINVAL; + + size = ((unsigned long)args->npages) << PAGE_SHIFT; + if ((args->va_start + size) <= args->va_start) + return -EINVAL; + if ((args->va_start + size) > args->va_end) + return -EINVAL; + + /* + * Ok we are ask to do something sane, for now we only support migrate + * commands but we will add things like memory policy (what to do on + * page fault) and maybe some other commands. + */ + + mm = get_task_mm(current); + down_read(&mm->mmap_sem); + + for (addr = args->va_start, end = args->va_start + size; addr < end;) { + struct vm_area_struct *vma; + unsigned long next; + + vma = find_vma_intersection(mm, addr, end); + if (!vma) + break; + + next = min(vma->vm_end, end); + /* This is a best effort so we ignore errors */ + nouveau_dmem_migrate_vma(cli->drm, vma, addr, next); + addr = next; + } + + /* + * FIXME Return the number of page we have migrated, again we need to + * update the migrate API to return that information so that we can + * report it to user space. + */ + args->result = 0; + + up_read(&mm->mmap_sem); + mmput(mm); + + return 0; +} + +/* Unlink channel instance from SVMM. */ +void +nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + if (svmm) { + mutex_lock(&svmm->vmm->cli->drm->svm->mutex); + ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst); + if (ivmm) { + list_del(&ivmm->head); + kfree(ivmm); + } + mutex_unlock(&svmm->vmm->cli->drm->svm->mutex); + } +} + +/* Link channel instance to SVMM. */ +int +nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) +{ + struct nouveau_ivmm *ivmm; + if (svmm) { + if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL))) + return -ENOMEM; + ivmm->svmm = svmm; + ivmm->inst = inst; + + mutex_lock(&svmm->vmm->cli->drm->svm->mutex); + list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst); + mutex_unlock(&svmm->vmm->cli->drm->svm->mutex); + } + return 0; +} + +/* Invalidate SVMM address-range on GPU. 
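+ *
+ * (Implemented below as an NVIF_VMM_V0_PFNCLR method call, which
+ * drops any PFN mappings in [start, limit) so the next GPU access to
+ * that range is replayed as a fresh fault.)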
*/ +static void +nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) +{ + if (limit > start) { + bool super = svmm->vmm->vmm.object.client->super; + svmm->vmm->vmm.object.client->super = true; + nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, + &(struct nvif_vmm_pfnclr_v0) { + .addr = start, + .size = limit - start, + }, sizeof(struct nvif_vmm_pfnclr_v0)); + svmm->vmm->vmm.object.client->super = super; + } +} + +static int +nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror, + const struct hmm_update *update) +{ + struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror); + unsigned long start = update->start; + unsigned long limit = update->end; + + if (!update->blockable) + return -EAGAIN; + + SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit); + + mutex_lock(&svmm->mutex); + if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { + if (start < svmm->unmanaged.start) { + nouveau_svmm_invalidate(svmm, start, + svmm->unmanaged.limit); + } + start = svmm->unmanaged.limit; + } + + nouveau_svmm_invalidate(svmm, start, limit); + mutex_unlock(&svmm->mutex); + return 0; +} + +static void +nouveau_svmm_release(struct hmm_mirror *mirror) +{ +} + +static const struct hmm_mirror_ops +nouveau_svmm = { + .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables, + .release = nouveau_svmm_release, +}; + +void +nouveau_svmm_fini(struct nouveau_svmm **psvmm) +{ + struct nouveau_svmm *svmm = *psvmm; + if (svmm) { + hmm_mirror_unregister(&svmm->mirror); + kfree(*psvmm); + *psvmm = NULL; + } +} + +int +nouveau_svmm_init(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct nouveau_svmm *svmm; + struct drm_nouveau_svm_init *args = data; + int ret; + + /* Allocate tracking for SVM-enabled VMM. */ + if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL))) + return -ENOMEM; + svmm->vmm = &cli->svm; + svmm->unmanaged.start = args->unmanaged_addr; + svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size; + mutex_init(&svmm->mutex); + + /* Check that SVM isn't already enabled for the client. */ + mutex_lock(&cli->mutex); + if (cli->svm.cli) { + ret = -EBUSY; + goto done; + } + + /* Allocate a new GPU VMM that can support SVM (managed by the + * client, with replayable faults enabled). + * + * All future channel/memory allocations will make use of this + * VMM instead of the standard one. + */ + ret = nvif_vmm_init(&cli->mmu, cli->vmm.vmm.object.oclass, true, + args->unmanaged_addr, args->unmanaged_size, + &(struct gp100_vmm_v0) { + .fault_replay = true, + }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); + if (ret) + goto done; + + /* Enable HMM mirroring of CPU address-space to VMM. 
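+ *
+ * (Once hmm_mirror_register() succeeds, every CPU-side invalidation
+ * of this mm triggers nouveau_svmm_sync_cpu_device_pagetables()
+ * above, which in turn clears the matching GPU PFN mappings.)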
*/ + svmm->mm = get_task_mm(current); + down_write(&svmm->mm->mmap_sem); + svmm->mirror.ops = &nouveau_svmm; + ret = hmm_mirror_register(&svmm->mirror, svmm->mm); + if (ret == 0) { + cli->svm.svmm = svmm; + cli->svm.cli = cli; + } + up_write(&svmm->mm->mmap_sem); + mmput(svmm->mm); + +done: + if (ret) + nouveau_svmm_fini(&svmm); + mutex_unlock(&cli->mutex); + return ret; +} + +static const u64 +nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = { + [HMM_PFN_VALID ] = NVIF_VMM_PFNMAP_V0_V, + [HMM_PFN_WRITE ] = NVIF_VMM_PFNMAP_V0_W, + [HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM, +}; + +static const u64 +nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = { + [HMM_PFN_ERROR ] = ~NVIF_VMM_PFNMAP_V0_V, + [HMM_PFN_NONE ] = NVIF_VMM_PFNMAP_V0_NONE, + [HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V, +}; + +/* Issue fault replay for GPU to retry accesses that faulted previously. */ +static void +nouveau_svm_fault_replay(struct nouveau_svm *svm) +{ + SVM_DBG(svm, "replay"); + WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, + GP100_VMM_VN_FAULT_REPLAY, + &(struct gp100_vmm_fault_replay_vn) {}, + sizeof(struct gp100_vmm_fault_replay_vn))); +} + +/* Cancel a replayable fault that could not be handled. + * + * Cancelling the fault will trigger recovery to reset the engine + * and kill the offending channel (ie. GPU SIGSEGV). + */ +static void +nouveau_svm_fault_cancel(struct nouveau_svm *svm, + u64 inst, u8 hub, u8 gpc, u8 client) +{ + SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client); + WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, + GP100_VMM_VN_FAULT_CANCEL, + &(struct gp100_vmm_fault_cancel_v0) { + .hub = hub, + .gpc = gpc, + .client = client, + .inst = inst, + }, sizeof(struct gp100_vmm_fault_cancel_v0))); +} + +static void +nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm, + struct nouveau_svm_fault *fault) +{ + nouveau_svm_fault_cancel(svm, fault->inst, + fault->hub, + fault->gpc, + fault->client); +} + +static int +nouveau_svm_fault_cmp(const void *a, const void *b) +{ + const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a; + const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b; + int ret; + if ((ret = (s64)fa->inst - fb->inst)) + return ret; + if ((ret = (s64)fa->addr - fb->addr)) + return ret; + /*XXX: atomic? 
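+ *
+ * (access 0 is READ and 3 is PREFETCH, as noted in the fault handler
+ * below; everything else counts as a write-class fault and sorts
+ * first, so a single writeable mapping also satisfies any later read
+ * faults on the same address.)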
*/ + return (fa->access == 0 || fa->access == 3) - + (fb->access == 0 || fb->access == 3); +} + +static void +nouveau_svm_fault_cache(struct nouveau_svm *svm, + struct nouveau_svm_fault_buffer *buffer, u32 offset) +{ + struct nvif_object *memory = &buffer->object; + const u32 instlo = nvif_rd32(memory, offset + 0x00); + const u32 insthi = nvif_rd32(memory, offset + 0x04); + const u32 addrlo = nvif_rd32(memory, offset + 0x08); + const u32 addrhi = nvif_rd32(memory, offset + 0x0c); + const u32 timelo = nvif_rd32(memory, offset + 0x10); + const u32 timehi = nvif_rd32(memory, offset + 0x14); + const u32 engine = nvif_rd32(memory, offset + 0x18); + const u32 info = nvif_rd32(memory, offset + 0x1c); + const u64 inst = (u64)insthi << 32 | instlo; + const u8 gpc = (info & 0x1f000000) >> 24; + const u8 hub = (info & 0x00100000) >> 20; + const u8 client = (info & 0x00007f00) >> 8; + struct nouveau_svm_fault *fault; + + //XXX: i think we're supposed to spin waiting */ + if (WARN_ON(!(info & 0x80000000))) + return; + + nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000); + + if (!buffer->fault[buffer->fault_nr]) { + fault = kmalloc(sizeof(*fault), GFP_KERNEL); + if (WARN_ON(!fault)) { + nouveau_svm_fault_cancel(svm, inst, hub, gpc, client); + return; + } + buffer->fault[buffer->fault_nr] = fault; + } + + fault = buffer->fault[buffer->fault_nr++]; + fault->inst = inst; + fault->addr = (u64)addrhi << 32 | addrlo; + fault->time = (u64)timehi << 32 | timelo; + fault->engine = engine; + fault->gpc = gpc; + fault->hub = hub; + fault->access = (info & 0x000f0000) >> 16; + fault->client = client; + fault->fault = (info & 0x0000001f); + + SVM_DBG(svm, "fault %016llx %016llx %02x", + fault->inst, fault->addr, fault->access); +} + +static int +nouveau_svm_fault(struct nvif_notify *notify) +{ + struct nouveau_svm_fault_buffer *buffer = + container_of(notify, typeof(*buffer), notify); + struct nouveau_svm *svm = + container_of(buffer, typeof(*svm), buffer[buffer->id]); + struct nvif_object *device = &svm->drm->client.device.object; + struct nouveau_svmm *svmm; + struct { + struct { + struct nvif_ioctl_v0 i; + struct nvif_ioctl_mthd_v0 m; + struct nvif_vmm_pfnmap_v0 p; + } i; + u64 phys[16]; + } args; + struct hmm_range range; + struct vm_area_struct *vma; + u64 inst, start, limit; + int fi, fn, pi, fill; + int replay = 0, ret; + + /* Parse available fault buffer entries into a cache, and update + * the GET pointer so HW can reuse the entries. + */ + SVM_DBG(svm, "fault handler"); + if (buffer->get == buffer->put) { + buffer->put = nvif_rd32(device, buffer->putaddr); + buffer->get = nvif_rd32(device, buffer->getaddr); + if (buffer->get == buffer->put) + return NVIF_NOTIFY_KEEP; + } + buffer->fault_nr = 0; + + SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put); + while (buffer->get != buffer->put) { + nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20); + if (++buffer->get == buffer->entries) + buffer->get = 0; + } + nvif_wr32(device, buffer->getaddr, buffer->get); + SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr); + + /* Sort parsed faults by instance pointer to prevent unnecessary + * instance to SVMM translations, followed by address and access + * type to reduce the amount of work when handling the faults. + */ + sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault), + nouveau_svm_fault_cmp, NULL); + + /* Lookup SVMM structure for each unique instance pointer. 
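+ *
+ * (The faults were just sorted by instance pointer, so the result of
+ * the previous lookup can simply be reused until the instance
+ * pointer changes.)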
*/ + mutex_lock(&svm->mutex); + for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) { + if (!svmm || buffer->fault[fi]->inst != inst) { + struct nouveau_ivmm *ivmm = + nouveau_ivmm_find(svm, buffer->fault[fi]->inst); + svmm = ivmm ? ivmm->svmm : NULL; + inst = buffer->fault[fi]->inst; + SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm); + } + buffer->fault[fi]->svmm = svmm; + } + mutex_unlock(&svm->mutex); + + /* Process list of faults. */ + args.i.i.version = 0; + args.i.i.type = NVIF_IOCTL_V0_MTHD; + args.i.m.version = 0; + args.i.m.method = NVIF_VMM_V0_PFNMAP; + args.i.p.version = 0; + + for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { + /* Cancel any faults from non-SVM channels. */ + if (!(svmm = buffer->fault[fi]->svmm)) { + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr); + + /* We try and group handling of faults within a small + * window into a single update. + */ + start = buffer->fault[fi]->addr; + limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT); + if (start < svmm->unmanaged.limit) + limit = min_t(u64, limit, svmm->unmanaged.start); + else + if (limit > svmm->unmanaged.start) + start = max_t(u64, start, svmm->unmanaged.limit); + SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); + + /* Intersect fault window with the CPU VMA, cancelling + * the fault if the address is invalid. + */ + down_read(&svmm->mm->mmap_sem); + vma = find_vma_intersection(svmm->mm, start, limit); + if (!vma) { + SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit); + up_read(&svmm->mm->mmap_sem); + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + start = max_t(u64, start, vma->vm_start); + limit = min_t(u64, limit, vma->vm_end); + SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit); + + if (buffer->fault[fi]->addr != start) { + SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr); + up_read(&svmm->mm->mmap_sem); + nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]); + continue; + } + + /* Prepare the GPU-side update of all pages within the + * fault window, determining required pages and access + * permissions based on pending faults. + */ + args.i.p.page = PAGE_SHIFT; + args.i.p.addr = start; + for (fn = fi, pi = 0;;) { + /* Determine required permissions based on GPU fault + * access flags. + *XXX: atomic? + */ + if (buffer->fault[fn]->access != 0 /* READ. */ && + buffer->fault[fn]->access != 3 /* PREFETCH. */) { + args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V | + NVIF_VMM_PFNMAP_V0_W; + } else { + args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V; + } + args.i.p.size = pi << PAGE_SHIFT; + + /* It's okay to skip over duplicate addresses from the + * same SVMM as faults are ordered by access type such + * that only the first one needs to be handled. + * + * ie. WRITE faults appear first, thus any handling of + * pending READ faults will already be satisfied. + */ + while (++fn < buffer->fault_nr && + buffer->fault[fn]->svmm == svmm && + buffer->fault[fn ]->addr == + buffer->fault[fn - 1]->addr); + + /* If the next fault is outside the window, or all GPU + * faults have been dealt with, we're done here. + */ + if (fn >= buffer->fault_nr || + buffer->fault[fn]->svmm != svmm || + buffer->fault[fn]->addr >= limit) + break; + + /* Fill in the gap between this fault and the next. 
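+			 * The next fault's own args.phys[] slot is written
+			 * at the top of the next loop iteration, so only the
+			 * pages strictly between the two faults are marked
+			 * NONE.  For example, with 4KiB pages and faults at
+			 * 0x1000 and 0x4000, fill is 3 and the loop stores
+			 * two NVIF_VMM_PFNMAP_V0_NONE entries, covering
+			 * 0x2000 and 0x3000.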
+			 */
+			fill = (buffer->fault[fn    ]->addr -
+				buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
+			while (--fill)
+				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE;
+		}
+
+		SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
+			 args.i.p.addr,
+			 args.i.p.addr + args.i.p.size, fn - fi);
+
+		/* Have HMM fault pages within the fault window to the GPU. */
+		range.vma = vma;
+		range.start = args.i.p.addr;
+		range.end = args.i.p.addr + args.i.p.size;
+		range.pfns = args.phys;
+		range.flags = nouveau_svm_pfn_flags;
+		range.values = nouveau_svm_pfn_values;
+		range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
+again:
+		ret = hmm_vma_fault(&range, true);
+		if (ret == 0) {
+			mutex_lock(&svmm->mutex);
+			if (!hmm_vma_range_done(&range)) {
+				mutex_unlock(&svmm->mutex);
+				goto again;
+			}
+
+			nouveau_dmem_convert_pfn(svm->drm, &range);
+
+			svmm->vmm->vmm.object.client->super = true;
+			ret = nvif_object_ioctl(&svmm->vmm->vmm.object,
+						&args, sizeof(args.i) +
+						pi * sizeof(args.phys[0]),
+						NULL);
+			svmm->vmm->vmm.object.client->super = false;
+			mutex_unlock(&svmm->mutex);
+		}
+		up_read(&svmm->mm->mmap_sem);
+
+		/* Cancel any faults in the window whose pages didn't manage
+		 * to keep their valid bit, or stay writeable when required.
+		 *
+		 * If handling failed completely, cancel all faults.
+		 */
+		while (fi < fn) {
+			struct nouveau_svm_fault *fault = buffer->fault[fi++];
+			pi = (fault->addr - range.start) >> PAGE_SHIFT;
+			if (ret ||
+			    !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) ||
+			    (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) &&
+			     fault->access != 0 && fault->access != 3)) {
+				nouveau_svm_fault_cancel_fault(svm, fault);
+				continue;
+			}
+			replay++;
+		}
+	}
+
+	/* Issue fault replay to the GPU. */
+	if (replay)
+		nouveau_svm_fault_replay(svm);
+	return NVIF_NOTIFY_KEEP;
+}
+
+static void
+nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	nvif_notify_put(&buffer->notify);
+}
+
+static int
+nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	struct nvif_object *device = &svm->drm->client.device.object;
+	buffer->get = nvif_rd32(device, buffer->getaddr);
+	buffer->put = nvif_rd32(device, buffer->putaddr);
+	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
+	return nvif_notify_get(&buffer->notify);
+}
+
+static void
+nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	int i;
+
+	if (buffer->fault) {
+		for (i = 0; i < buffer->entries && buffer->fault[i]; i++)
+			kfree(buffer->fault[i]);
+		kvfree(buffer->fault);
+	}
+
+	nouveau_svm_fault_buffer_fini(svm, id);
+
+	nvif_notify_fini(&buffer->notify);
+	nvif_object_fini(&buffer->object);
+}
+
+static int
+nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	struct nouveau_drm *drm = svm->drm;
+	struct nvif_object *device = &drm->client.device.object;
+	struct nvif_clb069_v0 args = {};
+	int ret;
+
+	buffer->id = id;
+
+	ret = nvif_object_init(device, 0, oclass, &args, sizeof(args),
+			       &buffer->object);
+	if (ret < 0) {
+		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
+		return ret;
+	}
+
+	nvif_object_map(&buffer->object, NULL, 0);
+	buffer->entries = args.entries;
+	buffer->getaddr = args.get;
+	buffer->putaddr = args.put;
+
+	ret = nvif_notify_init(&buffer->object, nouveau_svm_fault, true,
+			       NVB069_V0_NTFY_FAULT, NULL, 0, 0,
&buffer->notify); + if (ret) + return ret; + + buffer->fault = kvzalloc(sizeof(*buffer->fault) * buffer->entries, GFP_KERNEL); + if (!buffer->fault) + return -ENOMEM; + + return nouveau_svm_fault_buffer_init(svm, id); +} + +void +nouveau_svm_resume(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) + nouveau_svm_fault_buffer_init(svm, 0); +} + +void +nouveau_svm_suspend(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) + nouveau_svm_fault_buffer_fini(svm, 0); +} + +void +nouveau_svm_fini(struct nouveau_drm *drm) +{ + struct nouveau_svm *svm = drm->svm; + if (svm) { + nouveau_svm_fault_buffer_dtor(svm, 0); + kfree(drm->svm); + drm->svm = NULL; + } +} + +void +nouveau_svm_init(struct nouveau_drm *drm) +{ + static const struct nvif_mclass buffers[] = { + { VOLTA_FAULT_BUFFER_A, 0 }, + { MAXWELL_FAULT_BUFFER_A, 0 }, + {} + }; + struct nouveau_svm *svm; + int ret; + + /* Disable on Volta and newer until channel recovery is fixed, + * otherwise clients will have a trivial way to trash the GPU + * for everyone. + */ + if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL) + return; + + if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL))) + return; + + drm->svm->drm = drm; + mutex_init(&drm->svm->mutex); + INIT_LIST_HEAD(&drm->svm->inst); + + ret = nvif_mclass(&drm->client.device.object, buffers); + if (ret < 0) { + SVM_DBG(svm, "No supported fault buffer class"); + nouveau_svm_fini(drm); + return; + } + + ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0); + if (ret) { + nouveau_svm_fini(drm); + return; + } + + SVM_DBG(svm, "Initialised"); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h new file mode 100644 index 000000000000..e839d8189461 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_svm.h @@ -0,0 +1,48 @@ +#ifndef __NOUVEAU_SVM_H__ +#define __NOUVEAU_SVM_H__ +#include <nvif/os.h> +struct drm_device; +struct drm_file; +struct nouveau_drm; + +struct nouveau_svmm; + +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) +void nouveau_svm_init(struct nouveau_drm *); +void nouveau_svm_fini(struct nouveau_drm *); +void nouveau_svm_suspend(struct nouveau_drm *); +void nouveau_svm_resume(struct nouveau_drm *); + +int nouveau_svmm_init(struct drm_device *, void *, struct drm_file *); +void nouveau_svmm_fini(struct nouveau_svmm **); +int nouveau_svmm_join(struct nouveau_svmm *, u64 inst); +void nouveau_svmm_part(struct nouveau_svmm *, u64 inst); +int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *); +#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +static inline void nouveau_svm_init(struct nouveau_drm *drm) {} +static inline void nouveau_svm_fini(struct nouveau_drm *drm) {} +static inline void nouveau_svm_suspend(struct nouveau_drm *drm) {} +static inline void nouveau_svm_resume(struct nouveau_drm *drm) {} + +static inline int nouveau_svmm_init(struct drm_device *device, void *p, + struct drm_file *file) +{ + return -ENOSYS; +} + +static inline void nouveau_svmm_fini(struct nouveau_svmm **svmmp) {} + +static inline int nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) +{ + return 0; +} + +static inline void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) {} + +static inline int nouveau_svmm_bind(struct drm_device *device, void *p, + struct drm_file *file) +{ + return -ENOSYS; +} +#endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */ +#endif diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c index 
2032c3e4f6e5..77061182a1cf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c @@ -22,6 +22,7 @@ #include "nouveau_vmm.h" #include "nouveau_drv.h" #include "nouveau_bo.h" +#include "nouveau_svm.h" #include "nouveau_mem.h" void @@ -119,6 +120,7 @@ done: void nouveau_vmm_fini(struct nouveau_vmm *vmm) { + nouveau_svmm_fini(&vmm->svmm); nvif_vmm_fini(&vmm->vmm); vmm->cli = NULL; } @@ -126,7 +128,7 @@ nouveau_vmm_fini(struct nouveau_vmm *vmm) int nouveau_vmm_init(struct nouveau_cli *cli, s32 oclass, struct nouveau_vmm *vmm) { - int ret = nvif_vmm_init(&cli->mmu, oclass, PAGE_SIZE, 0, NULL, 0, + int ret = nvif_vmm_init(&cli->mmu, oclass, false, PAGE_SIZE, 0, NULL, 0, &vmm->vmm); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h index ede872f6f668..2b98d975f37e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_vmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h @@ -25,6 +25,7 @@ void nouveau_vma_unmap(struct nouveau_vma *); struct nouveau_vmm { struct nouveau_cli *cli; struct nvif_vmm vmm; + struct nouveau_svmm *svmm; }; int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *); diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c index e721bb2163a0..f07da00f285f 100644 --- a/drivers/gpu/drm/nouveau/nv84_fence.c +++ b/drivers/gpu/drm/nouveau/nv84_fence.c @@ -109,7 +109,6 @@ nv84_fence_context_del(struct nouveau_channel *chan) int nv84_fence_context_new(struct nouveau_channel *chan) { - struct nouveau_cli *cli = (void *)chan->user.client; struct nv84_fence_priv *priv = chan->drm->fence; struct nv84_fence_chan *fctx; int ret; @@ -127,7 +126,7 @@ nv84_fence_context_new(struct nouveau_channel *chan) fctx->base.sequence = nv84_fence_read(chan); mutex_lock(&priv->mutex); - ret = nouveau_vma_new(priv->bo, &cli->vmm, &fctx->vma); + ret = nouveau_vma_new(priv->bo, chan->vmm, &fctx->vma); mutex_unlock(&priv->mutex); if (ret) diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c index ef97dd223a32..61638b3b9d3d 100644 --- a/drivers/gpu/drm/nouveau/nvif/disp.c +++ b/drivers/gpu/drm/nouveau/nvif/disp.c @@ -34,7 +34,7 @@ int nvif_disp_ctor(struct nvif_device *device, s32 oclass, struct nvif_disp *disp) { static const struct nvif_mclass disps[] = { - { TU104_DISP, -1 }, + { TU102_DISP, -1 }, { GV100_DISP, -1 }, { GP102_DISP, -1 }, { GP100_DISP, -1 }, diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c index 6b9c5776547f..11487c00b909 100644 --- a/drivers/gpu/drm/nouveau/nvif/vmm.c +++ b/drivers/gpu/drm/nouveau/nvif/vmm.c @@ -112,8 +112,8 @@ nvif_vmm_fini(struct nvif_vmm *vmm) } int -nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, - void *argv, u32 argc, struct nvif_vmm *vmm) +nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, bool managed, u64 addr, + u64 size, void *argv, u32 argc, struct nvif_vmm *vmm) { struct nvif_vmm_v0 *args; u32 argn = sizeof(*args) + argc; @@ -125,6 +125,7 @@ nvif_vmm_init(struct nvif_mmu *mmu, s32 oclass, u64 addr, u64 size, if (!(args = kmalloc(argn, GFP_KERNEL))) return -ENOMEM; args->version = 0; + args->managed = managed; args->addr = addr; args->size = size; memcpy(args->data, argv, argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c index c61b467cf45e..245990de1e90 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c @@ -39,6 +39,7 @@ 
nvkm_subdev_name[NVKM_SUBDEV_NR] = { [NVKM_SUBDEV_FB ] = "fb", [NVKM_SUBDEV_FUSE ] = "fuse", [NVKM_SUBDEV_GPIO ] = "gpio", + [NVKM_SUBDEV_GSP ] = "gsp", [NVKM_SUBDEV_I2C ] = "i2c", [NVKM_SUBDEV_IBUS ] = "priv", [NVKM_SUBDEV_ICCSENSE] = "iccsense", diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild index 177a23301d6a..9211663239af 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild @@ -6,4 +6,4 @@ nvkm-y += nvkm/engine/ce/gm200.o nvkm-y += nvkm/engine/ce/gp100.o nvkm-y += nvkm/engine/ce/gp102.o nvkm-y += nvkm/engine/ce/gv100.o -nvkm-y += nvkm/engine/ce/tu104.o +nvkm-y += nvkm/engine/ce/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c index 3c25043bbb33..b4308e2d8c75 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu102.c @@ -24,7 +24,7 @@ #include <nvif/class.h> static const struct nvkm_engine_func -tu104_ce = { +tu102_ce = { .intr = gp100_ce_intr, .sclass = { { -1, -1, TURING_DMA_COPY_A }, @@ -33,8 +33,8 @@ tu104_ce = { }; int -tu104_ce_new(struct nvkm_device *device, int index, +tu102_ce_new(struct nvkm_device *device, int index, struct nvkm_engine **pengine) { - return nvkm_engine_new_(&tu104_ce, device, index, true, pengine); + return nvkm_engine_new_(&tu102_ce, device, index, true, pengine); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index bfbc9341e0c2..7971096b6767 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1613,7 +1613,7 @@ nvd7_chipset = { .pci = gf106_pci_new, .therm = gf119_therm_new, .timer = nv41_timer_new, - .volt = gf100_volt_new, + .volt = gf117_volt_new, .ce[0] = gf100_ce_new, .disp = gf119_disp_new, .dma = gf119_dma_new, @@ -2405,6 +2405,7 @@ nv140_chipset = { .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, @@ -2435,67 +2436,108 @@ nv140_chipset = { }; static const struct nvkm_device_chip +nv162_chipset = { + .name = "TU102", + .bar = tu102_bar_new, + .bios = nvkm_bios_new, + .bus = gf100_bus_new, + .devinit = tu102_devinit_new, + .fault = tu102_fault_new, + .fb = gv100_fb_new, + .fuse = gm107_fuse_new, + .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, + .i2c = gm200_i2c_new, + .ibus = gm200_ibus_new, + .imem = nv50_instmem_new, + .ltc = gp102_ltc_new, + .mc = tu102_mc_new, + .mmu = tu102_mmu_new, + .pci = gp100_pci_new, + .pmu = gp102_pmu_new, + .therm = gp100_therm_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, + .disp = tu102_disp_new, + .dma = gv100_dma_new, + .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, +}; + +static const struct nvkm_device_chip nv164_chipset = { .name = "TU104", - .bar = tu104_bar_new, + .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, - .devinit = tu104_devinit_new, - .fault = tu104_fault_new, + .devinit = tu102_devinit_new, + .fault = tu102_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .ltc = gp102_ltc_new, - .mc = tu104_mc_new, - .mmu 
= tu104_mmu_new, + .mc = tu102_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .therm = gp100_therm_new, .timer = gk20a_timer_new, .top = gk104_top_new, - .ce[0] = tu104_ce_new, - .ce[1] = tu104_ce_new, - .ce[2] = tu104_ce_new, - .ce[3] = tu104_ce_new, - .ce[4] = tu104_ce_new, - .disp = tu104_disp_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, + .disp = tu102_disp_new, .dma = gv100_dma_new, - .fifo = tu104_fifo_new, + .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, }; static const struct nvkm_device_chip nv166_chipset = { .name = "TU106", - .bar = tu104_bar_new, + .bar = tu102_bar_new, .bios = nvkm_bios_new, .bus = gf100_bus_new, - .devinit = tu104_devinit_new, - .fault = tu104_fault_new, + .devinit = tu102_devinit_new, + .fault = tu102_fault_new, .fb = gv100_fb_new, .fuse = gm107_fuse_new, .gpio = gk104_gpio_new, + .gsp = gv100_gsp_new, .i2c = gm200_i2c_new, .ibus = gm200_ibus_new, .imem = nv50_instmem_new, .ltc = gp102_ltc_new, - .mc = tu104_mc_new, - .mmu = tu104_mmu_new, + .mc = tu102_mc_new, + .mmu = tu102_mmu_new, .pci = gp100_pci_new, .pmu = gp102_pmu_new, .therm = gp100_therm_new, .timer = gk20a_timer_new, .top = gk104_top_new, - .ce[0] = tu104_ce_new, - .ce[1] = tu104_ce_new, - .ce[2] = tu104_ce_new, - .ce[3] = tu104_ce_new, - .ce[4] = tu104_ce_new, - .disp = tu104_disp_new, + .ce[0] = tu102_ce_new, + .ce[1] = tu102_ce_new, + .ce[2] = tu102_ce_new, + .ce[3] = tu102_ce_new, + .ce[4] = tu102_ce_new, + .disp = tu102_disp_new, .dma = gv100_dma_new, - .fifo = tu104_fifo_new, + .fifo = tu102_fifo_new, + .nvdec[0] = gp102_nvdec_new, + .sec2 = tu102_sec2_new, }; static int @@ -2535,6 +2577,7 @@ nvkm_device_subdev(struct nvkm_device *device, int index) _(FB , device->fb , &device->fb->subdev); _(FUSE , device->fuse , &device->fuse->subdev); _(GPIO , device->gpio , &device->gpio->subdev); + _(GSP , device->gsp , &device->gsp->subdev); _(I2C , device->i2c , &device->i2c->subdev); _(IBUS , device->ibus , device->ibus); _(ICCSENSE, device->iccsense, &device->iccsense->subdev); @@ -2950,6 +2993,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x138: device->chip = &nv138_chipset; break; case 0x13b: device->chip = &nv13b_chipset; break; case 0x140: device->chip = &nv140_chipset; break; + case 0x162: device->chip = &nv162_chipset; break; case 0x164: device->chip = &nv164_chipset; break; case 0x166: device->chip = &nv166_chipset; break; default: @@ -3017,6 +3061,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, _(NVKM_SUBDEV_FB , fb); _(NVKM_SUBDEV_FUSE , fuse); _(NVKM_SUBDEV_GPIO , gpio); + _(NVKM_SUBDEV_GSP , gsp); _(NVKM_SUBDEV_I2C , i2c); _(NVKM_SUBDEV_IBUS , ibus); _(NVKM_SUBDEV_ICCSENSE, iccsense); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h index 253ab914a8ef..2a53e37dfa7a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h @@ -12,6 +12,7 @@ #include <subdev/fb.h> #include <subdev/fuse.h> #include <subdev/gpio.h> +#include <subdev/gsp.h> #include <subdev/i2c.h> #include <subdev/ibus.h> #include <subdev/iccsense.h> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index 092ddc4ffefa..03c6d9aef075 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -365,16 
+365,15 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index, } if (!sclass) { - switch (index) { - case 0: sclass = &nvkm_control_oclass; break; - case 1: - if (!device->mmu) - return -EINVAL; + if (index-- == 0) + sclass = &nvkm_control_oclass; + else if (device->mmu && index-- == 0) sclass = &device->mmu->user; - break; - default: + else if (device->fault && index-- == 0) + sclass = &device->fault->user; + else return -EINVAL; - } + oclass->base = sclass->base; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index c6a257ba4347..2c28a5e747cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -15,7 +15,7 @@ nvkm-y += nvkm/engine/disp/gm200.o nvkm-y += nvkm/engine/disp/gp100.o nvkm-y += nvkm/engine/disp/gp102.o nvkm-y += nvkm/engine/disp/gv100.o -nvkm-y += nvkm/engine/disp/tu104.o +nvkm-y += nvkm/engine/disp/tu102.o nvkm-y += nvkm/engine/disp/vga.o nvkm-y += nvkm/engine/disp/head.o @@ -39,7 +39,7 @@ nvkm-y += nvkm/engine/disp/sorgk104.o nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o nvkm-y += nvkm/engine/disp/sorgv100.o -nvkm-y += nvkm/engine/disp/sortu104.o +nvkm-y += nvkm/engine/disp/sortu102.o nvkm-y += nvkm/engine/disp/outp.o nvkm-y += nvkm/engine/disp/dp.o @@ -71,7 +71,7 @@ nvkm-y += nvkm/engine/disp/rootgm200.o nvkm-y += nvkm/engine/disp/rootgp100.o nvkm-y += nvkm/engine/disp/rootgp102.o nvkm-y += nvkm/engine/disp/rootgv100.o -nvkm-y += nvkm/engine/disp/roottu104.o +nvkm-y += nvkm/engine/disp/roottu102.o nvkm-y += nvkm/engine/disp/channv50.o nvkm-y += nvkm/engine/disp/changf119.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c index 794e90982641..e675d9b9d5d7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c @@ -91,15 +91,21 @@ gf119_disp_intr_error(struct nv50_disp *disp, int chid) { struct nvkm_subdev *subdev = &disp->base.engine.subdev; struct nvkm_device *device = subdev->device; - u32 mthd = nvkm_rd32(device, 0x6101f0 + (chid * 12)); + u32 stat = nvkm_rd32(device, 0x6101f0 + (chid * 12)); + u32 type = (stat & 0x00007000) >> 12; + u32 mthd = (stat & 0x00000ffc); u32 data = nvkm_rd32(device, 0x6101f4 + (chid * 12)); - u32 unkn = nvkm_rd32(device, 0x6101f8 + (chid * 12)); + u32 code = nvkm_rd32(device, 0x6101f8 + (chid * 12)); + const struct nvkm_enum *reason = + nvkm_enum_find(nv50_disp_intr_error_type, type); - nvkm_error(subdev, "chid %d mthd %04x data %08x %08x %08x\n", - chid, (mthd & 0x0000ffc), data, mthd, unkn); + nvkm_error(subdev, "chid %d stat %08x reason %d [%s] mthd %04x " + "data %08x code %08x\n", + chid, stat, type, reason ? 
reason->name : "", + mthd, data, code); if (chid < ARRAY_SIZE(disp->chan)) { - switch (mthd & 0xffc) { + switch (mthd) { case 0x0080: nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR); break; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c index 47be0ba4aebe..892be6c9b76c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c @@ -103,10 +103,13 @@ gv100_disp_exception(struct nv50_disp *disp, int chid) u32 mthd = (stat & 0x00000fff) << 2; u32 data = nvkm_rd32(device, 0x611024 + (chid * 12)); u32 code = nvkm_rd32(device, 0x611028 + (chid * 12)); + const struct nvkm_enum *reason = + nvkm_enum_find(nv50_disp_intr_error_type, type); - nvkm_error(subdev, "chid %d %08x [type %d mthd %04x] " + nvkm_error(subdev, "chid %d stat %08x reason %d [%s] mthd %04x " "data %08x code %08x\n", - chid, stat, type, mthd, data, code); + chid, stat, type, reason ? reason->name : "", + mthd, data, code); if (chid < ARRAY_SIZE(disp->chan) && disp->chan[chid]) { switch (mthd) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index 790e42f460fd..1681ddccd298 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -201,5 +201,5 @@ int gm200_sor_new(struct nvkm_disp *, int); int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); -int tu104_sor_new(struct nvkm_disp *, int); +int tu102_sor_new(struct nvkm_disp *, int); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c index def005dd5fda..e21556bf2cb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c @@ -28,7 +28,6 @@ #include "rootnv50.h" #include <core/client.h> -#include <core/enum.h> #include <core/ramht.h> #include <subdev/bios.h> #include <subdev/bios/disp.h> @@ -593,12 +592,15 @@ nv50_disp_super(struct work_struct *work) nvkm_wr32(device, 0x610030, 0x80000000); } -static const struct nvkm_enum +const struct nvkm_enum nv50_disp_intr_error_type[] = { - { 3, "ILLEGAL_MTHD" }, - { 4, "INVALID_VALUE" }, + { 0, "NONE" }, + { 1, "PUSHBUFFER_ERR" }, + { 2, "TRAP" }, + { 3, "RESERVED_METHOD" }, + { 4, "INVALID_ARG" }, { 5, "INVALID_STATE" }, - { 7, "INVALID_HANDLE" }, + { 7, "UNRESOLVABLE_HANDLE" }, {} }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h index c36a8a7cafa1..e5d00f478bb1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h @@ -5,6 +5,8 @@ #include "priv.h" struct nvkm_head; +#include <core/enum.h> + struct nv50_disp { const struct nv50_disp_func *func; struct nvkm_disp base; @@ -71,6 +73,7 @@ int nv50_disp_init(struct nv50_disp *); void nv50_disp_fini(struct nv50_disp *); void nv50_disp_intr(struct nv50_disp *); void nv50_disp_super(struct work_struct *); +extern const struct nvkm_enum nv50_disp_intr_error_type[]; int gf119_disp_init(struct nv50_disp *); void gf119_disp_fini(struct nv50_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h index 97de928cbde1..aee9822a7a87 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h @@ -37,5 +37,5 @@ extern const struct 
nvkm_disp_oclass gm200_disp_root_oclass; extern const struct nvkm_disp_oclass gp100_disp_root_oclass; extern const struct nvkm_disp_oclass gp102_disp_root_oclass; extern const struct nvkm_disp_oclass gv100_disp_root_oclass; -extern const struct nvkm_disp_oclass tu104_disp_root_oclass; +extern const struct nvkm_disp_oclass tu102_disp_root_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c index ad438c62f66c..579a5d02308a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu102.c @@ -25,28 +25,28 @@ #include <nvif/class.h> static const struct nv50_disp_root_func -tu104_disp_root = { +tu102_disp_root = { .user = { - {{0,0,TU104_DISP_CURSOR }, gv100_disp_curs_new }, - {{0,0,TU104_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, - {{0,0,TU104_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, - {{0,0,TU104_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, + {{0,0,TU102_DISP_CURSOR }, gv100_disp_curs_new }, + {{0,0,TU102_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new }, + {{0,0,TU102_DISP_CORE_CHANNEL_DMA }, gv100_disp_core_new }, + {{0,0,TU102_DISP_WINDOW_CHANNEL_DMA }, gv100_disp_wndw_new }, {} }, }; static int -tu104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, +tu102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { - return nv50_disp_root_new_(&tu104_disp_root, disp, oclass, + return nv50_disp_root_new_(&tu102_disp_root, disp, oclass, data, size, pobject); } const struct nvkm_disp_oclass -tu104_disp_root_oclass = { - .base.oclass = TU104_DISP, +tu102_disp_root_oclass = { + .base.oclass = TU102_DISP, .base.minver = -1, .base.maxver = -1, - .ctor = tu104_disp_root_new, + .ctor = tu102_disp_root_new, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index df026a525ef1..d57b73ada89e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -24,7 +24,7 @@ #include <subdev/timer.h> static void -tu104_sor_dp_vcpi(struct nvkm_ior *sor, int head, +tu102_sor_dp_vcpi(struct nvkm_ior *sor, int head, u8 slot, u8 slot_nr, u16 pbn, u16 aligned) { struct nvkm_device *device = sor->disp->engine.subdev.device; @@ -35,7 +35,7 @@ tu104_sor_dp_vcpi(struct nvkm_ior *sor, int head, } static int -tu104_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) +tu102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) { struct nvkm_device *device = sor->disp->engine.subdev.device; const u32 soff = nv50_ior_base(sor); @@ -62,7 +62,7 @@ tu104_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) } static const struct nvkm_ior_func -tu104_sor = { +tu102_sor = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -75,11 +75,11 @@ tu104_sor = { }, .dp = { .lanes = { 0, 1, 2, 3 }, - .links = tu104_sor_dp_links, + .links = tu102_sor_dp_links, .power = g94_sor_dp_power, .pattern = gm107_sor_dp_pattern, .drive = gm200_sor_dp_drive, - .vcpi = tu104_sor_dp_vcpi, + .vcpi = tu102_sor_dp_vcpi, .audio = gv100_sor_dp_audio, .audio_sym = gv100_sor_dp_audio_sym, .watermark = gv100_sor_dp_watermark, @@ -91,7 +91,7 @@ tu104_sor = { }; int -tu104_sor_new(struct nvkm_disp *disp, int id) +tu102_sor_new(struct nvkm_disp *disp, int id) { - return nvkm_ior_new_(&tu104_sor, disp, SOR, id); + return 
nvkm_ior_new_(&tu102_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c index 13fa21459d38..883ae4151ff8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c @@ -29,7 +29,7 @@ #include <subdev/timer.h> static int -tu104_disp_init(struct nv50_disp *disp) +tu102_disp_init(struct nv50_disp *disp) { struct nvkm_device *device = disp->base.engine.subdev.device; struct nvkm_head *head; @@ -132,21 +132,21 @@ tu104_disp_init(struct nv50_disp *disp) } static const struct nv50_disp_func -tu104_disp = { - .init = tu104_disp_init, +tu102_disp = { + .init = tu102_disp_init, .fini = gv100_disp_fini, .intr = gv100_disp_intr, .uevent = &gv100_disp_chan_uevent, .super = gv100_disp_super, - .root = &tu104_disp_root_oclass, + .root = &tu102_disp_root_oclass, .wndw = { .cnt = gv100_disp_wndw_cnt }, .head = { .cnt = gv100_head_cnt, .new = gv100_head_new }, - .sor = { .cnt = gv100_sor_cnt, .new = tu104_sor_new }, + .sor = { .cnt = gv100_sor_cnt, .new = tu102_sor_new }, .ramht_size = 0x2000, }; int -tu104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) +tu102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp) { - return nv50_disp_new_(&tu104_disp, device, index, pdisp); + return nv50_disp_new_(&tu102_disp, device, index, pdisp); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c index 816ccaedfc73..8675613e142b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c @@ -22,6 +22,7 @@ #include <engine/falcon.h> #include <core/gpuobj.h> +#include <subdev/mc.h> #include <subdev/timer.h> #include <engine/fifo.h> @@ -107,8 +108,10 @@ nvkm_falcon_fini(struct nvkm_engine *engine, bool suspend) } } - nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000); - nvkm_wr32(device, base + 0x014, 0xffffffff); + if (nvkm_mc_enabled(device, engine->subdev.index)) { + nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000); + nvkm_wr32(device, base + 0x014, 0xffffffff); + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 87d8e054e40a..05aada541ea5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -16,7 +16,7 @@ nvkm-y += nvkm/engine/fifo/gm20b.o nvkm-y += nvkm/engine/fifo/gp100.o nvkm-y += nvkm/engine/fifo/gp10b.o nvkm-y += nvkm/engine/fifo/gv100.o -nvkm-y += nvkm/engine/fifo/tu104.o +nvkm-y += nvkm/engine/fifo/tu102.o nvkm-y += nvkm/engine/fifo/chan.o nvkm-y += nvkm/engine/fifo/channv50.o @@ -34,7 +34,7 @@ nvkm-y += nvkm/engine/fifo/gpfifog84.o nvkm-y += nvkm/engine/fifo/gpfifogf100.o nvkm-y += nvkm/engine/fifo/gpfifogk104.o nvkm-y += nvkm/engine/fifo/gpfifogv100.o -nvkm-y += nvkm/engine/fifo/gpfifotu104.o +nvkm-y += nvkm/engine/fifo/gpfifotu102.o nvkm-y += nvkm/engine/fifo/usergv100.o -nvkm-y += nvkm/engine/fifo/usertu104.o +nvkm-y += nvkm/engine/fifo/usertu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h index a14545d871d8..f8557cdfbd81 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h @@ -47,6 +47,6 @@ int gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *, int gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *, struct 
nvkm_engine *, bool); -int tu104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, +int tu102_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *, void *data, u32 size, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c index ff70484dd01a..abef7fb6e2d3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c @@ -29,14 +29,14 @@ #include <nvif/unpack.h> static u32 -tu104_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base) +tu102_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base) { struct gk104_fifo_chan *chan = gk104_fifo_chan(base); return (chan->runl << 16) | chan->base.chid; } static const struct nvkm_fifo_chan_func -tu104_fifo_gpfifo = { +tu102_fifo_gpfifo = { .dtor = gk104_fifo_gpfifo_dtor, .init = gk104_fifo_gpfifo_init, .fini = gk104_fifo_gpfifo_fini, @@ -45,11 +45,11 @@ tu104_fifo_gpfifo = { .engine_dtor = gk104_fifo_gpfifo_engine_dtor, .engine_init = gv100_fifo_gpfifo_engine_init, .engine_fini = gv100_fifo_gpfifo_engine_fini, - .submit_token = tu104_fifo_gpfifo_submit_token, + .submit_token = tu102_fifo_gpfifo_submit_token, }; int -tu104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, +tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, void *data, u32 size, struct nvkm_object **pobject) { struct nvkm_object *parent = oclass->parent; @@ -67,7 +67,7 @@ tu104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, args->v0.ilength, args->v0.runlist, args->v0.priv); if (args->v0.priv && !oclass->client->super) return -EINVAL; - return gv100_fifo_gpfifo_new_(&tu104_fifo_gpfifo, fifo, + return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo, &args->v0.runlist, &args->v0.chid, args->v0.vmm, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c index 98c80705bc61..005f3e1729b9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c @@ -29,7 +29,7 @@ #include <nvif/class.h> static void -tu104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, +tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, struct nvkm_memory *mem, int nr) { struct nvkm_device *device = fifo->base.engine.subdev.device; @@ -44,15 +44,15 @@ tu104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl, } const struct gk104_fifo_runlist_func -tu104_fifo_runlist = { +tu102_fifo_runlist = { .size = 16, .cgrp = gv100_fifo_runlist_cgrp, .chan = gv100_fifo_runlist_chan, - .commit = tu104_fifo_runlist_commit, + .commit = tu102_fifo_runlist_commit, }; static const struct nvkm_enum -tu104_fifo_fault_engine[] = { +tu102_fifo_fault_engine[] = { { 0x01, "DISPLAY" }, { 0x03, "PTP" }, { 0x06, "PWR_PMU" }, @@ -80,7 +80,7 @@ tu104_fifo_fault_engine[] = { }; static void -tu104_fifo_pbdma_init(struct gk104_fifo *fifo) +tu102_fifo_pbdma_init(struct gk104_fifo *fifo) { struct nvkm_device *device = fifo->base.engine.subdev.device; const u32 mask = (1 << fifo->pbdma_nr) - 1; @@ -89,28 +89,28 @@ tu104_fifo_pbdma_init(struct gk104_fifo *fifo) } static const struct gk104_fifo_pbdma_func -tu104_fifo_pbdma = { +tu102_fifo_pbdma = { .nr = gm200_fifo_pbdma_nr, - .init = tu104_fifo_pbdma_init, + .init = tu102_fifo_pbdma_init, .init_timeout = gk208_fifo_pbdma_init_timeout, }; static const struct gk104_fifo_func -tu104_fifo = { - .pbdma = 
&tu104_fifo_pbdma, +tu102_fifo = { + .pbdma = &tu102_fifo_pbdma, .fault.access = gv100_fifo_fault_access, - .fault.engine = tu104_fifo_fault_engine, + .fault.engine = tu102_fifo_fault_engine, .fault.reason = gv100_fifo_fault_reason, .fault.hubclient = gv100_fifo_fault_hubclient, .fault.gpcclient = gv100_fifo_fault_gpcclient, - .runlist = &tu104_fifo_runlist, - .user = {{-1,-1,VOLTA_USERMODE_A }, tu104_fifo_user_new }, - .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu104_fifo_gpfifo_new }, + .runlist = &tu102_fifo_runlist, + .user = {{-1,-1,VOLTA_USERMODE_A }, tu102_fifo_user_new }, + .chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new }, .cgrp_force = true, }; int -tu104_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) +tu102_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo) { - return gk104_fifo_new_(&tu104_fifo, device, index, 4096, pfifo); + return gk104_fifo_new_(&tu102_fifo, device, index, 4096, pfifo); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h index 14b0c6bde8eb..54a3a3092cc0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h @@ -3,6 +3,6 @@ #include "priv.h" int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **); -int tu104_fifo_user_new(const struct nvkm_oclass *, void *, u32, +int tu102_fifo_user_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c index 8f98548a21f6..217268f8ccad 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c @@ -22,7 +22,7 @@ #include "user.h" static int -tu104_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, +tu102_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, enum nvkm_object_map *type, u64 *addr, u64 *size) { struct nvkm_device *device = object->engine->subdev.device; @@ -33,13 +33,13 @@ tu104_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc, } static const struct nvkm_object_func -tu104_fifo_user = { - .map = tu104_fifo_user_map, +tu102_fifo_user = { + .map = tu102_fifo_user_map, }; int -tu104_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, +tu102_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject) { - return nvkm_object_new_(&tu104_fifo_user, oclass, argv, argc, pobject); + return nvkm_object_new_(&tu102_fifo_user, oclass, argv, argc, pobject); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c index cd8cf6f7024c..d41fb94524e9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c @@ -25,6 +25,33 @@ #include <engine/fifo.h> +u32 +nvkm_gr_ctxsw_inst(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.inst) + return gr->func->ctxsw.inst(gr); + return 0; +} + +int +nvkm_gr_ctxsw_resume(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.resume) + return gr->func->ctxsw.resume(gr); + return 0; +} + +int +nvkm_gr_ctxsw_pause(struct nvkm_device *device) +{ + struct nvkm_gr *gr = device->gr; + if (gr && gr->func->ctxsw.pause) + return gr->func->ctxsw.pause(gr); + return 0; +} + static bool nvkm_gr_chsw_load(struct 
nvkm_engine *engine) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index e813a3f8ea93..85f2d1e950e8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1523,13 +1523,9 @@ gf100_grctx_generate(struct gf100_gr *gr) /* Make channel current. */ addr = nvkm_memory_addr(inst) >> 12; if (gr->firmware) { - nvkm_wr32(device, 0x409840, 0x00000030); - nvkm_wr32(device, 0x409500, 0x80000000 | addr); - nvkm_wr32(device, 0x409504, 0x00000003); - nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800) & 0x00000010) - break; - ); + ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr); + if (ret) + goto done; nvkm_kmap(data); nvkm_wo32(data, 0x1c, 1); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 70d3d41e616c..81a13cf9a292 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -715,6 +715,211 @@ gf100_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ +static u32 +gf100_gr_ctxsw_inst(struct nvkm_gr *gr) +{ + return nvkm_rd32(gr->engine.subdev.device, 0x409b00); +} + +static int +gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409804, 0xffffffff); + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0xffffffff); + nvkm_wr32(device, 0x409504, mthd); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409804); + if (stat == 0x00000002) + return -EIO; + if (stat == 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + +int +gf100_gr_fecs_start_ctxsw(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + int ret = 0; + + mutex_lock(&gr->fecs.mutex); + if (!--gr->fecs.disable) { + if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x39))) + gr->fecs.disable++; + } + mutex_unlock(&gr->fecs.mutex); + return ret; +} + +int +gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + int ret = 0; + + mutex_lock(&gr->fecs.mutex); + if (!gr->fecs.disable++) { + if (WARN_ON(ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x38))) + gr->fecs.disable--; + } + mutex_unlock(&gr->fecs.mutex); + return ret; +} + +int +gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409840, 0x00000030); + nvkm_wr32(device, 0x409500, inst); + nvkm_wr32(device, 0x409504, 0x00000003); + nvkm_msec(device, 2000, + u32 stat = nvkm_rd32(device, 0x409800); + if (stat & 0x00000020) + return -EIO; + if (stat & 0x00000010) + return 0; + ); + + return -ETIMEDOUT; +} + +static int +gf100_gr_fecs_set_reglist_virtual_address(struct gf100_gr *gr, u64 addr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409810, addr >> 8); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000032); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) == 0x00000001) + return 0; + ); + + return -ETIMEDOUT; +} + +static int +gf100_gr_fecs_set_reglist_bind_instance(struct gf100_gr *gr, u32 inst) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, 0x409810, inst); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 
0x409500, 0x00000001);
+	nvkm_wr32(device, 0x409504, 0x00000031);
+	nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800) == 0x00000001)
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
+static int
+gf100_gr_fecs_discover_reglist_image_size(struct gf100_gr *gr, u32 *psize)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409800, 0x00000000);
+	nvkm_wr32(device, 0x409500, 0x00000001);
+	nvkm_wr32(device, 0x409504, 0x00000030);
+	nvkm_msec(device, 2000,
+		if ((*psize = nvkm_rd32(device, 0x409800)))
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
+static int
+gf100_gr_fecs_elpg_bind(struct gf100_gr *gr)
+{
+	u32 size;
+	int ret;
+
+	ret = gf100_gr_fecs_discover_reglist_image_size(gr, &size);
+	if (ret)
+		return ret;
+
+	/*XXX: We need to allocate + map the above into PMU's inst block,
+	 *     which means we probably need a proper PMU before we
+	 *     even bother.
+	 */
+
+	ret = gf100_gr_fecs_set_reglist_bind_instance(gr, 0);
+	if (ret)
+		return ret;
+
+	return gf100_gr_fecs_set_reglist_virtual_address(gr, 0);
+}
+
+static int
+gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000025);
+	nvkm_msec(device, 2000,
+		if ((*psize = nvkm_rd32(device, 0x409800)))
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
+static int
+gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000016);
+	nvkm_msec(device, 2000,
+		if ((*psize = nvkm_rd32(device, 0x409800)))
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
+static int
+gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000010);
+	nvkm_msec(device, 2000,
+		if ((*psize = nvkm_rd32(device, 0x409800)))
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
+static void
+gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, timeout);
+	nvkm_wr32(device, 0x409504, 0x00000021);
+}
+
 static bool
 gf100_gr_chsw_load(struct nvkm_gr *base)
 {
@@ -1487,6 +1692,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
 	u32 secboot_mask = 0;
+	int ret;
 
 	/* load fuc microcode */
 	nvkm_mc_unk260(device, 0);
@@ -1495,12 +1701,12 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
 		secboot_mask |= BIT(NVKM_SECBOOT_FALCON_FECS);
 	else
-		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
+		gf100_gr_init_fw(gr->fecs.falcon, &gr->fuc409c, &gr->fuc409d);
 
 	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
 		secboot_mask |= BIT(NVKM_SECBOOT_FALCON_GPCCS);
 	else
-		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
+		gf100_gr_init_fw(gr->gpccs.falcon, &gr->fuc41ac, &gr->fuc41ad);
 
 	if (secboot_mask != 0) {
 		int ret = nvkm_secboot_reset(sb, secboot_mask);
@@ -1515,8 +1721,8 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 	nvkm_wr32(device, 0x41a10c, 0x00000000);
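 	/* The gf100_gr_fecs_discover_*() helpers added above all follow
 	 * the same submit-and-poll idiom: write the argument to 0x409500
 	 * and the method to 0x409504, then poll 0x409800 for a result.
 	 * A hypothetical consolidation, sketched for illustration only
 	 * (gf100_gr_fecs_mthd_data is an invented name):
 	 *
 	 *	static int
 	 *	gf100_gr_fecs_mthd_data(struct gf100_gr *gr, u32 data,
 	 *				u32 mthd, u32 *presult)
 	 *	{
 	 *		struct nvkm_device *device = gr->base.engine.subdev.device;
 	 *
 	 *		nvkm_wr32(device, 0x409840, 0xffffffff);
 	 *		nvkm_wr32(device, 0x409500, data);
 	 *		nvkm_wr32(device, 0x409504, mthd);
 	 *		nvkm_msec(device, 2000,
 	 *			if ((*presult = nvkm_rd32(device, 0x409800)))
 	 *				return 0;
 	 *		);
 	 *		return -ETIMEDOUT;
 	 *	}
 	 *
 	 * with which, e.g., gf100_gr_fecs_discover_image_size() reduces to
 	 * a single gf100_gr_fecs_mthd_data(gr, 0, 0x10, psize) call.
 	 */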
nvkm_wr32(device, 0x40910c, 0x00000000); - nvkm_falcon_start(gr->gpccs); - nvkm_falcon_start(gr->fecs); + nvkm_falcon_start(gr->gpccs.falcon); + nvkm_falcon_start(gr->fecs.falcon); if (nvkm_msec(device, 2000, if (nvkm_rd32(device, 0x409800) & 0x00000001) @@ -1524,72 +1730,36 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr) ) < 0) return -EBUSY; - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x7fffffff); - nvkm_wr32(device, 0x409504, 0x00000021); + gf100_gr_fecs_set_watchdog_timeout(gr, 0x7fffffff); - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000010); - if (nvkm_msec(device, 2000, - if ((gr->size = nvkm_rd32(device, 0x409800))) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000016); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store main context image. */ + ret = gf100_gr_fecs_discover_image_size(gr, &gr->size); + if (ret) + return ret; - nvkm_wr32(device, 0x409840, 0xffffffff); - nvkm_wr32(device, 0x409500, 0x00000000); - nvkm_wr32(device, 0x409504, 0x00000025); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store ZCULL image. */ + ret = gf100_gr_fecs_discover_zcull_image_size(gr, &gr->size_zcull); + if (ret) + return ret; - if (device->chipset >= 0xe0) { - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000030); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x409810, 0xb00095c8); - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000031); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; - - nvkm_wr32(device, 0x409810, 0x00080420); - nvkm_wr32(device, 0x409800, 0x00000000); - nvkm_wr32(device, 0x409500, 0x00000001); - nvkm_wr32(device, 0x409504, 0x00000032); - if (nvkm_msec(device, 2000, - if (nvkm_rd32(device, 0x409800)) - break; - ) < 0) - return -EBUSY; + /* Determine how much memory is required to store PerfMon image. */ + ret = gf100_gr_fecs_discover_pm_image_size(gr, &gr->size_pm); + if (ret) + return ret; - nvkm_wr32(device, 0x409614, 0x00000070); - nvkm_wr32(device, 0x409614, 0x00000770); - nvkm_wr32(device, 0x40802c, 0x00000001); + /*XXX: We (likely) require PMU support to even bother with this. + * + * Also, it seems like not all GPUs support ELPG. Traces I + * have here show RM enabling it on Kepler/Turing, but none + * of the GPUs between those. NVGPU decides this by PCIID. + */ + if (0) { + ret = gf100_gr_fecs_elpg_bind(gr); + if (ret) + return ret; } + /* Generate golden context image. 
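+	 * (The "golden" image is a one-time snapshot of freshly
+	 *  initialised PGRAPH context state: gf100_grctx_generate()
+	 *  fills gr->data with it below, and new channel contexts are
+	 *  then seeded from that copy instead of re-running the
+	 *  generation sequence.)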
*/ if (gr->data == NULL) { int ret = gf100_grctx_generate(gr); if (ret) { @@ -1614,15 +1784,19 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr) /* load HUB microcode */ nvkm_mc_unk260(device, 0); - nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0, + nvkm_falcon_load_dmem(gr->fecs.falcon, + gr->func->fecs.ucode->data.data, 0x0, gr->func->fecs.ucode->data.size, 0); - nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0, + nvkm_falcon_load_imem(gr->fecs.falcon, + gr->func->fecs.ucode->code.data, 0x0, gr->func->fecs.ucode->code.size, 0, 0, false); /* load GPC microcode */ - nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0, + nvkm_falcon_load_dmem(gr->gpccs.falcon, + gr->func->gpccs.ucode->data.data, 0x0, gr->func->gpccs.ucode->data.size, 0); - nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0, + nvkm_falcon_load_imem(gr->gpccs.falcon, + gr->func->gpccs.ucode->code.data, 0x0, gr->func->gpccs.ucode->code.size, 0, 0, false); nvkm_mc_unk260(device, 1); @@ -1769,11 +1943,13 @@ gf100_gr_oneinit(struct nvkm_gr *base) int i, j; int ret; - ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs); + ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs.falcon); if (ret) return ret; - ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs); + mutex_init(&gr->fecs.mutex); + + ret = nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs.falcon); if (ret) return ret; @@ -1816,11 +1992,11 @@ gf100_gr_init_(struct nvkm_gr *base) nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false); - ret = nvkm_falcon_get(gr->fecs, subdev); + ret = nvkm_falcon_get(gr->fecs.falcon, subdev); if (ret) return ret; - ret = nvkm_falcon_get(gr->gpccs, subdev); + ret = nvkm_falcon_get(gr->gpccs.falcon, subdev); if (ret) return ret; @@ -1832,8 +2008,8 @@ gf100_gr_fini_(struct nvkm_gr *base, bool suspend) { struct gf100_gr *gr = gf100_gr(base); struct nvkm_subdev *subdev = &gr->base.engine.subdev; - nvkm_falcon_put(gr->gpccs, subdev); - nvkm_falcon_put(gr->fecs, subdev); + nvkm_falcon_put(gr->gpccs.falcon, subdev); + nvkm_falcon_put(gr->fecs.falcon, subdev); return 0; } @@ -1859,8 +2035,8 @@ gf100_gr_dtor(struct nvkm_gr *base) gr->func->dtor(gr); kfree(gr->data); - nvkm_falcon_del(&gr->gpccs); - nvkm_falcon_del(&gr->fecs); + nvkm_falcon_del(&gr->gpccs.falcon); + nvkm_falcon_del(&gr->fecs.falcon); gf100_gr_dtor_fw(&gr->fuc409c); gf100_gr_dtor_fw(&gr->fuc409d); @@ -1886,6 +2062,9 @@ gf100_gr_ = { .chan_new = gf100_gr_chan_new, .object_get = gf100_gr_object_get, .chsw_load = gf100_gr_chsw_load, + .ctxsw.pause = gf100_gr_fecs_stop_ctxsw, + .ctxsw.resume = gf100_gr_fecs_start_ctxsw, + .ctxsw.inst = gf100_gr_ctxsw_inst, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index dc46cf0131db..fafdd0bbea9b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -82,8 +82,16 @@ struct gf100_gr { const struct gf100_gr_func *func; struct nvkm_gr base; - struct nvkm_falcon *fecs; - struct nvkm_falcon *gpccs; + struct { + struct nvkm_falcon *falcon; + struct mutex mutex; + u32 disable; + } fecs; + + struct { + struct nvkm_falcon *falcon; + } gpccs; + struct gf100_gr_fuc fuc409c; struct gf100_gr_fuc fuc409d; struct gf100_gr_fuc fuc41ac; @@ -128,6 +136,8 @@ struct gf100_gr { struct gf100_gr_mmio mmio_list[4096/8]; u32 size; u32 *data; + u32 size_zcull; + u32 size_pm; }; int gf100_gr_ctor(const struct gf100_gr_func *, struct 
nvkm_device *, @@ -136,6 +146,8 @@ int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, int, struct nvkm_gr **); void *gf100_gr_dtor(struct nvkm_gr *); +int gf100_gr_fecs_bind_pointer(struct gf100_gr *, u32 inst); + struct gf100_gr_func_zbc { void (*clear_color)(struct gf100_gr *, int zbc); void (*clear_depth)(struct gf100_gr *, int zbc); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index 66359c23cbce..d4d5601c51e7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -27,6 +27,11 @@ struct nvkm_gr_func { */ u64 (*units)(struct nvkm_gr *); bool (*chsw_load)(struct nvkm_gr *); + struct { + int (*pause)(struct nvkm_gr *); + int (*resume)(struct nvkm_gr *); + u32 (*inst)(struct nvkm_gr *); + } ctxsw; struct nvkm_sclass sclass[]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c index 4807021fd990..4a63581bdd5e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c @@ -21,13 +21,21 @@ */ #include "priv.h" +#include <subdev/top.h> #include <engine/falcon.h> static int nvkm_nvdec_oneinit(struct nvkm_engine *engine) { struct nvkm_nvdec *nvdec = nvkm_nvdec(engine); - return nvkm_falcon_v1_new(&nvdec->engine.subdev, "NVDEC", 0x84000, + struct nvkm_subdev *subdev = &nvdec->engine.subdev; + + nvdec->addr = nvkm_top_addr(subdev->device, subdev->index); + if (!nvdec->addr) + return -EINVAL; + + /*XXX: fix naming of this when adding support for multiple-NVDEC */ + return nvkm_falcon_v1_new(subdev, "NVDEC", nvdec->addr, &nvdec->falcon); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild index 4b17254cfbd0..d9cdea7d9353 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild @@ -1,2 +1,3 @@ nvkm-y += nvkm/engine/sec2/base.o nvkm-y += nvkm/engine/sec2/gp102.o +nvkm-y += nvkm/engine/sec2/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c index f865d2a3e184..1b49e5b6717f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c @@ -22,6 +22,7 @@ #include "priv.h" #include <core/msgqueue.h> +#include <subdev/top.h> #include <engine/falcon.h> static void * @@ -39,18 +40,18 @@ nvkm_sec2_intr(struct nvkm_engine *engine) struct nvkm_sec2 *sec2 = nvkm_sec2(engine); struct nvkm_subdev *subdev = &engine->subdev; struct nvkm_device *device = subdev->device; - u32 disp = nvkm_rd32(device, 0x8701c); - u32 intr = nvkm_rd32(device, 0x87008) & disp & ~(disp >> 16); + u32 disp = nvkm_rd32(device, sec2->addr + 0x01c); + u32 intr = nvkm_rd32(device, sec2->addr + 0x008) & disp & ~(disp >> 16); if (intr & 0x00000040) { schedule_work(&sec2->work); - nvkm_wr32(device, 0x87004, 0x00000040); + nvkm_wr32(device, sec2->addr + 0x004, 0x00000040); intr &= ~0x00000040; } if (intr) { nvkm_error(subdev, "unhandled intr %08x\n", intr); - nvkm_wr32(device, 0x87004, intr); + nvkm_wr32(device, sec2->addr + 0x004, intr); } } @@ -74,8 +75,15 @@ static int nvkm_sec2_oneinit(struct nvkm_engine *engine) { struct nvkm_sec2 *sec2 = nvkm_sec2(engine); - return nvkm_falcon_v1_new(&sec2->engine.subdev, "SEC2", 0x87000, - &sec2->falcon); + struct nvkm_subdev *subdev = &sec2->engine.subdev; + + if (!sec2->addr) { + sec2->addr = 
nvkm_top_addr(subdev->device, subdev->index); + if (WARN_ON(!sec2->addr)) + return -EINVAL; + } + + return nvkm_falcon_v1_new(subdev, "SEC2", sec2->addr, &sec2->falcon); } static int @@ -95,13 +103,14 @@ nvkm_sec2 = { }; int -nvkm_sec2_new_(struct nvkm_device *device, int index, +nvkm_sec2_new_(struct nvkm_device *device, int index, u32 addr, struct nvkm_sec2 **psec2) { struct nvkm_sec2 *sec2; if (!(sec2 = *psec2 = kzalloc(sizeof(*sec2), GFP_KERNEL))) return -ENOMEM; + sec2->addr = addr; INIT_WORK(&sec2->work, nvkm_sec2_recv); return nvkm_engine_ctor(&nvkm_sec2, device, index, true, &sec2->engine); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c index 9be1524c08f5..858cf27fa010 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c @@ -26,5 +26,5 @@ int gp102_sec2_new(struct nvkm_device *device, int index, struct nvkm_sec2 **psec2) { - return nvkm_sec2_new_(device, index, psec2); + return nvkm_sec2_new_(device, index, 0, psec2); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h index 2f97c806a79d..ab0165e2d1a3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h @@ -5,6 +5,5 @@ #define nvkm_sec2(p) container_of((p), struct nvkm_sec2, engine) -int nvkm_sec2_new_(struct nvkm_device *, int, struct nvkm_sec2 **); - +int nvkm_sec2_new_(struct nvkm_device *, int, u32 addr, struct nvkm_sec2 **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c new file mode 100644 index 000000000000..d655576164b1 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c @@ -0,0 +1,33 @@ +/* + * Copyright 2019 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "priv.h" + +int +tu102_sec2_new(struct nvkm_device *device, int index, + struct nvkm_sec2 **psec2) +{ + /* TOP info wasn't updated on Turing to reflect the PRI + * address change for some reason. We override it here. 
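+ * Since nvkm_sec2_new_() stores the address before oneinit runs,
+ * nvkm_sec2_oneinit() will skip its nvkm_top_addr() lookup and use
+ * this value directly.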
+ */ + return nvkm_sec2_new_(device, index, 0x840000, psec2); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index 427340153640..366c87de6e72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -204,6 +204,9 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func, debug_reg = 0x408; falcon->has_emem = true; break; + case NVKM_SUBDEV_GSP: + debug_reg = 0x0; /*XXX*/ + break; default: nvkm_warn(subdev, "unsupported falcon %s!\n", nvkm_subdev_name[subdev->index]); diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c index 771e16a16267..a8bee1e046aa 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgqueue.c @@ -269,7 +269,7 @@ cmd_write(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *cmd, commit = false; } - cmd_queue_close(priv, queue, commit); + cmd_queue_close(priv, queue, commit); return ret; } @@ -347,7 +347,7 @@ nvkm_msgqueue_post(struct nvkm_msgqueue *priv, enum msgqueue_msg_priority prio, ret = cmd_write(priv, cmd, queue); if (ret) { seq->state = SEQ_STATE_PENDING; - msgqueue_seq_release(priv, seq); + msgqueue_seq_release(priv, seq); } return ret; @@ -373,7 +373,7 @@ msgqueue_msg_handle(struct nvkm_msgqueue *priv, struct nvkm_msgqueue_hdr *hdr) if (seq->completion) complete(seq->completion); - msgqueue_seq_release(priv, seq); + msgqueue_seq_release(priv, seq); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild index cfdffef1afb9..a339fe03d423 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild @@ -7,6 +7,7 @@ include $(src)/nvkm/subdev/fault/Kbuild include $(src)/nvkm/subdev/fb/Kbuild include $(src)/nvkm/subdev/fuse/Kbuild include $(src)/nvkm/subdev/gpio/Kbuild +include $(src)/nvkm/subdev/gsp/Kbuild include $(src)/nvkm/subdev/i2c/Kbuild include $(src)/nvkm/subdev/ibus/Kbuild include $(src)/nvkm/subdev/iccsense/Kbuild diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild index ab0282dc0736..dc300600c019 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild @@ -5,4 +5,4 @@ nvkm-y += nvkm/subdev/bar/gf100.o nvkm-y += nvkm/subdev/bar/gk20a.o nvkm-y += nvkm/subdev/bar/gm107.o nvkm-y += nvkm/subdev/bar/gm20b.o -nvkm-y += nvkm/subdev/bar/tu104.o +nvkm-y += nvkm/subdev/bar/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c index ecaead156e9b..798f65ec3a86 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu102.c @@ -25,7 +25,7 @@ #include <subdev/timer.h> static void -tu104_bar_bar2_wait(struct nvkm_bar *bar) +tu102_bar_bar2_wait(struct nvkm_bar *bar) { struct nvkm_device *device = bar->subdev.device; nvkm_msec(device, 2000, @@ -35,13 +35,13 @@ tu104_bar_bar2_wait(struct nvkm_bar *bar) } static void -tu104_bar_bar2_fini(struct nvkm_bar *bar) +tu102_bar_bar2_fini(struct nvkm_bar *bar) { nvkm_mask(bar->subdev.device, 0xb80f48, 0x80000000, 0x00000000); } static void -tu104_bar_bar2_init(struct nvkm_bar *base) +tu102_bar_bar2_init(struct nvkm_bar *base) { struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); @@ -52,7 +52,7 @@ tu104_bar_bar2_init(struct nvkm_bar *base) } static void 
-tu104_bar_bar1_wait(struct nvkm_bar *bar) +tu102_bar_bar1_wait(struct nvkm_bar *bar) { struct nvkm_device *device = bar->subdev.device; nvkm_msec(device, 2000, @@ -62,13 +62,13 @@ tu104_bar_bar1_wait(struct nvkm_bar *bar) } static void -tu104_bar_bar1_fini(struct nvkm_bar *bar) +tu102_bar_bar1_fini(struct nvkm_bar *bar) { nvkm_mask(bar->subdev.device, 0xb80f40, 0x80000000, 0x00000000); } static void -tu104_bar_bar1_init(struct nvkm_bar *base) +tu102_bar_bar1_init(struct nvkm_bar *base) { struct nvkm_device *device = base->subdev.device; struct gf100_bar *bar = gf100_bar(base); @@ -77,22 +77,22 @@ tu104_bar_bar1_init(struct nvkm_bar *base) } static const struct nvkm_bar_func -tu104_bar = { +tu102_bar = { .dtor = gf100_bar_dtor, .oneinit = gf100_bar_oneinit, - .bar1.init = tu104_bar_bar1_init, - .bar1.fini = tu104_bar_bar1_fini, - .bar1.wait = tu104_bar_bar1_wait, + .bar1.init = tu102_bar_bar1_init, + .bar1.fini = tu102_bar_bar1_fini, + .bar1.wait = tu102_bar_bar1_wait, .bar1.vmm = gf100_bar_bar1_vmm, - .bar2.init = tu104_bar_bar2_init, - .bar2.fini = tu104_bar_bar2_fini, - .bar2.wait = tu104_bar_bar2_wait, + .bar2.init = tu102_bar_bar2_init, + .bar2.fini = tu102_bar_bar2_fini, + .bar2.wait = tu102_bar_bar2_wait, .bar2.vmm = gf100_bar_bar2_vmm, .flush = g84_bar_flush, }; int -tu104_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) +tu102_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar) { - return gf100_bar_new_(&tu104_bar, device, index, pbar); + return gf100_bar_new_(&tu102_bar, device, index, pbar); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c index 3133b28f849c..b099d1209be8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c @@ -212,7 +212,7 @@ nvbios_dpcfg_match(struct nvkm_bios *bios, u16 outp, u8 pc, u8 vs, u8 pe, u16 data; if (*ver >= 0x30) { - const u8 vsoff[] = { 0, 4, 7, 9 }; + static const u8 vsoff[] = { 0, 4, 7, 9 }; idx = (pc * 10) + vsoff[vs] + pe; if (*ver >= 0x40 && *ver <= 0x41 && *hdr >= 0x12) idx += nvbios_rd08(bios, outp + 0x11) * 40; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c index 9cc10e438b3d..ec0e9f7224b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/init.c @@ -806,12 +806,12 @@ init_generic_condition(struct nvbios_init *init) init->offset += 3; switch (cond) { - case 0: + case 0: /* CONDITION_ID_INT_DP. */ if (init_conn(init) != DCB_CONNECTOR_eDP) init_exec_set(init, false); break; - case 1: - case 2: + case 1: /* CONDITION_ID_USE_SPPLL0. */ + case 2: /* CONDITION_ID_USE_SPPLL1. */ if ( init->outp && (data = nvbios_dpout_match(bios, DCB_OUTPUT_DP, (init->outp->or << 0) | @@ -826,10 +826,13 @@ init_generic_condition(struct nvbios_init *init) if (init_exec(init)) warn("script needs dp output table data\n"); break; - case 5: + case 5: /* CONDITION_ID_ASSR_SUPPORT. */ if (!(init_rdauxr(init, 0x0d) & 1)) init_exec_set(init, false); break; + case 7: /* CONDITION_ID_NO_PANEL_SEQ_DELAYS. 
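+ * No real test is performed; the condition is simply treated as
+ * unsatisfied, disabling execution of the guarded script section.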
*/ + init_exec_set(init, false); + break; default: warn("INIT_GENERIC_CONDITON: unknown 0x%02x\n", cond); init->offset += size; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild index 3ef505a5c01b..f3c388932b6f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild @@ -13,4 +13,4 @@ nvkm-y += nvkm/subdev/devinit/gf100.o nvkm-y += nvkm/subdev/devinit/gm107.o nvkm-y += nvkm/subdev/devinit/gm200.o nvkm-y += nvkm/subdev/devinit/gv100.o -nvkm-y += nvkm/subdev/devinit/tu104.o +nvkm-y += nvkm/subdev/devinit/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c index aae87b3fc429..397670e72fff 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu102.c @@ -26,7 +26,7 @@ #include <subdev/clk/pll.h> static int -tu104_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) +tu102_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) { struct nvkm_subdev *subdev = &init->subdev; struct nvkm_device *device = subdev->device; @@ -66,7 +66,7 @@ tu104_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq) } static int -tu104_devinit_post(struct nvkm_devinit *base, bool post) +tu102_devinit_post(struct nvkm_devinit *base, bool post) { struct nv50_devinit *init = nv50_devinit(base); gm200_devinit_preos(init, post); @@ -74,16 +74,16 @@ tu104_devinit_post(struct nvkm_devinit *base, bool post) } static const struct nvkm_devinit_func -tu104_devinit = { +tu102_devinit = { .init = nv50_devinit_init, - .post = tu104_devinit_post, - .pll_set = tu104_devinit_pll_set, + .post = tu102_devinit_post, + .pll_set = tu102_devinit_pll_set, .disable = gm107_devinit_disable, }; int -tu104_devinit_new(struct nvkm_device *device, int index, +tu102_devinit_new(struct nvkm_device *device, int index, struct nvkm_devinit **pinit) { - return nv50_devinit_new_(&tu104_devinit, device, index, pinit); + return nv50_devinit_new_(&tu102_devinit, device, index, pinit); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild index 794eb1745b2f..42586267fc08 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild @@ -1,4 +1,5 @@ nvkm-y += nvkm/subdev/fault/base.o +nvkm-y += nvkm/subdev/fault/user.o nvkm-y += nvkm/subdev/fault/gp100.o nvkm-y += nvkm/subdev/fault/gv100.o -nvkm-y += nvkm/subdev/fault/tu104.o +nvkm-y += nvkm/subdev/fault/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c index 4ba1e21e8fda..ca251560d3e0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c @@ -176,5 +176,7 @@ nvkm_fault_new_(const struct nvkm_fault_func *func, struct nvkm_device *device, return -ENOMEM; nvkm_subdev_ctor(&nvkm_fault, device, index, &fault->subdev); fault->func = func; + fault->user.ctor = nvkm_ufault_new; + fault->user.base = func->user.base; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c index 8fb96fe614f9..4f3c4e091117 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c @@ -23,6 +23,8 @@ #include <subdev/mc.h> +#include <nvif/class.h> + static void 
gp100_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) { @@ -69,6 +71,7 @@ gp100_fault = { .buffer.init = gp100_fault_buffer_init, .buffer.fini = gp100_fault_buffer_fini, .buffer.intr = gp100_fault_buffer_intr, + .user = { { 0, 0, MAXWELL_FAULT_BUFFER_A }, 0 }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c index 6fc54e17c935..6747f09c2dc3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c @@ -25,6 +25,8 @@ #include <subdev/mmu.h> #include <engine/fifo.h> +#include <nvif/class.h> + static void gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer) { @@ -166,6 +168,13 @@ gv100_fault_intr(struct nvkm_fault *fault) } } + if (stat & 0x08000000) { + if (fault->buffer[1]) { + nvkm_event_send(&fault->event, 1, 1, NULL, 0); + stat &= ~0x08000000; + } + } + if (stat) { nvkm_debug(subdev, "intr %08x\n", stat); } @@ -208,6 +217,13 @@ gv100_fault = { .buffer.init = gv100_fault_buffer_init, .buffer.fini = gv100_fault_buffer_fini, .buffer.intr = gv100_fault_buffer_intr, + /*TODO: Figure out how to expose non-replayable fault buffer, which, + * for some reason, is where recoverable CE faults appear... + * + * It's a bit tricky, as both NVKM and SVM will need access to + * the non-replayable fault buffer. + */ + .user = { { 0, 0, VOLTA_FAULT_BUFFER_A }, 1 }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h index 8ca8b2876dad..975e66ac6344 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h @@ -34,7 +34,14 @@ struct nvkm_fault_func { void (*fini)(struct nvkm_fault_buffer *); void (*intr)(struct nvkm_fault_buffer *, bool enable); } buffer; + struct { + struct nvkm_sclass base; + int rp; + } user; }; int gv100_fault_oneinit(struct nvkm_fault *); + +int nvkm_ufault_new(struct nvkm_device *, const struct nvkm_oclass *, + void *, u32, struct nvkm_object **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c index 9c8a3adf99d7..fa1dfe5692b0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c @@ -28,7 +28,7 @@ #include <nvif/class.h> static void -tu104_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) +tu102_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) { /*XXX: Earlier versions of RM touched the old regs on Turing, * which don't appear to actually work anymore, but newer @@ -37,7 +37,7 @@ tu104_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable) } static void -tu104_fault_buffer_fini(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_fini(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; const u32 foff = buffer->id * 0x20; @@ -45,7 +45,7 @@ tu104_fault_buffer_fini(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_buffer_init(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_init(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; const u32 foff = buffer->id * 0x20; @@ -57,7 +57,7 @@ tu104_fault_buffer_init(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_buffer_info(struct nvkm_fault_buffer *buffer) +tu102_fault_buffer_info(struct nvkm_fault_buffer *buffer) { struct nvkm_device *device = buffer->fault->subdev.device; 
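/* Each fault buffer's register block is spaced 0x20 bytes apart. */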
const u32 foff = buffer->id * 0x20; @@ -70,7 +70,7 @@ tu104_fault_buffer_info(struct nvkm_fault_buffer *buffer) } static void -tu104_fault_intr_fault(struct nvkm_fault *fault) +tu102_fault_intr_fault(struct nvkm_fault *fault) { struct nvkm_subdev *subdev = &fault->subdev; struct nvkm_device *device = subdev->device; @@ -96,14 +96,14 @@ tu104_fault_intr_fault(struct nvkm_fault *fault) } static void -tu104_fault_intr(struct nvkm_fault *fault) +tu102_fault_intr(struct nvkm_fault *fault) { struct nvkm_subdev *subdev = &fault->subdev; struct nvkm_device *device = subdev->device; u32 stat = nvkm_rd32(device, 0xb83094); if (stat & 0x80000000) { - tu104_fault_intr_fault(fault); + tu102_fault_intr_fault(fault); nvkm_wr32(device, 0xb83094, 0x80000000); stat &= ~0x80000000; } @@ -129,7 +129,7 @@ tu104_fault_intr(struct nvkm_fault *fault) } static void -tu104_fault_fini(struct nvkm_fault *fault) +tu102_fault_fini(struct nvkm_fault *fault) { nvkm_notify_put(&fault->nrpfb); if (fault->buffer[0]) @@ -138,7 +138,7 @@ tu104_fault_fini(struct nvkm_fault *fault) } static void -tu104_fault_init(struct nvkm_fault *fault) +tu102_fault_init(struct nvkm_fault *fault) { /*XXX: enable priv faults */ fault->func->buffer.init(fault->buffer[0]); @@ -146,22 +146,23 @@ tu104_fault_init(struct nvkm_fault *fault) } static const struct nvkm_fault_func -tu104_fault = { +tu102_fault = { .oneinit = gv100_fault_oneinit, - .init = tu104_fault_init, - .fini = tu104_fault_fini, - .intr = tu104_fault_intr, + .init = tu102_fault_init, + .fini = tu102_fault_fini, + .intr = tu102_fault_intr, .buffer.nr = 2, .buffer.entry_size = 32, - .buffer.info = tu104_fault_buffer_info, - .buffer.init = tu104_fault_buffer_init, - .buffer.fini = tu104_fault_buffer_fini, - .buffer.intr = tu104_fault_buffer_intr, + .buffer.info = tu102_fault_buffer_info, + .buffer.init = tu102_fault_buffer_init, + .buffer.fini = tu102_fault_buffer_fini, + .buffer.intr = tu102_fault_buffer_intr, + .user = { { 0, 0, VOLTA_FAULT_BUFFER_A }, 1 }, }; int -tu104_fault_new(struct nvkm_device *device, int index, +tu102_fault_new(struct nvkm_device *device, int index, struct nvkm_fault **pfault) { - return nvkm_fault_new_(&tu104_fault, device, index, pfault); + return nvkm_fault_new_(&tu102_fault, device, index, pfault); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c new file mode 100644 index 000000000000..ac835c9582fd --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c @@ -0,0 +1,106 @@ +/* + * Copyright 2018 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include <core/memory.h> +#include <subdev/mmu.h> + +#include <nvif/clb069.h> +#include <nvif/unpack.h> + +static int +nvkm_ufault_map(struct nvkm_object *object, void *argv, u32 argc, + enum nvkm_object_map *type, u64 *addr, u64 *size) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + struct nvkm_device *device = buffer->fault->subdev.device; + *type = NVKM_OBJECT_MAP_IO; + *addr = device->func->resource_addr(device, 3) + buffer->addr; + *size = nvkm_memory_size(buffer->mem); + return 0; +} + +static int +nvkm_ufault_ntfy(struct nvkm_object *object, u32 type, + struct nvkm_event **pevent) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + if (type == NVB069_V0_NTFY_FAULT) { + *pevent = &buffer->fault->event; + return 0; + } + return -EINVAL; +} + +static int +nvkm_ufault_fini(struct nvkm_object *object, bool suspend) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + buffer->fault->func->buffer.fini(buffer); + return 0; +} + +static int +nvkm_ufault_init(struct nvkm_object *object) +{ + struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object); + buffer->fault->func->buffer.init(buffer); + return 0; +} + +static void * +nvkm_ufault_dtor(struct nvkm_object *object) +{ + return NULL; +} + +static const struct nvkm_object_func +nvkm_ufault = { + .dtor = nvkm_ufault_dtor, + .init = nvkm_ufault_init, + .fini = nvkm_ufault_fini, + .ntfy = nvkm_ufault_ntfy, + .map = nvkm_ufault_map, +}; + +int +nvkm_ufault_new(struct nvkm_device *device, const struct nvkm_oclass *oclass, + void *argv, u32 argc, struct nvkm_object **pobject) +{ + union { + struct nvif_clb069_v0 v0; + } *args = argv; + struct nvkm_fault *fault = device->fault; + struct nvkm_fault_buffer *buffer = fault->buffer[fault->func->user.rp]; + int ret = -ENOSYS; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + args->v0.entries = buffer->entries; + args->v0.get = buffer->get; + args->v0.put = buffer->put; + } else + return ret; + + nvkm_object_ctor(&nvkm_ufault, oclass, &buffer->object); + *pobject = &buffer->object; + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c index 60ece0a8a2e1..1d2d6bae73cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c @@ -87,7 +87,7 @@ nvkm_gddr3_calc(struct nvkm_ram *ram) WR = (ram->next->bios.timing[2] & 0x007f0000) >> 16; /* XXX: Get these values from the VBIOS instead */ DLL = !(ram->mr[1] & 0x1); - RON = !(ram->mr[1] & 0x300) >> 8; + RON = !((ram->mr[1] & 0x300) >> 8); break; default: return -ENOSYS; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild new file mode 100644 index 000000000000..26fc6feb807e --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild @@ -0,0 +1 @@ +nvkm-y += nvkm/subdev/gsp/gv100.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c new file mode 100644 index 000000000000..dccfaf1162e2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c @@ -0,0 +1,62 @@ +/* + * Copyright 2019 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include <subdev/gsp.h> +#include <subdev/top.h> +#include <engine/falcon.h> + +static int +gv100_gsp_oneinit(struct nvkm_subdev *subdev) +{ + struct nvkm_gsp *gsp = nvkm_gsp(subdev); + + gsp->addr = nvkm_top_addr(subdev->device, subdev->index); + if (!gsp->addr) + return -EINVAL; + + return nvkm_falcon_v1_new(subdev, "GSP", gsp->addr, &gsp->falcon); +} + +static void * +gv100_gsp_dtor(struct nvkm_subdev *subdev) +{ + struct nvkm_gsp *gsp = nvkm_gsp(subdev); + nvkm_falcon_del(&gsp->falcon); + return gsp; +} + +static const struct nvkm_subdev_func +gv100_gsp = { + .dtor = gv100_gsp_dtor, + .oneinit = gv100_gsp_oneinit, +}; + +int +gv100_gsp_new(struct nvkm_device *device, int index, struct nvkm_gsp **pgsp) +{ + struct nvkm_gsp *gsp; + + if (!(gsp = *pgsp = kzalloc(sizeof(*gsp), GFP_KERNEL))) + return -ENOMEM; + + nvkm_subdev_ctor(&gv100_gsp, device, index, &gsp->subdev); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild index f3b06329c338..c64e399326b3 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild @@ -12,4 +12,4 @@ nvkm-y += nvkm/subdev/mc/gk104.o nvkm-y += nvkm/subdev/mc/gk20a.o nvkm-y += nvkm/subdev/mc/gp100.o nvkm-y += nvkm/subdev/mc/gp10b.o -nvkm-y += nvkm/subdev/mc/tu104.o +nvkm-y += nvkm/subdev/mc/tu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c index b7165bd18999..d098c44a4fcb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c @@ -22,7 +22,7 @@ #include "priv.h" static void -tu104_mc_intr_hack(struct nvkm_mc *mc, bool *handled) +tu102_mc_intr_hack(struct nvkm_mc *mc, bool *handled) { struct nvkm_device *device = mc->subdev.device; u32 stat = nvkm_rd32(device, 0xb81010); @@ -37,19 +37,19 @@ tu104_mc_intr_hack(struct nvkm_mc *mc, bool *handled) } static const struct nvkm_mc_func -tu104_mc = { +tu102_mc = { .init = nv50_mc_init, .intr = gp100_mc_intr, .intr_unarm = gp100_mc_intr_unarm, .intr_rearm = gp100_mc_intr_rearm, .intr_mask = gp100_mc_intr_mask, .intr_stat = gf100_mc_intr_stat, - .intr_hack = tu104_mc_intr_hack, + .intr_hack = tu102_mc_intr_hack, .reset = gk104_mc_reset, }; int -tu104_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) +tu102_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) { - return 
gp100_mc_new_(&tu104_mc, device, index, pmc); + return gp100_mc_new_(&tu102_mc, device, index, pmc); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild index 8966180b36cc..db9c56028f21 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild @@ -13,7 +13,7 @@ nvkm-y += nvkm/subdev/mmu/gm20b.o nvkm-y += nvkm/subdev/mmu/gp100.o nvkm-y += nvkm/subdev/mmu/gp10b.o nvkm-y += nvkm/subdev/mmu/gv100.o -nvkm-y += nvkm/subdev/mmu/tu104.o +nvkm-y += nvkm/subdev/mmu/tu102.o nvkm-y += nvkm/subdev/mmu/mem.o nvkm-y += nvkm/subdev/mmu/memnv04.o @@ -34,7 +34,7 @@ nvkm-y += nvkm/subdev/mmu/vmmgm20b.o nvkm-y += nvkm/subdev/mmu/vmmgp100.o nvkm-y += nvkm/subdev/mmu/vmmgp10b.o nvkm-y += nvkm/subdev/mmu/vmmgv100.o -nvkm-y += nvkm/subdev/mmu/vmmtu104.o +nvkm-y += nvkm/subdev/mmu/vmmtu102.o nvkm-y += nvkm/subdev/mmu/umem.o nvkm-y += nvkm/subdev/mmu/ummu.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c index 651b8805c67c..65cb9d28e60e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp100.c @@ -31,7 +31,7 @@ gp100_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gp100_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c index 3bd3db31e0bb..0a50be9a785a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c @@ -31,7 +31,7 @@ gp10b_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, -1, NVIF_CLASS_MEM_GF100}, .umap = gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gp10b_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c index f666cb57f69e..e0997eedd6d9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gv100.c @@ -31,7 +31,7 @@ gv100_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, -1, NVIF_CLASS_VMM_GP100}, gv100_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, gv100_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h index 948a48c21be4..2ad1102a4e31 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/priv.h @@ -28,7 +28,7 @@ struct nvkm_mmu_func { struct { struct nvkm_sclass user; - int (*ctor)(struct nvkm_mmu *, u64 addr, u64 size, + int (*ctor)(struct nvkm_mmu *, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *, const char *name, struct nvkm_vmm **); bool global; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c index 8e6f4096170d..c0db0ce10cba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu102.c @@ -27,17 +27,17 @@ #include 
<nvif/class.h> static const struct nvkm_mmu_func -tu104_mmu = { +tu102_mmu = { .dma_bits = 47, .mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}}, .mem = {{ -1, 0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map }, - .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, tu104_vmm_new }, + .vmm = {{ -1, 0, NVIF_CLASS_VMM_GP100}, tu102_vmm_new }, .kind = gm200_mmu_kind, .kind_sys = true, }; int -tu104_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) +tu102_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) { - return nvkm_mmu_new_(&tu104_mmu, device, index, pmmu); + return nvkm_mmu_new_(&tu102_mmu, device, index, pmmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c index 6889076097ec..c43b8248c682 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -43,6 +43,69 @@ nvkm_uvmm_search(struct nvkm_client *client, u64 handle) } static int +nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_pfnclr_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + int ret = -ENOSYS; + u64 addr, size; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + addr = args->v0.addr; + size = args->v0.size; + } else + return ret; + + if (!client->super) + return -ENOENT; + + if (size) { + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_pfn_unmap(vmm, addr, size); + mutex_unlock(&vmm->mutex); + } + + return ret; +} + +static int +nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) +{ + struct nvkm_client *client = uvmm->object.client; + union { + struct nvif_vmm_pfnmap_v0 v0; + } *args = argv; + struct nvkm_vmm *vmm = uvmm->vmm; + int ret = -ENOSYS; + u64 addr, size, *phys; + u8 page; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true))) { + page = args->v0.page; + addr = args->v0.addr; + size = args->v0.size; + phys = args->v0.phys; + if (argc != (size >> page) * sizeof(args->v0.phys[0])) + return -EINVAL; + } else + return ret; + + if (!client->super) + return -ENOENT; + + if (size) { + mutex_lock(&vmm->mutex); + ret = nvkm_vmm_pfn_map(vmm, page, addr, size, phys); + mutex_unlock(&vmm->mutex); + } + + return ret; +} + +static int nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { struct nvkm_client *client = uvmm->object.client; @@ -78,7 +141,7 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto done; } - nvkm_vmm_unmap_locked(vmm, vma); + nvkm_vmm_unmap_locked(vmm, vma, false); ret = 0; done: mutex_unlock(&vmm->mutex); @@ -124,6 +187,11 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto fail; } + if (ret = -EINVAL, vma->mapped && !vma->memory) { + VMM_DEBUG(vmm, "pfnmap %016llx", addr); + goto fail; + } + if (ret = -EINVAL, vma->addr != addr || vma->size != size) { if (addr + size > vma->addr + vma->size || vma->memory || (vma->refd == NVKM_VMA_PAGE_NONE && !vma->mapref)) { @@ -271,6 +339,15 @@ nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc) case NVIF_VMM_V0_PUT : return nvkm_uvmm_mthd_put (uvmm, argv, argc); case NVIF_VMM_V0_MAP : return nvkm_uvmm_mthd_map (uvmm, argv, argc); case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc); + case NVIF_VMM_V0_PFNMAP: return nvkm_uvmm_mthd_pfnmap(uvmm, argv, argc); + case NVIF_VMM_V0_PFNCLR: return nvkm_uvmm_mthd_pfnclr(uvmm, argv, argc); + case NVIF_VMM_V0_MTHD(0x00) 
... NVIF_VMM_V0_MTHD(0x7f): + if (uvmm->vmm->func->mthd) { + return uvmm->vmm->func->mthd(uvmm->vmm, + uvmm->object.client, + mthd, argv, argc); + } + break; default: break; } @@ -304,8 +381,10 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_uvmm *uvmm; int ret = -ENOSYS; u64 addr, size; + bool managed; if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, more))) { + managed = args->v0.managed != 0; addr = args->v0.addr; size = args->v0.size; } else @@ -317,7 +396,7 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, *pobject = &uvmm->object; if (!mmu->vmm) { - ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, + ret = mmu->func->vmm.ctor(mmu, managed, addr, size, argv, argc, NULL, "user", &uvmm->vmm); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 6b87fff014b3..fa93f964e6a4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -255,11 +255,23 @@ nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, } static bool -nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; struct nvkm_vmm_pt *pgt = it->pt[0]; + bool dma; + + if (pfn) { + /* Need to clear PTE valid bits before we dma_unmap_page(). */ + dma = desc->func->pfn_clear(it->vmm, pgt->pt[type], ptei, ptes); + if (dma) { + /* GPU may have cached the PT, flush before unmap. */ + nvkm_vmm_flush_mark(it); + nvkm_vmm_flush(it); + desc->func->pfn_unmap(it->vmm, pgt->pt[type], ptei, ptes); + } + } /* Drop PTE references. 
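* (Common to every unmap flavour; the pfn handling above only
* touches PTE contents, not the reference counts.)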
*/ pgt->refs[type] -= ptes; @@ -349,7 +361,7 @@ nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt, } static bool -nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; @@ -379,7 +391,7 @@ nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc, } static bool -nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { struct nvkm_vmm_pt *pt = it->pt[0]; if (it->desc->type == PGD) @@ -387,14 +399,14 @@ nvkm_vmm_sparse_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) else if (it->desc->type == LPT) memset(&pt->pte[ptei], 0x00, sizeof(pt->pte[0]) * ptes); - return nvkm_vmm_unref_ptes(it, ptei, ptes); + return nvkm_vmm_unref_ptes(it, pfn, ptei, ptes); } static bool -nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_sparse_ref_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { nvkm_vmm_sparse_ptes(it->desc, it->pt[0], ptei, ptes); - return nvkm_vmm_ref_ptes(it, ptei, ptes); + return nvkm_vmm_ref_ptes(it, pfn, ptei, ptes); } static bool @@ -487,8 +499,8 @@ nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei) static inline u64 nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, const char *name, bool ref, - bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32), + u64 addr, u64 size, const char *name, bool ref, bool pfn, + bool (*REF_PTES)(struct nvkm_vmm_iter *, bool pfn, u32, u32), nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map, nvkm_vmm_pxe_func CLR_PTES) { @@ -548,7 +560,7 @@ nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, } /* Handle PTE updates. */ - if (!REF_PTES || REF_PTES(&it, ptei, ptes)) { + if (!REF_PTES || REF_PTES(&it, pfn, ptei, ptes)) { struct nvkm_mmu_pt *pt = pgt->pt[type]; if (MAP_PTES || CLR_PTES) { if (MAP_PTES) @@ -590,7 +602,7 @@ static void nvkm_vmm_ptes_sparse_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, + nvkm_vmm_iter(vmm, page, addr, size, "sparse unref", false, false, nvkm_vmm_sparse_unref_ptes, NULL, NULL, page->desc->func->invalid ? page->desc->func->invalid : page->desc->func->unmap); @@ -602,8 +614,8 @@ nvkm_vmm_ptes_sparse_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, { if ((page->type & NVKM_VMM_PAGE_SPARSE)) { u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "sparse ref", - true, nvkm_vmm_sparse_ref_ptes, NULL, - NULL, page->desc->func->sparse); + true, false, nvkm_vmm_sparse_ref_ptes, + NULL, NULL, page->desc->func->sparse); if (fail != ~0ULL) { if ((size = fail - addr)) nvkm_vmm_ptes_sparse_put(vmm, page, addr, size); @@ -666,11 +678,11 @@ nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref) static void nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, bool sparse) + u64 addr, u64 size, bool sparse, bool pfn) { const struct nvkm_vmm_desc_func *func = page->desc->func; nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref", - false, nvkm_vmm_unref_ptes, NULL, NULL, + false, pfn, nvkm_vmm_unref_ptes, NULL, NULL, sparse ? func->sparse : func->invalid ? 
func->invalid : func->unmap); } @@ -681,10 +693,10 @@ nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, nvkm_vmm_pte_func func) { u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true, - nvkm_vmm_ref_ptes, func, map, NULL); + false, nvkm_vmm_ref_ptes, func, map, NULL); if (fail != ~0ULL) { if ((size = fail - addr)) - nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false); + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false, false); return -ENOMEM; } return 0; @@ -692,10 +704,11 @@ nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, static void nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, - u64 addr, u64 size, bool sparse) + u64 addr, u64 size, bool sparse, bool pfn) { const struct nvkm_vmm_desc_func *func = page->desc->func; - nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL, + nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, pfn, + NULL, NULL, NULL, sparse ? func->sparse : func->invalid ? func->invalid : func->unmap); } @@ -705,7 +718,7 @@ nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size, struct nvkm_vmm_map *map, nvkm_vmm_pte_func func) { - nvkm_vmm_iter(vmm, page, addr, size, "map", false, + nvkm_vmm_iter(vmm, page, addr, size, "map", false, false, NULL, func, map, NULL); } @@ -713,7 +726,7 @@ static void nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - nvkm_vmm_iter(vmm, page, addr, size, "unref", false, + nvkm_vmm_iter(vmm, page, addr, size, "unref", false, false, nvkm_vmm_unref_ptes, NULL, NULL, NULL); } @@ -721,7 +734,7 @@ static int nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page, u64 addr, u64 size) { - u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, + u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, false, nvkm_vmm_ref_ptes, NULL, NULL, NULL); if (fail != ~0ULL) { if (fail != addr) @@ -763,6 +776,7 @@ nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) new->part = vma->part; new->user = vma->user; new->busy = vma->busy; + new->mapped = vma->mapped; list_add(&new->head, &vma->head); return new; } @@ -935,11 +949,40 @@ nvkm_vmm_node_split(struct nvkm_vmm *vmm, } static void +nvkm_vma_dump(struct nvkm_vma *vma) +{ + printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c%c %p\n", + vma->addr, (u64)vma->size, + vma->used ? '-' : 'F', + vma->mapref ? 'R' : '-', + vma->sparse ? 'S' : '-', + vma->page != NVKM_VMA_PAGE_NONE ? '0' + vma->page : '-', + vma->refd != NVKM_VMA_PAGE_NONE ? '0' + vma->refd : '-', + vma->part ? 'P' : '-', + vma->user ? 'U' : '-', + vma->busy ? 'B' : '-', + vma->mapped ? 
'M' : '-', + vma->memory); +} + +static void +nvkm_vmm_dump(struct nvkm_vmm *vmm) +{ + struct nvkm_vma *vma; + list_for_each_entry(vma, &vmm->list, head) { + nvkm_vma_dump(vma); + } +} + +static void nvkm_vmm_dtor(struct nvkm_vmm *vmm) { struct nvkm_vma *vma; struct rb_node *node; + if (0) + nvkm_vmm_dump(vmm); + while ((node = rb_first(&vmm->root))) { struct nvkm_vma *vma = rb_entry(node, typeof(*vma), tree); nvkm_vmm_put(vmm, &vma); @@ -972,16 +1015,32 @@ nvkm_vmm_dtor(struct nvkm_vmm *vmm) } } +static int +nvkm_vmm_ctor_managed(struct nvkm_vmm *vmm, u64 addr, u64 size) +{ + struct nvkm_vma *vma; + if (!(vma = nvkm_vma_new(addr, size))) + return -ENOMEM; + vma->mapref = true; + vma->sparse = false; + vma->used = true; + vma->user = true; + nvkm_vmm_node_insert(vmm, vma); + list_add_tail(&vma->head, &vmm->list); + return 0; +} + int nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *key, - const char *name, struct nvkm_vmm *vmm) + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *key, const char *name, + struct nvkm_vmm *vmm) { static struct lock_class_key _key; const struct nvkm_vmm_page *page = func->page; const struct nvkm_vmm_desc *desc; struct nvkm_vma *vma; - int levels, bits = 0; + int levels, bits = 0, ret; vmm->func = func; vmm->mmu = mmu; @@ -1009,11 +1068,6 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX)) return -EINVAL; - vmm->start = addr; - vmm->limit = size ? (addr + size) : (1ULL << bits); - if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits)) - return -EINVAL; - /* Allocate top-level page table. */ vmm->pd = nvkm_vmm_pt_new(desc, false, NULL); if (!vmm->pd) @@ -1036,50 +1090,273 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, vmm->free = RB_ROOT; vmm->root = RB_ROOT; - if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) - return -ENOMEM; + if (managed) { + /* Address-space will be managed by the client for the most + * part, except for a specified area where NVKM allocations + * are allowed to be placed. + */ + vmm->start = 0; + vmm->limit = 1ULL << bits; + if (addr + size < addr || addr + size > vmm->limit) + return -EINVAL; + + /* Client-managed area before the NVKM-managed area. */ + if (addr && (ret = nvkm_vmm_ctor_managed(vmm, 0, addr))) + return ret; + + /* NVKM-managed area. */ + if (size) { + if (!(vma = nvkm_vma_new(addr, size))) + return -ENOMEM; + nvkm_vmm_free_insert(vmm, vma); + list_add_tail(&vma->head, &vmm->list); + } + + /* Client-managed area after the NVKM-managed area. */ + addr = addr + size; + size = vmm->limit - addr; + if (size && (ret = nvkm_vmm_ctor_managed(vmm, addr, size))) + return ret; + } else { + /* Address-space fully managed by NVKM, requiring calls to + * nvkm_vmm_get()/nvkm_vmm_put() to allocate address-space. + */ + vmm->start = addr; + vmm->limit = size ? 
(addr + size) : (1ULL << bits); + if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits)) + return -EINVAL; + + if (!(vma = nvkm_vma_new(vmm->start, vmm->limit - vmm->start))) + return -ENOMEM; + + nvkm_vmm_free_insert(vmm, vma); + list_add(&vma->head, &vmm->list); + } - nvkm_vmm_free_insert(vmm, vma); - list_add(&vma->head, &vmm->list); return 0; } int nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 hdr, u64 addr, u64 size, struct lock_class_key *key, - const char *name, struct nvkm_vmm **pvmm) + u32 hdr, bool managed, u64 addr, u64 size, + struct lock_class_key *key, const char *name, + struct nvkm_vmm **pvmm) { if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL))) return -ENOMEM; - return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm); + return nvkm_vmm_ctor(func, mmu, hdr, managed, addr, size, key, name, *pvmm); +} + +static struct nvkm_vma * +nvkm_vmm_pfn_split_merge(struct nvkm_vmm *vmm, struct nvkm_vma *vma, + u64 addr, u64 size, u8 page, bool map) +{ + struct nvkm_vma *prev = NULL; + struct nvkm_vma *next = NULL; + + if (vma->addr == addr && vma->part && (prev = node(vma, prev))) { + if (prev->memory || prev->mapped != map) + prev = NULL; + } + + if (vma->addr + vma->size == addr + size && (next = node(vma, next))) { + if (!next->part || + next->memory || next->mapped != map) + next = NULL; + } + + if (prev || next) + return nvkm_vmm_node_merge(vmm, prev, vma, next, size); + return nvkm_vmm_node_split(vmm, vma, addr, size); +} + +int +nvkm_vmm_pfn_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size) +{ + struct nvkm_vma *vma = nvkm_vmm_node_search(vmm, addr); + struct nvkm_vma *next; + u64 limit = addr + size; + u64 start = addr; + + if (!vma) + return -EINVAL; + + do { + if (!vma->mapped || vma->memory) + continue; + + size = min(limit - start, vma->size - (start - vma->addr)); + + nvkm_vmm_ptes_unmap_put(vmm, &vmm->func->page[vma->refd], + start, size, false, true); + + next = nvkm_vmm_pfn_split_merge(vmm, vma, start, size, 0, false); + if (!WARN_ON(!next)) { + vma = next; + vma->refd = NVKM_VMA_PAGE_NONE; + vma->mapped = false; + } + } while ((vma = node(vma, next)) && (start = vma->addr) < limit); + + return 0; +} + +/*TODO: + * - Avoid PT readback (for dma_unmap etc), this might end up being dealt + * with inside HMM, which would be a lot nicer for us to deal with. + * - Multiple page sizes (particularly for huge page support). + * - Support for systems without a 4KiB page size. + */ +int +nvkm_vmm_pfn_map(struct nvkm_vmm *vmm, u8 shift, u64 addr, u64 size, u64 *pfn) +{ + const struct nvkm_vmm_page *page = vmm->func->page; + struct nvkm_vma *vma, *tmp; + u64 limit = addr + size; + u64 start = addr; + int pm = size >> shift; + int pi = 0; + + /* Only support mapping where the page size of the incoming page + * array matches a page size available for direct mapping. + */ + while (page->shift && page->shift != shift && + page->desc->func->pfn == NULL) + page++; + + if (!page->shift || !IS_ALIGNED(addr, 1ULL << shift) || + !IS_ALIGNED(size, 1ULL << shift) || + addr + size < addr || addr + size > vmm->limit) { + VMM_DEBUG(vmm, "paged map %d %d %016llx %016llx\n", + shift, page->shift, addr, size); + return -EINVAL; + } + + if (!(vma = nvkm_vmm_node_search(vmm, addr))) + return -ENOENT; + + do { + bool map = !!(pfn[pi] & NVKM_VMM_PFN_V); + bool mapped = vma->mapped; + u64 size = limit - start; + u64 addr = start; + int pn, ret = 0; + + /* Narrow the operation window to cover a single action (page + * should be mapped or not) within a single VMA. 
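+ * For example (all within one VMA), pfn[] valid bits { 1, 1, 0, 1 }
+ * yield three passes: a two-page map, a one-page unmap, then a
+ * one-page map.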
+ */ + for (pn = 0; pi + pn < pm; pn++) { + if (map != !!(pfn[pi + pn] & NVKM_VMM_PFN_V)) + break; + } + size = min_t(u64, size, pn << page->shift); + size = min_t(u64, size, vma->size + vma->addr - addr); + + /* Reject any operation to unmanaged regions, and areas that + * have nvkm_memory objects mapped in them already. + */ + if (!vma->mapref || vma->memory) { + ret = -EINVAL; + goto next; + } + + /* In order to both properly refcount GPU page tables, and + * prevent "normal" mappings and these direct mappings from + * interfering with each other, we need to track contiguous + * ranges that have been mapped with this interface. + * + * Here we attempt to either split an existing VMA so we're + * able to flag the region as either unmapped/mapped, or to + * merge with adjacent VMAs that are already compatible. + * + * If the region is already compatible, nothing is required. + */ + if (map != mapped) { + tmp = nvkm_vmm_pfn_split_merge(vmm, vma, addr, size, + page - + vmm->func->page, map); + if (WARN_ON(!tmp)) { + ret = -ENOMEM; + goto next; + } + + if ((tmp->mapped = map)) + tmp->refd = page - vmm->func->page; + else + tmp->refd = NVKM_VMA_PAGE_NONE; + vma = tmp; + } + + /* Update HW page tables. */ + if (map) { + struct nvkm_vmm_map args; + args.page = page; + args.pfn = &pfn[pi]; + + if (!mapped) { + ret = nvkm_vmm_ptes_get_map(vmm, page, addr, + size, &args, page-> + desc->func->pfn); + } else { + nvkm_vmm_ptes_map(vmm, page, addr, size, &args, + page->desc->func->pfn); + } + } else { + if (mapped) { + nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, + false, true); + } + } + +next: + /* Iterate to next operation. */ + if (vma->addr + vma->size == addr + size) + vma = node(vma, next); + start += size; + + if (ret) { + /* Failure is signalled by clearing the valid bit on + * any PFN that couldn't be modified as requested. 
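+ * NVKM_VMM_PFN_NONE is zero, so the whole entry, including
+ * NVKM_VMM_PFN_V, is wiped for the failed range.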
+ */ + while (size) { + pfn[pi++] = NVKM_VMM_PFN_NONE; + size -= 1 << page->shift; + } + } else { + pi += size >> page->shift; + } + } while (vma && start < limit); + + return 0; } void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma) { - struct nvkm_vma *next = node(vma, next); struct nvkm_vma *prev = NULL; + struct nvkm_vma *next; nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); nvkm_memory_unref(&vma->memory); + vma->mapped = false; - if (!vma->part || ((prev = node(vma, prev)), prev->memory)) + if (vma->part && (prev = node(vma, prev)) && prev->mapped) prev = NULL; - if (!next->part || next->memory) + if ((next = node(vma, next)) && (!next->part || next->mapped)) next = NULL; nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size); } void -nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) +nvkm_vmm_unmap_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, bool pfn) { const struct nvkm_vmm_page *page = &vmm->func->page[vma->refd]; if (vma->mapref) { - nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse); + nvkm_vmm_ptes_unmap_put(vmm, page, vma->addr, vma->size, vma->sparse, pfn); vma->refd = NVKM_VMA_PAGE_NONE; } else { - nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse); + nvkm_vmm_ptes_unmap(vmm, page, vma->addr, vma->size, vma->sparse, pfn); } nvkm_vmm_unmap_region(vmm, vma); @@ -1090,7 +1367,7 @@ nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma) { if (vma->memory) { mutex_lock(&vmm->mutex); - nvkm_vmm_unmap_locked(vmm, vma); + nvkm_vmm_unmap_locked(vmm, vma, false); mutex_unlock(&vmm->mutex); } } @@ -1224,6 +1501,7 @@ nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma, nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags); nvkm_memory_unref(&vma->memory); vma->memory = nvkm_memory_ref(map->memory); + vma->mapped = true; vma->tags = map->tags; return 0; } @@ -1269,14 +1547,16 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) if (vma->mapref || !vma->sparse) { do { - const bool map = next->memory != NULL; + const bool mem = next->memory != NULL; + const bool map = next->mapped; const u8 refd = next->refd; const u64 addr = next->addr; u64 size = next->size; /* Merge regions that are in the same state. */ while ((next = node(next, next)) && next->part && - (next->memory != NULL) == map && + (next->mapped == map) && + (next->memory != NULL) == mem && (next->refd == refd)) size += next->size; @@ -1286,7 +1566,8 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) * the page tree. */ nvkm_vmm_ptes_unmap_put(vmm, &page[refd], addr, - size, vma->sparse); + size, vma->sparse, + !mem); } else if (refd != NVKM_VMA_PAGE_NONE) { /* Drop allocation-time PTE references. 
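* (i.e. references taken via nvkm_vmm_ptes_get() when the VMA was
* allocated with getref set, rather than at map time.)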
*/ @@ -1301,7 +1582,7 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) */ next = vma; do { - if (next->memory) + if (next->mapped) nvkm_vmm_unmap_region(vmm, next); } while ((next = node(vma, next)) && next->part); @@ -1522,7 +1803,7 @@ nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) } static bool -nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes) +nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, bool pfn, u32 ptei, u32 ptes) { const struct nvkm_vmm_desc *desc = it->desc; const int type = desc->type == SPT; @@ -1544,7 +1825,7 @@ nvkm_vmm_boot(struct nvkm_vmm *vmm) if (ret) return ret; - nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, + nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false, false, nvkm_vmm_boot_ptes, NULL, NULL, NULL); vmm->bootstrapped = true; return 0; @@ -1584,7 +1865,8 @@ nvkm_vmm_new(struct nvkm_device *device, u64 addr, u64 size, void *argv, struct nvkm_mmu *mmu = device->mmu; struct nvkm_vmm *vmm = NULL; int ret; - ret = mmu->func->vmm.ctor(mmu, addr, size, argv, argc, key, name, &vmm); + ret = mmu->func->vmm.ctor(mmu, false, addr, size, argv, argc, + key, name, &vmm); if (ret) nvkm_vmm_unref(&vmm); *pvmm = vmm; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h index 42ad326521a3..5e55ecbd8005 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h @@ -67,6 +67,10 @@ struct nvkm_vmm_desc_func { nvkm_vmm_pte_func mem; nvkm_vmm_pte_func dma; nvkm_vmm_pte_func sgl; + + nvkm_vmm_pte_func pfn; + bool (*pfn_clear)(struct nvkm_vmm *, struct nvkm_mmu_pt *, u32 ptei, u32 ptes); + nvkm_vmm_pxe_func pfn_unmap; }; extern const struct nvkm_vmm_desc_func gf100_vmm_pgd; @@ -141,6 +145,11 @@ struct nvkm_vmm_func { struct nvkm_vmm_map *); void (*flush)(struct nvkm_vmm *, int depth); + int (*mthd)(struct nvkm_vmm *, struct nvkm_client *, + u32 mthd, void *argv, u32 argc); + + void (*invalidate_pdb)(struct nvkm_vmm *, u64 addr); + u64 page_block; const struct nvkm_vmm_page page[]; }; @@ -151,11 +160,12 @@ struct nvkm_vmm_join { }; int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *, - const char *name, struct nvkm_vmm **); + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *, const char *name, + struct nvkm_vmm **); int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *, - u32 pd_header, u64 addr, u64 size, struct lock_class_key *, - const char *name, struct nvkm_vmm *); + u32 pd_header, bool managed, u64 addr, u64 size, + struct lock_class_key *, const char *name, struct nvkm_vmm *); struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr); struct nvkm_vma *nvkm_vmm_node_split(struct nvkm_vmm *, struct nvkm_vma *, u64 addr, u64 size); @@ -163,13 +173,25 @@ int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref, bool sparse, u8 page, u8 align, u64 size, struct nvkm_vma **pvma); void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *); -void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *); -void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma); +void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *, bool pfn); +void nvkm_vmm_unmap_region(struct nvkm_vmm *, struct nvkm_vma *); + +#define NVKM_VMM_PFN_ADDR 0xfffffffffffff000ULL +#define NVKM_VMM_PFN_ADDR_SHIFT 12 +#define NVKM_VMM_PFN_APER 0x00000000000000f0ULL +#define 
NVKM_VMM_PFN_HOST 0x0000000000000000ULL +#define NVKM_VMM_PFN_VRAM 0x0000000000000010ULL +#define NVKM_VMM_PFN_W 0x0000000000000002ULL +#define NVKM_VMM_PFN_V 0x0000000000000001ULL +#define NVKM_VMM_PFN_NONE 0x0000000000000000ULL + +int nvkm_vmm_pfn_map(struct nvkm_vmm *, u8 page, u64 addr, u64 size, u64 *pfn); +int nvkm_vmm_pfn_unmap(struct nvkm_vmm *, u64 addr, u64 size); struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail); int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32, - u64, u64, void *, u32, struct lock_class_key *, + bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int nv04_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); @@ -179,70 +201,76 @@ int nv50_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void nv50_vmm_flush(struct nvkm_vmm *, int); int gf100_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, - struct nvkm_mmu *, u64, u64, void *, u32, + struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gf100_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); int gf100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); void gf100_vmm_part(struct nvkm_vmm *, struct nvkm_memory *); int gf100_vmm_aper(enum nvkm_memory_target); int gf100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); -void gf100_vmm_flush_(struct nvkm_vmm *, int); void gf100_vmm_flush(struct nvkm_vmm *, int); +void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); +void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gk20a_vmm_aper(enum nvkm_memory_target); int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, - struct nvkm_mmu *, u64, u64, void *, u32, + struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); int gm200_vmm_join_(struct nvkm_vmm *, struct nvkm_memory *, u64 base); int gm200_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); +int gp100_vmm_new_(const struct nvkm_vmm_func *, + struct nvkm_mmu *, bool, u64, u64, void *, u32, + struct lock_class_key *, const char *, struct nvkm_vmm **); int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); void gp100_vmm_flush(struct nvkm_vmm *, int); +int gp100_vmm_mthd(struct nvkm_vmm *, struct nvkm_client *, u32, void *, u32); +void gp100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); int gv100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *); -int nv04_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv04_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv41_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv41_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv44_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv44_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int nv50_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int nv50_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int mcp77_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int mcp77_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int g84_vmm_new(struct nvkm_mmu *, 
u64, u64, void *, u32, +int g84_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gf100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gf100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gk104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gk104_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gk20a_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gk20a_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm200_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, +int gm200_vmm_new_fixed(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm200_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gm200_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm20b_vmm_new_fixed(struct nvkm_mmu *, u64, u64, void *, u32, +int gm20b_vmm_new_fixed(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gm20b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gm20b_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gp100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gp100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gp10b_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int gv100_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); -int tu104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32, +int tu102_vmm_new(struct nvkm_mmu *, bool, u64, u64, void *, u32, struct lock_class_key *, const char *, struct nvkm_vmm **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c index faf5a7e9265e..ab6424faf84c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c @@ -178,15 +178,19 @@ gf100_vmm_desc_16_16[] = { }; void -gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) +gf100_vmm_invalidate_pdb(struct nvkm_vmm *vmm, u64 addr) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100cb8, addr); +} + +void +gf100_vmm_invalidate(struct nvkm_vmm *vmm, u32 type) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; struct nvkm_device *device = subdev->device; - u32 type = depth << 24; - - type = 0x00000001; /* PAGE_ALL */ - if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ + struct nvkm_mmu_pt *pd = vmm->pd->pt[0]; + u64 addr = 0; mutex_lock(&subdev->mutex); /* Looks like maybe a "free flush slots" counter, the @@ -197,7 +201,20 @@ gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) break; ); - nvkm_wr32(device, 0x100cb8, vmm->pd->pt[0]->addr >> 8); + if (!(type & 0x00000002) /* ALL_PDB. 
*/) { + switch (nvkm_memory_target(pd->memory)) { + case NVKM_MEM_TARGET_VRAM: addr |= 0x00000000; break; + case NVKM_MEM_TARGET_HOST: addr |= 0x00000002; break; + case NVKM_MEM_TARGET_NCOH: addr |= 0x00000003; break; + default: + WARN_ON(1); + break; + } + addr |= (vmm->pd->pt[0]->addr >> 12) << 4; + + vmm->func->invalidate_pdb(vmm, addr); + } + nvkm_wr32(device, 0x100cbc, 0x80000000 | type); /* Wait for flush to be queued? */ @@ -211,7 +228,10 @@ gf100_vmm_flush_(struct nvkm_vmm *vmm, int depth) void gf100_vmm_flush(struct nvkm_vmm *vmm, int depth) { - gf100_vmm_flush_(vmm, 0); + u32 type = 0x00000001; /* PAGE_ALL */ + if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) + type |= 0x00000004; /* HUB_ONLY */ + gf100_vmm_invalidate(vmm, type); } int @@ -354,6 +374,7 @@ gf100_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gf100_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, { 12, &gf100_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, @@ -368,6 +389,7 @@ gf100_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gf100_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, { 12, &gf100_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, @@ -378,14 +400,14 @@ gf100_vmm_16 = { int gf100_vmm_new_(const struct nvkm_vmm_func *func_16, const struct nvkm_vmm_func *func_17, - struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { switch (mmu->subdev.device->fb->page) { - case 16: return nv04_vmm_new_(func_16, mmu, 0, addr, size, + case 16: return nv04_vmm_new_(func_16, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); - case 17: return nv04_vmm_new_(func_17, mmu, 0, addr, size, + case 17: return nv04_vmm_new_(func_17, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); default: WARN_ON(1); @@ -394,10 +416,10 @@ gf100_vmm_new_(const struct nvkm_vmm_func *func_16, } int -gf100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gf100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, addr, + return gf100_vmm_new_(&gf100_vmm_16, &gf100_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c index 0ebb7bccfcd2..0b59c01fd146 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk104.c @@ -71,6 +71,7 @@ gk104_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xVxC }, { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xVHx }, @@ -85,6 +86,7 @@ gk104_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xVxC }, { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xVHx }, @@ -93,10 +95,10 @@ gk104_vmm_16 = 
{ }; int -gk104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gk104_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, addr, + return gf100_vmm_new_(&gk104_vmm_16, &gk104_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c index 8086994a0446..5a9582dce970 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c @@ -40,6 +40,7 @@ gk20a_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 17, &gk104_vmm_desc_17_17[0], NVKM_VMM_PAGE_xxHC }, { 12, &gk104_vmm_desc_17_12[0], NVKM_VMM_PAGE_xxHx }, @@ -54,6 +55,7 @@ gk20a_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 16, &gk104_vmm_desc_16_16[0], NVKM_VMM_PAGE_xxHC }, { 12, &gk104_vmm_desc_16_12[0], NVKM_VMM_PAGE_xxHx }, @@ -62,10 +64,10 @@ gk20a_vmm_16 = { }; int -gk20a_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gk20a_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, addr, + return gf100_vmm_new_(&gk20a_vmm_16, &gk20a_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c index a1676a4644fe..2e61af02d4d8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm200.c @@ -113,6 +113,7 @@ gm200_vmm_17 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SVxC }, @@ -128,6 +129,7 @@ gm200_vmm_16 = { .aper = gf100_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SVxC }, @@ -139,9 +141,9 @@ gm200_vmm_16 = { int gm200_vmm_new_(const struct nvkm_vmm_func *func_16, const struct nvkm_vmm_func *func_17, - struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { const struct nvkm_vmm_func *func; union { @@ -163,23 +165,23 @@ gm200_vmm_new_(const struct nvkm_vmm_func *func_16, } else return ret; - return nvkm_vmm_new_(func, mmu, 0, addr, size, key, name, pvmm); + return nvkm_vmm_new_(func, mmu, 0, managed, addr, size, key, name, pvmm); } int -gm200_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm 
**pvmm) +gm200_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + return gm200_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } int -gm200_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, +gm200_vmm_new_fixed(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, addr, + return gf100_vmm_new_(&gm200_vmm_16, &gm200_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c index 64d4b6cff8dd..96b759695dd8 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c @@ -28,6 +28,7 @@ gm20b_vmm_17 = { .aper = gk20a_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_17_17[1], NVKM_VMM_PAGE_Sxxx }, { 17, &gm200_vmm_desc_17_17[0], NVKM_VMM_PAGE_SxHC }, @@ -43,6 +44,7 @@ gm20b_vmm_16 = { .aper = gk20a_vmm_aper, .valid = gf100_vmm_valid, .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, .page = { { 27, &gm200_vmm_desc_16_16[1], NVKM_VMM_PAGE_Sxxx }, { 16, &gm200_vmm_desc_16_16[0], NVKM_VMM_PAGE_SxHC }, @@ -52,19 +54,19 @@ gm20b_vmm_16 = { }; int -gm20b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gm20b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + return gm200_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } int -gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, u64 addr, u64 size, +gm20b_vmm_new_fixed(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, addr, + return gf100_vmm_new_(&gm20b_vmm_16, &gm20b_vmm_17, mmu, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index 059fafe0e771..b4f519768d5e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -21,12 +21,90 @@ */ #include "vmm.h" +#include <core/client.h> #include <subdev/fb.h> #include <subdev/ltc.h> +#include <subdev/timer.h> +#include <engine/gr.h> #include <nvif/ifc00d.h> #include <nvif/unpack.h> +static void +gp100_vmm_pfn_unmap(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + struct device *dev = vmm->mmu->subdev.device->dev; + dma_addr_t addr; + + nvkm_kmap(pt->memory); + while (ptes--) { + u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 0); + u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 4); + u64 data = (u64)datahi << 32 | datalo; + if ((data & (3ULL << 1)) != 0) { + addr = (data >> 8) << 12; + dma_unmap_page(dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); + } + 
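		/*
		 * Illustrative aside, not part of the patch: bits 2:1 of a
		 * gp100 PTE hold the aperture field, and the pfn-map path
		 * below only sets a non-zero aperture (2, meaning
		 * SYSTEM_COHERENT_MEMORY) for pages it ran through
		 * dma_map_page(), so a non-zero field is what marks a PTE
		 * whose host DMA mapping must be torn down here. Worked
		 * bits for an assumed value:
		 *
		 *	data = 0x000000123456780d
		 *	  bit 0    = 1  VALID
		 *	  bits 2:1 = 2  SYSTEM_COHERENT_MEMORY
		 *	  bit 3    = 1  VOL
		 *	  addr     = (data >> 8) << 12 = 0x12345678000
		 */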
ptei++; + } + nvkm_done(pt->memory); +} + +static bool +gp100_vmm_pfn_clear(struct nvkm_vmm *vmm, + struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes) +{ + bool dma = false; + nvkm_kmap(pt->memory); + while (ptes--) { + u32 datalo = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 0); + u32 datahi = nvkm_ro32(pt->memory, pt->base + ptei * 8 + 4); + u64 data = (u64)datahi << 32 | datalo; + if ((data & BIT_ULL(0)) && (data & (3ULL << 1)) != 0) { + VMM_WO064(pt, vmm, ptei * 8, data & ~BIT_ULL(0)); + dma = true; + } + ptei++; + } + nvkm_done(pt->memory); + return dma; +} + +static void +gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, + u32 ptei, u32 ptes, struct nvkm_vmm_map *map) +{ + struct device *dev = vmm->mmu->subdev.device->dev; + dma_addr_t addr; + + nvkm_kmap(pt->memory); + while (ptes--) { + u64 data = 0; + if (!(*map->pfn & NVKM_VMM_PFN_W)) + data |= BIT_ULL(6); /* RO. */ + + if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) { + addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT; + addr = dma_map_page(dev, pfn_to_page(addr), 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (!WARN_ON(dma_mapping_error(dev, addr))) { + data |= addr >> 4; + data |= 2ULL << 1; /* SYSTEM_COHERENT_MEMORY. */ + data |= BIT_ULL(3); /* VOL. */ + data |= BIT_ULL(0); /* VALID. */ + } + } else { + data |= (*map->pfn & NVKM_VMM_PFN_ADDR) >> 4; + data |= BIT_ULL(0); /* VALID. */ + } + + VMM_WO064(pt, vmm, ptei++ * 8, data); + map->pfn++; + } + nvkm_done(pt->memory); +} + static inline void gp100_vmm_pgt_pte(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt, u32 ptei, u32 ptes, struct nvkm_vmm_map *map, u64 addr) @@ -89,6 +167,9 @@ gp100_vmm_desc_spt = { .mem = gp100_vmm_pgt_mem, .dma = gp100_vmm_pgt_dma, .sgl = gp100_vmm_pgt_sgl, + .pfn = gp100_vmm_pgt_pfn, + .pfn_clear = gp100_vmm_pfn_clear, + .pfn_unmap = gp100_vmm_pfn_unmap, }; static void @@ -306,16 +387,100 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, return 0; } +static int +gp100_vmm_fault_cancel(struct nvkm_vmm *vmm, void *argv, u32 argc) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + union { + struct gp100_vmm_fault_cancel_v0 v0; + } *args = argv; + int ret = -ENOSYS; + u32 inst, aper; + + if ((ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) + return ret; + + /* Translate MaxwellFaultBufferA instance pointer to the same + * format as the NV_GR_FECS_CURRENT_CTX register. + */ + aper = (args->v0.inst >> 8) & 3; + args->v0.inst >>= 12; + args->v0.inst |= aper << 28; + args->v0.inst |= 0x80000000; + + if (!WARN_ON(nvkm_gr_ctxsw_pause(device))) { + if ((inst = nvkm_gr_ctxsw_inst(device)) == args->v0.inst) { + gf100_vmm_invalidate(vmm, 0x0000001b + /* CANCEL_TARGETED. */ | + (args->v0.hub << 20) | + (args->v0.gpc << 15) | + (args->v0.client << 9)); + } + WARN_ON(nvkm_gr_ctxsw_resume(device)); + } + + return 0; +} + +static int +gp100_vmm_fault_replay(struct nvkm_vmm *vmm, void *argv, u32 argc) +{ + union { + struct gp100_vmm_fault_replay_vn vn; + } *args = argv; + int ret = -ENOSYS; + + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + gf100_vmm_invalidate(vmm, 0x0000000b); /* REPLAY_GLOBAL. 
*/ + } + + return ret; +} + +int +gp100_vmm_mthd(struct nvkm_vmm *vmm, + struct nvkm_client *client, u32 mthd, void *argv, u32 argc) +{ + if (client->super) { + switch (mthd) { + case GP100_VMM_VN_FAULT_REPLAY: + return gp100_vmm_fault_replay(vmm, argv, argc); + case GP100_VMM_VN_FAULT_CANCEL: + return gp100_vmm_fault_cancel(vmm, argv, argc); + default: + break; + } + } + return -EINVAL; +} + +void +gp100_vmm_invalidate_pdb(struct nvkm_vmm *vmm, u64 addr) +{ + struct nvkm_device *device = vmm->mmu->subdev.device; + nvkm_wr32(device, 0x100cb8, lower_32_bits(addr)); + nvkm_wr32(device, 0x100cec, upper_32_bits(addr)); +} + void gp100_vmm_flush(struct nvkm_vmm *vmm, int depth) { - gf100_vmm_flush_(vmm, 5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth); + u32 type = (5 /* CACHE_LEVEL_UP_TO_PDE3 */ - depth) << 24; + type = 0; /*XXX: need to confirm stuff works with depth enabled... */ + if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) + type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000001; /* PAGE_ALL */ + gf100_vmm_invalidate(vmm, type); } int gp100_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst) { - const u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11); /* 64KiB */ + u64 base = BIT_ULL(10) /* VER2 */ | BIT_ULL(11) /* 64KiB */; + if (vmm->replay) { + base |= BIT_ULL(4); /* FAULT_REPLAY_TEX */ + base |= BIT_ULL(5); /* FAULT_REPLAY_GCC */ + } return gf100_vmm_join_(vmm, inst, base); } @@ -326,6 +491,8 @@ gp100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, @@ -338,10 +505,39 @@ gp100_vmm = { }; int -gp100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gp100_vmm_new_(const struct nvkm_vmm_func *func, + struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) +{ + union { + struct gp100_vmm_vn vn; + struct gp100_vmm_v0 v0; + } *args = argv; + int ret = -ENOSYS; + bool replay; + + if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) { + replay = args->v0.fault_replay != 0; + } else + if (!(ret = nvif_unvers(ret, &argv, &argc, args->vn))) { + replay = false; + } else + return ret; + + ret = nvkm_vmm_new_(func, mmu, 0, managed, addr, size, key, name, pvmm); + if (ret) + return ret; + + (*pvmm)->replay = replay; + return 0; +} + +int +gp100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp100_vmm, mmu, 0, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gp100_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c index 3dcc6bddb32f..e081239afe58 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c @@ -28,6 +28,8 @@ gp10b_vmm = { .aper = gk20a_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, @@ -40,10 +42,10 @@ gp10b_vmm = { }; int 
-gp10b_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gp10b_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gp10b_vmm, mmu, 0, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gp10b_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c index 2fa40c16e6d2..f0e21f63253a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgv100.c @@ -66,6 +66,8 @@ gv100_vmm = { .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, + .invalidate_pdb = gp100_vmm_invalidate_pdb, .page = { { 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, @@ -78,10 +80,10 @@ gv100_vmm = { }; int -gv100_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +gv100_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&gv100_vmm, mmu, 0, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&gv100_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c index e63d984cbfd4..bdddd99f5877 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmmcp77.c @@ -36,10 +36,10 @@ mcp77_vmm = { }; int -mcp77_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) +mcp77_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&mcp77_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&mcp77_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c index 0cab1ffc9f64..4c6b3b7d221f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv04.c @@ -100,16 +100,17 @@ nv04_vmm = { int nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, - u32 pd_header, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, - struct nvkm_vmm **pvmm) + u32 pd_header, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, + const char *name, struct nvkm_vmm **pvmm) { union { struct nv04_vmm_vn vn; } *args = argv; int ret; - ret = nvkm_vmm_new_(func, mmu, pd_header, addr, size, key, name, pvmm); + ret = nvkm_vmm_new_(func, mmu, pd_header, managed, addr, size, + key, name, pvmm); if (ret) return ret; @@ -117,15 +118,15 @@ nv04_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu, } int -nv04_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv04_vmm_new(struct nvkm_mmu *mmu, 
bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { struct nvkm_memory *mem; struct nvkm_vmm *vmm; int ret; - ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, addr, size, + ret = nv04_vmm_new_(&nv04_vmm, mmu, 8, managed, addr, size, argv, argc, key, name, &vmm); *pvmm = vmm; if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c index b595f130e573..1d3369683a21 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv41.c @@ -104,10 +104,10 @@ nv41_vmm = { }; int -nv41_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv41_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&nv41_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&nv41_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c index b834e4352334..a82936ba9890 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv44.c @@ -205,15 +205,15 @@ nv44_vmm = { }; int -nv44_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv44_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { struct nvkm_subdev *subdev = &mmu->subdev; struct nvkm_vmm *vmm; int ret; - ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, addr, size, + ret = nv04_vmm_new_(&nv44_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, &vmm); *pvmm = vmm; if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c index 64f75d906202..c98afe3134ee 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c @@ -376,10 +376,10 @@ nv50_vmm = { }; int -nv50_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, void *argv, u32 argc, - struct lock_class_key *key, const char *name, +nv50_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, + void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&nv50_vmm, mmu, 0, addr, size, + return nv04_vmm_new_(&nv50_vmm, mmu, 0, managed, addr, size, argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index adaadd92110f..be91cffc3b52 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -24,7 +24,7 @@ #include <subdev/timer.h> static void -tu104_vmm_flush(struct nvkm_vmm *vmm, int depth) +tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) { struct nvkm_subdev *subdev = &vmm->mmu->subdev; struct nvkm_device *device = subdev->device; @@ -50,12 +50,13 @@ tu104_vmm_flush(struct nvkm_vmm *vmm, int depth) } static const struct nvkm_vmm_func -tu104_vmm = { +tu102_vmm = { .join = gv100_vmm_join, .part = gf100_vmm_part, .aper = gf100_vmm_aper, .valid = gp100_vmm_valid, - .flush = tu104_vmm_flush, + .flush = tu102_vmm_flush, + .mthd = gp100_vmm_mthd, .page = { { 47, 
&gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx }, { 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx }, @@ -68,10 +69,10 @@ tu104_vmm = { }; int -tu104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size, +tu102_vmm_new(struct nvkm_mmu *mmu, bool managed, u64 addr, u64 size, void *argv, u32 argc, struct lock_class_key *key, const char *name, struct nvkm_vmm **pvmm) { - return nv04_vmm_new_(&tu104_vmm, mmu, 0, addr, size, - argv, argc, key, name, pvmm); + return gp100_vmm_new_(&tu102_vmm, mmu, managed, addr, size, + argv, argc, key, name, pvmm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c index 11b28b086a06..7b052879af72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c @@ -88,10 +88,10 @@ nvkm_memx_fini(struct nvkm_memx **pmemx, bool exec) if (exec) { nvkm_pmu_send(pmu, reply, PROC_MEMX, MEMX_MSG_EXEC, memx->base, finish); + nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", + reply[0], reply[1]); } - nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", - reply[0], reply[1]); kfree(memx); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c index 5c14d6ac855d..1df09ed6fe6d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -853,7 +853,7 @@ acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) * and the expected behavior on RM as well */ if (ret && ret != 0x1d) { - nvkm_error(subdev, "HS unload failed, ret 0x%08x", ret); + nvkm_error(subdev, "HS unload failed, ret 0x%08x\n", ret); return -EINVAL; } nvkm_debug(subdev, "HS unload blob completed\n"); @@ -922,7 +922,7 @@ acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) if (ret < 0) { return ret; } else if (ret > 0) { - nvkm_error(subdev, "HS load failed, ret 0x%08x", ret); + nvkm_error(subdev, "HS load failed, ret 0x%08x\n", ret); return -EINVAL; } nvkm_debug(subdev, "HS load blob completed\n"); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c index 3695cde669f8..07914e36939e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c @@ -132,11 +132,12 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode) duty = nvkm_therm_update_linear(therm); break; case NVBIOS_THERM_FAN_OTHER: - if (therm->cstate) + if (therm->cstate) { duty = therm->cstate; - else + poll = false; + } else { duty = nvkm_therm_update_linear_fallback(therm); - poll = false; + } break; } immd = false; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c index 67ada1d9a28c..cce6e4e90ebf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c @@ -41,6 +41,22 @@ nvkm_top_device_new(struct nvkm_top *top) } u32 +nvkm_top_addr(struct nvkm_device *device, enum nvkm_devidx index) +{ + struct nvkm_top *top = device->top; + struct nvkm_top_device *info; + + if (top) { + list_for_each_entry(info, &top->device, head) { + if (info->index == index) + return info->addr; + } + } + + return 0; +} + +u32 nvkm_top_reset(struct nvkm_device *device, enum nvkm_devidx index) { struct nvkm_top *top = device->top; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c index 39081eadfd84..e01746ce9fc4 100644 
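Illustrative aside on the nvkm_top_addr() helper added above: it walks the topology list parsed from the PTOP registers and returns the MMIO base recorded for a given sub-device, or 0 when no entry exists. A hedged usage sketch; the fall-back constant is hypothetical and not taken from this series:

	u32 base = nvkm_top_addr(device, NVKM_SUBDEV_GSP);
	if (!base)
		base = 0x110000;	/* assumed fixed fall-back address */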
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c @@ -73,6 +73,7 @@ gk104_top_oneinit(struct nvkm_top *top) #define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A #define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1) \ info->index = NVKM_ENGINE_##A##0 + inst +#define C_(A) if (inst == 0) info->index = NVKM_SUBDEV_##A switch (type) { case 0x00000000: A_(GR ); break; case 0x00000001: A_(CE0 ); break; @@ -88,6 +89,7 @@ gk104_top_oneinit(struct nvkm_top *top) case 0x0000000f: A_(NVENC1); break; case 0x00000010: B_(NVDEC ); break; case 0x00000013: B_(CE ); break; + case 0x00000014: C_(GSP ); break; break; default: break; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild index bcd179ba11d0..146adcdd316a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -2,6 +2,7 @@ nvkm-y += nvkm/subdev/volt/base.o nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gf100.o +nvkm-y += nvkm/subdev/volt/gf117.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c new file mode 100644 index 000000000000..547a58f0aeac --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c @@ -0,0 +1,60 @@ +/* + * Copyright 2019 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ilia Mirkin + */ +#include "priv.h" + +#include <subdev/fuse.h> + +static int +gf117_volt_speedo_read(struct nvkm_volt *volt) +{ + struct nvkm_device *device = volt->subdev.device; + struct nvkm_fuse *fuse = device->fuse; + + if (!fuse) + return -EINVAL; + + return nvkm_fuse_read(fuse, 0x3a8); +} + +static const struct nvkm_volt_func +gf117_volt = { + .oneinit = gf100_volt_oneinit, + .vid_get = nvkm_voltgpio_get, + .vid_set = nvkm_voltgpio_set, + .speedo_read = gf117_volt_speedo_read, +}; + +int +gf117_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_volt *volt; + int ret; + + ret = nvkm_volt_new_(&gf117_volt, device, index, &volt); + *pvolt = volt; + if (ret) + return ret; + + return nvkm_voltgpio_init(volt); +} diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c index 00a9c2ab9e6c..64fb788b6647 100644 --- a/drivers/gpu/drm/omapdrm/dss/dsi.c +++ b/drivers/gpu/drm/omapdrm/dss/dsi.c @@ -1406,7 +1406,7 @@ static void dsi_pll_disable(struct dss_pll *pll) static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; struct dss_pll_clock_info *cinfo = &dsi->pll.cinfo; enum dss_clk_source dispc_clk_src, dsi_clk_src; int dsi_module = dsi->module_id; @@ -1467,7 +1467,7 @@ static int dsi_dump_dsi_clocks(struct seq_file *s, void *p) #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; unsigned long flags; struct dsi_irq_stats stats; @@ -1558,7 +1558,7 @@ static int dsi_dump_dsi_irqs(struct seq_file *s, void *p) static int dsi_dump_dsi_regs(struct seq_file *s, void *p) { - struct dsi_data *dsi = p; + struct dsi_data *dsi = s->private; if (dsi_runtime_get(dsi)) return 0; @@ -4751,6 +4751,17 @@ static int dsi_set_config(struct omap_dss_device *dssdev, dsi->vm.flags |= DISPLAY_FLAGS_HSYNC_HIGH; dsi->vm.flags &= ~DISPLAY_FLAGS_VSYNC_LOW; dsi->vm.flags |= DISPLAY_FLAGS_VSYNC_HIGH; + /* + * HACK: These flags should be handled through the omap_dss_device bus + * flags, but this will only be possible when the DSI encoder will be + * converted to the omapdrm-managed encoder model. 
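 * Gloss, not part of the patch: each DISPLAY_FLAGS_* pair touched below
 * is mutually exclusive, so a polarity is forced with a clear-then-set
 * sequence, e.g. for the data-enable line:
 *
 *	dsi->vm.flags &= ~DISPLAY_FLAGS_DE_LOW;
 *	dsi->vm.flags |= DISPLAY_FLAGS_DE_HIGH;
 *
 * The same idiom pins the pixel-data and sync edges to fixed values
 * until drm_bus_flags can carry these polarities instead.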
+ */ + dsi->vm.flags &= ~DISPLAY_FLAGS_PIXDATA_NEGEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_PIXDATA_POSEDGE; + dsi->vm.flags &= ~DISPLAY_FLAGS_DE_LOW; + dsi->vm.flags |= DISPLAY_FLAGS_DE_HIGH; + dsi->vm.flags &= ~DISPLAY_FLAGS_SYNC_POSEDGE; + dsi->vm.flags |= DISPLAY_FLAGS_SYNC_NEGEDGE; dss_mgr_set_timings(&dsi->output, &dsi->vm); @@ -5083,15 +5094,15 @@ static int dsi_bind(struct device *dev, struct device *master, void *data) snprintf(name, sizeof(name), "dsi%u_regs", dsi->module_id + 1); dsi->debugfs.regs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_regs, &dsi); + dsi_dump_dsi_regs, dsi); #ifdef CONFIG_OMAP2_DSS_COLLECT_IRQ_STATS snprintf(name, sizeof(name), "dsi%u_irqs", dsi->module_id + 1); dsi->debugfs.irqs = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_irqs, &dsi); + dsi_dump_dsi_irqs, dsi); #endif snprintf(name, sizeof(name), "dsi%u_clks", dsi->module_id + 1); dsi->debugfs.clks = dss_debugfs_create_file(dss, name, - dsi_dump_dsi_clocks, &dsi); + dsi_dump_dsi_clocks, dsi); return 0; } @@ -5104,8 +5115,6 @@ static void dsi_unbind(struct device *dev, struct device *master, void *data) dss_debugfs_remove_file(dsi->debugfs.irqs); dss_debugfs_remove_file(dsi->debugfs.regs); - of_platform_depopulate(dev); - WARN_ON(dsi->scp_clk_refcount > 0); dss_pll_unregister(&dsi->pll); @@ -5457,6 +5466,8 @@ static int dsi_remove(struct platform_device *pdev) dsi_uninit_output(dsi); + of_platform_depopulate(&pdev->dev); + pm_runtime_disable(&pdev->dev); if (dsi->vdds_dsi_reg != NULL && dsi->vdds_dsi_enabled) { diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 11a76b6c9165..bb81e310eb6d 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -252,14 +252,10 @@ static struct drm_driver qxl_driver = { #if defined(CONFIG_DEBUG_FS) .debugfs_init = qxl_debugfs_init, #endif - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = drm_gem_prime_export, .gem_prime_import = drm_gem_prime_import, .gem_prime_pin = qxl_gem_prime_pin, .gem_prime_unpin = qxl_gem_prime_unpin, - .gem_prime_get_sg_table = qxl_gem_prime_get_sg_table, - .gem_prime_import_sg_table = qxl_gem_prime_import_sg_table, .gem_prime_vmap = qxl_gem_prime_vmap, .gem_prime_vunmap = qxl_gem_prime_vunmap, .gem_prime_mmap = qxl_gem_prime_mmap, diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c index 22e1faf0473a..8b448eca1cd9 100644 --- a/drivers/gpu/drm/qxl/qxl_prime.c +++ b/drivers/gpu/drm/qxl/qxl_prime.c @@ -42,20 +42,6 @@ void qxl_gem_prime_unpin(struct drm_gem_object *obj) qxl_bo_unpin(bo); } -struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENOSYS); -} - -struct drm_gem_object *qxl_gem_prime_import_sg_table( - struct drm_device *dev, struct dma_buf_attachment *attach, - struct sg_table *table) -{ - WARN_ONCE(1, "not implemented"); - return ERR_PTR(-ENOSYS); -} - void *qxl_gem_prime_vmap(struct drm_gem_object *obj) { struct qxl_bo *bo = gem_to_qxl_bo(obj); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 96175d48a902..4cdea14d552f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -9,14 +9,17 @@ #include <linux/clk.h> #include <linux/mutex.h> +#include <linux/platform_device.h> #include <linux/sys_soc.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> +#include 
<drm/drm_device.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_vblank.h> #include "rcar_du_crtc.h" #include "rcar_du_drv.h" diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index abd70d2931b0..75ab17af13a9 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -20,6 +20,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_drv.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_probe_helper.h> diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index b0c80dffd8b8..3b7d50a8fb9b 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -10,10 +10,12 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> +#include <drm/drm_device.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_probe_helper.h> +#include <drm/drm_vblank.h> #include <linux/of_graph.h> #include <linux/wait.h> diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index 321ac80b48d3..c6430027169f 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -10,7 +10,9 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_crtc.h> +#include <drm/drm_device.h> #include <drm/drm_fb_cma_helper.h> +#include <drm/drm_fourcc.h> #include <drm/drm_gem_cma_helper.h> #include <drm/drm_plane_helper.h> diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index 76a39eee7c9c..0878accbd134 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -13,6 +13,7 @@ #include <drm/drm_gem_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> +#include <drm/drm_vblank.h> #include <linux/bitops.h> #include <linux/dma-mapping.h> diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index c3dd750c7189..ce4d82d293e4 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang <hjc@rock-chips.com> - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
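A sketch ahead of the rockchip_rgb and sun4i_backend hunks that follow, assuming only core OF APIs: for_each_child_of_node() and the related iterators take a reference on the node they hand out and drop it on the next iteration, so an early exit from the loop must put the last reference itself, which is exactly what the added of_node_put() calls below do. The predicate name here is hypothetical:

	struct device_node *endpoint;

	for_each_child_of_node(port, endpoint) {
		if (endpoint_matches(endpoint)) {	/* hypothetical test */
			of_node_put(endpoint);	/* balance the iterator's ref */
			break;
		}
	}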
*/ #include <drm/drmP.h> @@ -113,8 +104,10 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev, child_count++; ret = drm_of_find_panel_or_bridge(dev->of_node, 0, endpoint_id, &panel, &bridge); - if (!ret) + if (!ret) { + of_node_put(endpoint); break; + } } of_node_put(port); diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.h b/drivers/gpu/drm/rockchip/rockchip_rgb.h index 38b52e63b2b0..27b9635124bc 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.h +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.h @@ -1,17 +1,8 @@ -//SPDX-License-Identifier: GPL-2.0+ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd * Author: * Sandy Huang <hjc@rock-chips.com> - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. */ #ifdef CONFIG_ROCKCHIP_RGB diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c index 8e31b6628d09..35ddbec1375a 100644 --- a/drivers/gpu/drm/scheduler/sched_entity.c +++ b/drivers/gpu/drm/scheduler/sched_entity.c @@ -453,13 +453,10 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity) while ((entity->dependency = sched->ops->dependency(sched_job, entity))) { + trace_drm_sched_job_wait_dep(sched_job, entity->dependency); - if (drm_sched_entity_add_dependency_cb(entity)) { - - trace_drm_sched_job_wait_dep(sched_job, - entity->dependency); + if (drm_sched_entity_add_dependency_cb(entity)) return NULL; - } } /* skip jobs from entity that marked guilty */ diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index c2c042287c19..1dbbc3a1b763 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -45,10 +45,11 @@ config DRM_SUN6I_DSI default MACH_SUN8I select CRC_CCITT select DRM_MIPI_DSI + select PHY_SUN6I_MIPI_DPHY help Choose this option if you want to have an Allwinner SoC with MIPI-DSI support. If M is selected the module will be called - sun6i-dsi + sun6i_mipi_dsi. 
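A sketch of the generic-PHY consumer sequence that the sun6i_mipi_dsi.c hunks further below adopt in place of the driver-private D-PHY code; the calls are the kernel's PHY framework API, while the clock, format and lane values are purely illustrative:

	struct phy *dphy = devm_phy_get(dev, "dphy");
	union phy_configure_opts opts = { };

	phy_mipi_dphy_get_default_config(148500000 /* pixel clock, Hz */,
					 24 /* bpp */, 4 /* lanes */,
					 &opts.mipi_dphy);
	phy_init(dphy);
	phy_set_mode(dphy, PHY_MODE_MIPI_DPHY);
	phy_configure(dphy, &opts);
	phy_power_on(dphy);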
config DRM_SUN8I_DW_HDMI tristate "Support for Allwinner version of DesignWare HDMI" diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile index 0eb38ac8e86e..0d04f2447b01 100644 --- a/drivers/gpu/drm/sun4i/Makefile +++ b/drivers/gpu/drm/sun4i/Makefile @@ -24,9 +24,6 @@ sun4i-tcon-y += sun4i_lvds.o sun4i-tcon-y += sun4i_tcon.o sun4i-tcon-y += sun4i_rgb.o -sun6i-dsi-y += sun6i_mipi_dphy.o -sun6i-dsi-y += sun6i_mipi_dsi.o - obj-$(CONFIG_DRM_SUN4I) += sun4i-drm.o obj-$(CONFIG_DRM_SUN4I) += sun4i-tcon.o obj-$(CONFIG_DRM_SUN4I) += sun4i_tv.o @@ -37,7 +34,7 @@ ifdef CONFIG_DRM_SUN4I_BACKEND obj-$(CONFIG_DRM_SUN4I) += sun4i-frontend.o endif obj-$(CONFIG_DRM_SUN4I_HDMI) += sun4i-drm-hdmi.o -obj-$(CONFIG_DRM_SUN6I_DSI) += sun6i-dsi.o +obj-$(CONFIG_DRM_SUN6I_DSI) += sun6i_mipi_dsi.o obj-$(CONFIG_DRM_SUN8I_DW_HDMI) += sun8i-drm-hdmi.o obj-$(CONFIG_DRM_SUN8I_MIXER) += sun8i-mixer.o obj-$(CONFIG_DRM_SUN8I_TCON_TOP) += sun8i_tcon_top.o diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 990847cb40f6..4c0d51f73237 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -764,17 +764,18 @@ static struct sun4i_frontend *sun4i_backend_find_frontend(struct sun4i_drv *drv, remote = of_graph_get_remote_port_parent(ep); if (!remote) continue; + of_node_put(remote); /* does this node match any registered engines? */ list_for_each_entry(frontend, &drv->frontend_list, list) { if (remote == frontend->node) { - of_node_put(remote); of_node_put(port); + of_node_put(ep); return frontend; } } } - + of_node_put(port); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index d95c6e224bd9..d18862629301 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -93,6 +93,8 @@ static void sun4i_hdmi_disable(struct drm_encoder *encoder) val = readl(hdmi->base + SUN4I_HDMI_VID_CTRL_REG); val &= ~SUN4I_HDMI_VID_CTRL_ENABLE; writel(val, hdmi->base + SUN4I_HDMI_VID_CTRL_REG); + + clk_disable_unprepare(hdmi->tmds_clk); } static void sun4i_hdmi_enable(struct drm_encoder *encoder) @@ -103,6 +105,8 @@ static void sun4i_hdmi_enable(struct drm_encoder *encoder) DRM_DEBUG_DRIVER("Enabling the HDMI Output\n"); + clk_prepare_enable(hdmi->tmds_clk); + sun4i_hdmi_setup_avi_infoframes(hdmi, mode); val |= SUN4I_HDMI_PKT_CTRL_TYPE(0, SUN4I_HDMI_PKT_AVI); val |= SUN4I_HDMI_PKT_CTRL_TYPE(1, SUN4I_HDMI_PKT_END); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 2bd2eda6480a..7136fc91c603 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -761,6 +761,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, return PTR_ERR(tcon->sclk0); } } + clk_prepare_enable(tcon->sclk0); if (tcon->quirks->has_channel_1) { tcon->sclk1 = devm_clk_get(dev, "tcon-ch1"); @@ -775,6 +776,7 @@ static int sun4i_tcon_init_clocks(struct device *dev, static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon) { + clk_disable_unprepare(tcon->sclk0); clk_disable_unprepare(tcon->clk); } diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c deleted file mode 100644 index e4d19431fa0e..000000000000 --- a/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c +++ /dev/null @@ -1,292 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Copyright (c) 2016 Allwinnertech Co., Ltd. 
- * Copyright (C) 2017-2018 Bootlin - * - * Maxime Ripard <maxime.ripard@free-electrons.com> - */ - -#include <linux/bitops.h> -#include <linux/clk.h> -#include <linux/of_address.h> -#include <linux/regmap.h> -#include <linux/reset.h> - -#include "sun6i_mipi_dsi.h" - -#define SUN6I_DPHY_GCTL_REG 0x00 -#define SUN6I_DPHY_GCTL_LANE_NUM(n) ((((n) - 1) & 3) << 4) -#define SUN6I_DPHY_GCTL_EN BIT(0) - -#define SUN6I_DPHY_TX_CTL_REG 0x04 -#define SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT BIT(28) - -#define SUN6I_DPHY_TX_TIME0_REG 0x10 -#define SUN6I_DPHY_TX_TIME0_HS_TRAIL(n) (((n) & 0xff) << 24) -#define SUN6I_DPHY_TX_TIME0_HS_PREPARE(n) (((n) & 0xff) << 16) -#define SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(n) ((n) & 0xff) - -#define SUN6I_DPHY_TX_TIME1_REG 0x14 -#define SUN6I_DPHY_TX_TIME1_CLK_POST(n) (((n) & 0xff) << 24) -#define SUN6I_DPHY_TX_TIME1_CLK_PRE(n) (((n) & 0xff) << 16) -#define SUN6I_DPHY_TX_TIME1_CLK_ZERO(n) (((n) & 0xff) << 8) -#define SUN6I_DPHY_TX_TIME1_CLK_PREPARE(n) ((n) & 0xff) - -#define SUN6I_DPHY_TX_TIME2_REG 0x18 -#define SUN6I_DPHY_TX_TIME2_CLK_TRAIL(n) ((n) & 0xff) - -#define SUN6I_DPHY_TX_TIME3_REG 0x1c - -#define SUN6I_DPHY_TX_TIME4_REG 0x20 -#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(n) (((n) & 0xff) << 8) -#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(n) ((n) & 0xff) - -#define SUN6I_DPHY_ANA0_REG 0x4c -#define SUN6I_DPHY_ANA0_REG_PWS BIT(31) -#define SUN6I_DPHY_ANA0_REG_DMPC BIT(28) -#define SUN6I_DPHY_ANA0_REG_DMPD(n) (((n) & 0xf) << 24) -#define SUN6I_DPHY_ANA0_REG_SLV(n) (((n) & 7) << 12) -#define SUN6I_DPHY_ANA0_REG_DEN(n) (((n) & 0xf) << 8) - -#define SUN6I_DPHY_ANA1_REG 0x50 -#define SUN6I_DPHY_ANA1_REG_VTTMODE BIT(31) -#define SUN6I_DPHY_ANA1_REG_CSMPS(n) (((n) & 3) << 28) -#define SUN6I_DPHY_ANA1_REG_SVTT(n) (((n) & 0xf) << 24) - -#define SUN6I_DPHY_ANA2_REG 0x54 -#define SUN6I_DPHY_ANA2_EN_P2S_CPU(n) (((n) & 0xf) << 24) -#define SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK GENMASK(27, 24) -#define SUN6I_DPHY_ANA2_EN_CK_CPU BIT(4) -#define SUN6I_DPHY_ANA2_REG_ENIB BIT(1) - -#define SUN6I_DPHY_ANA3_REG 0x58 -#define SUN6I_DPHY_ANA3_EN_VTTD(n) (((n) & 0xf) << 28) -#define SUN6I_DPHY_ANA3_EN_VTTD_MASK GENMASK(31, 28) -#define SUN6I_DPHY_ANA3_EN_VTTC BIT(27) -#define SUN6I_DPHY_ANA3_EN_DIV BIT(26) -#define SUN6I_DPHY_ANA3_EN_LDOC BIT(25) -#define SUN6I_DPHY_ANA3_EN_LDOD BIT(24) -#define SUN6I_DPHY_ANA3_EN_LDOR BIT(18) - -#define SUN6I_DPHY_ANA4_REG 0x5c -#define SUN6I_DPHY_ANA4_REG_DMPLVC BIT(24) -#define SUN6I_DPHY_ANA4_REG_DMPLVD(n) (((n) & 0xf) << 20) -#define SUN6I_DPHY_ANA4_REG_CKDV(n) (((n) & 0x1f) << 12) -#define SUN6I_DPHY_ANA4_REG_TMSC(n) (((n) & 3) << 10) -#define SUN6I_DPHY_ANA4_REG_TMSD(n) (((n) & 3) << 8) -#define SUN6I_DPHY_ANA4_REG_TXDNSC(n) (((n) & 3) << 6) -#define SUN6I_DPHY_ANA4_REG_TXDNSD(n) (((n) & 3) << 4) -#define SUN6I_DPHY_ANA4_REG_TXPUSC(n) (((n) & 3) << 2) -#define SUN6I_DPHY_ANA4_REG_TXPUSD(n) ((n) & 3) - -#define SUN6I_DPHY_DBG5_REG 0xf4 - -int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes) -{ - reset_control_deassert(dphy->reset); - clk_prepare_enable(dphy->mod_clk); - clk_set_rate_exclusive(dphy->mod_clk, 150000000); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, - SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, - SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | - SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | - SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, - SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | - SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | - SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | - 
SUN6I_DPHY_TX_TIME1_CLK_POST(10)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, - SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); - - regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, - SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | - SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); - - regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, - SUN6I_DPHY_GCTL_LANE_NUM(lanes) | - SUN6I_DPHY_GCTL_EN); - - return 0; -} - -int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes) -{ - u8 lanes_mask = GENMASK(lanes - 1, 0); - - regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, - SUN6I_DPHY_ANA0_REG_PWS | - SUN6I_DPHY_ANA0_REG_DMPC | - SUN6I_DPHY_ANA0_REG_SLV(7) | - SUN6I_DPHY_ANA0_REG_DMPD(lanes_mask) | - SUN6I_DPHY_ANA0_REG_DEN(lanes_mask)); - - regmap_write(dphy->regs, SUN6I_DPHY_ANA1_REG, - SUN6I_DPHY_ANA1_REG_CSMPS(1) | - SUN6I_DPHY_ANA1_REG_SVTT(7)); - - regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG, - SUN6I_DPHY_ANA4_REG_CKDV(1) | - SUN6I_DPHY_ANA4_REG_TMSC(1) | - SUN6I_DPHY_ANA4_REG_TMSD(1) | - SUN6I_DPHY_ANA4_REG_TXDNSC(1) | - SUN6I_DPHY_ANA4_REG_TXDNSD(1) | - SUN6I_DPHY_ANA4_REG_TXPUSC(1) | - SUN6I_DPHY_ANA4_REG_TXPUSD(1) | - SUN6I_DPHY_ANA4_REG_DMPLVC | - SUN6I_DPHY_ANA4_REG_DMPLVD(lanes_mask)); - - regmap_write(dphy->regs, SUN6I_DPHY_ANA2_REG, - SUN6I_DPHY_ANA2_REG_ENIB); - udelay(5); - - regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG, - SUN6I_DPHY_ANA3_EN_LDOR | - SUN6I_DPHY_ANA3_EN_LDOC | - SUN6I_DPHY_ANA3_EN_LDOD); - udelay(1); - - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, - SUN6I_DPHY_ANA3_EN_VTTC | - SUN6I_DPHY_ANA3_EN_VTTD_MASK, - SUN6I_DPHY_ANA3_EN_VTTC | - SUN6I_DPHY_ANA3_EN_VTTD(lanes_mask)); - udelay(1); - - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, - SUN6I_DPHY_ANA3_EN_DIV, - SUN6I_DPHY_ANA3_EN_DIV); - udelay(1); - - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, - SUN6I_DPHY_ANA2_EN_CK_CPU, - SUN6I_DPHY_ANA2_EN_CK_CPU); - udelay(1); - - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, - SUN6I_DPHY_ANA1_REG_VTTMODE, - SUN6I_DPHY_ANA1_REG_VTTMODE); - - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, - SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK, - SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask)); - - return 0; -} - -int sun6i_dphy_power_off(struct sun6i_dphy *dphy) -{ - regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, - SUN6I_DPHY_ANA1_REG_VTTMODE, 0); - - return 0; -} - -int sun6i_dphy_exit(struct sun6i_dphy *dphy) -{ - clk_rate_exclusive_put(dphy->mod_clk); - clk_disable_unprepare(dphy->mod_clk); - reset_control_assert(dphy->reset); - - return 0; -} - -static struct regmap_config sun6i_dphy_regmap_config = { - .reg_bits = 32, - .val_bits = 32, - .reg_stride = 4, - .max_register = SUN6I_DPHY_DBG5_REG, - .name = "mipi-dphy", -}; - -static const struct of_device_id sun6i_dphy_of_table[] = { - { .compatible = "allwinner,sun6i-a31-mipi-dphy" }, - { } -}; - -int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node) -{ - struct sun6i_dphy *dphy; - struct resource res; - void __iomem *regs; - int ret; - - if (!of_match_node(sun6i_dphy_of_table, node)) { - dev_err(dsi->dev, "Incompatible D-PHY\n"); - return -EINVAL; - } - - dphy = devm_kzalloc(dsi->dev, sizeof(*dphy), GFP_KERNEL); - if (!dphy) - return -ENOMEM; - - ret = of_address_to_resource(node, 0, &res); - if (ret) { - dev_err(dsi->dev, "phy: Couldn't get our resources\n"); - return ret; - } - - regs = devm_ioremap_resource(dsi->dev, &res); - if (IS_ERR(regs)) { - dev_err(dsi->dev, "Couldn't map the DPHY encoder registers\n"); - return PTR_ERR(regs); - } - - dphy->regs = 
-
-int sun6i_dphy_remove(struct sun6i_dsi *dsi)
-{
-	struct sun6i_dphy *dphy = dsi->dphy;
-
-	regmap_mmio_detach_clk(dphy->regs);
-	clk_put(dphy->mod_clk);
-	clk_put(dphy->bus_clk);
-	reset_control_put(dphy->reset);
-
-	return 0;
-}
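The file removed above was the driver-private D-PHY implementation; the rest of this change moves its callers onto the kernel's generic PHY framework, so the bespoke init/power-on entry points collapse into ordinary phy_*() calls. A minimal sketch of that consumer pattern, assuming a struct phy obtained elsewhere (error handling trimmed; the driver's real call sites are in the sun6i_mipi_dsi.c hunks below):

#include <linux/phy/phy.h>

static int example_enable_dphy(struct phy *dphy)
{
	int ret;

	ret = phy_init(dphy);		/* claim and initialize the PHY */
	if (ret)
		return ret;

	ret = phy_power_on(dphy);	/* power it up */
	if (ret)
		phy_exit(dphy);		/* undo phy_init() on failure */

	return ret;
}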
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index 1ebe56817fa9..318994cd1b85 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 
 #include <linux/phy/phy.h>
+#include <linux/phy/phy-mipi-dphy.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
@@ -616,6 +617,8 @@ static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder)
 	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
 	struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder);
 	struct mipi_dsi_device *device = dsi->device;
+	union phy_configure_opts opts = { 0 };
+	struct phy_configure_opts_mipi_dphy *cfg = &opts.mipi_dphy;
 	u16 delay;
 
 	DRM_DEBUG_DRIVER("Enabling DSI output\n");
@@ -634,8 +637,15 @@ static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder)
 	sun6i_dsi_setup_format(dsi, mode);
 	sun6i_dsi_setup_timings(dsi, mode);
 
-	sun6i_dphy_init(dsi->dphy, device->lanes);
-	sun6i_dphy_power_on(dsi->dphy, device->lanes);
+	phy_init(dsi->dphy);
+
+	phy_mipi_dphy_get_default_config(mode->clock * 1000,
+					 mipi_dsi_pixel_format_to_bpp(device->format),
+					 device->lanes, cfg);
+
+	phy_set_mode(dsi->dphy, PHY_MODE_MIPI_DPHY);
+	phy_configure(dsi->dphy, &opts);
+	phy_power_on(dsi->dphy);
 
 	if (!IS_ERR(dsi->panel))
 		drm_panel_prepare(dsi->panel);
@@ -673,8 +683,8 @@ static void sun6i_dsi_encoder_disable(struct drm_encoder *encoder)
 		drm_panel_unprepare(dsi->panel);
 	}
 
-	sun6i_dphy_power_off(dsi->dphy);
-	sun6i_dphy_exit(dsi->dphy);
+	phy_power_off(dsi->dphy);
+	phy_exit(dsi->dphy);
 
 	pm_runtime_put(dsi->dev);
 }
@@ -967,7 +977,6 @@ static const struct component_ops sun6i_dsi_ops = {
 static int sun6i_dsi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *dphy_node;
 	struct sun6i_dsi *dsi;
 	struct resource *res;
 	void __iomem *base;
@@ -1013,11 +1022,10 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 	 */
 	clk_set_rate_exclusive(dsi->mod_clk, 297000000);
 
-	dphy_node = of_parse_phandle(dev->of_node, "phys", 0);
-	ret = sun6i_dphy_probe(dsi, dphy_node);
-	of_node_put(dphy_node);
-	if (ret) {
+	dsi->dphy = devm_phy_get(dev, "dphy");
+	if (IS_ERR(dsi->dphy)) {
 		dev_err(dev, "Couldn't get the MIPI D-PHY\n");
+		ret = PTR_ERR(dsi->dphy);
 		goto err_unprotect_clk;
 	}
 
@@ -1026,7 +1034,7 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 	ret = mipi_dsi_host_register(&dsi->host);
 	if (ret) {
 		dev_err(dev, "Couldn't register MIPI-DSI host\n");
-		goto err_remove_phy;
+		goto err_pm_disable;
 	}
 
 	ret = component_add(&pdev->dev, &sun6i_dsi_ops);
@@ -1039,9 +1047,8 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 
 err_remove_dsi_host:
 	mipi_dsi_host_unregister(&dsi->host);
-err_remove_phy:
+err_pm_disable:
 	pm_runtime_disable(dev);
-	sun6i_dphy_remove(dsi);
 err_unprotect_clk:
 	clk_rate_exclusive_put(dsi->mod_clk);
 	return ret;
@@ -1055,7 +1062,6 @@ static int sun6i_dsi_remove(struct platform_device *pdev)
 	component_del(&pdev->dev, &sun6i_dsi_ops);
 	mipi_dsi_host_unregister(&dsi->host);
 	pm_runtime_disable(dev);
-	sun6i_dphy_remove(dsi);
 	clk_rate_exclusive_put(dsi->mod_clk);
 
 	return 0;
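phy_mipi_dphy_get_default_config() fills a phy_configure_opts_mipi_dphy from just the pixel clock, the per-pixel bit width and the lane count. The core of that derivation is ordinary link-budget arithmetic: every pixel bit has to leave through one of the lanes. A back-of-the-envelope sketch, not the kernel's implementation (the real helper in drivers/phy/phy-core-mipi-dphy.c also derives the prepare/zero/trail timing windows from the D-PHY spec; example_* names are invented):

#include <linux/types.h>

/* Rough per-lane HS bit rate: pixel_clock * bits-per-pixel / lanes. */
static u64 example_dphy_hs_clk_rate(u64 pixel_clock_hz, u32 bpp, u32 lanes)
{
	/* e.g. 148500000 Hz * 24 bpp / 4 lanes = 891 Mbit/s per lane */
	return pixel_clock_hz * bpp / lanes;
}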
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
index dbbc5b3ecbda..a07090579f84 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
@@ -13,13 +13,6 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_mipi_dsi.h>
 
-struct sun6i_dphy {
-	struct clk *bus_clk;
-	struct clk *mod_clk;
-	struct regmap *regs;
-	struct reset_control *reset;
-};
-
 struct sun6i_dsi {
 	struct drm_connector connector;
 	struct drm_encoder encoder;
@@ -29,7 +22,7 @@ struct sun6i_dsi {
 	struct clk *mod_clk;
 	struct regmap *regs;
 	struct reset_control *reset;
-	struct sun6i_dphy *dphy;
+	struct phy *dphy;
 
 	struct device *dev;
 	struct sun4i_drv *drv;
@@ -52,12 +45,4 @@ static inline struct sun6i_dsi *encoder_to_sun6i_dsi(const struct drm_encoder *encoder)
 	return container_of(encoder, struct sun6i_dsi, encoder);
 };
 
-int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node);
-int sun6i_dphy_remove(struct sun6i_dsi *dsi);
-
-int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes);
-int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes);
-int sun6i_dphy_power_off(struct sun6i_dphy *dphy);
-int sun6i_dphy_exit(struct sun6i_dphy *dphy);
-
 #endif /* _SUN6I_MIPI_DSI_H_ */
diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c
index 437e7a27f21d..495150415020 100644
--- a/drivers/gpu/drm/vc4/vc4_perfmon.c
+++ b/drivers/gpu/drm/vc4/vc4_perfmon.c
@@ -117,7 +117,7 @@ int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	perfmon = kzalloc(sizeof(*perfmon) + (req->ncounters * sizeof(u64)),
+	perfmon = kzalloc(struct_size(perfmon, counters, req->ncounters),
 			  GFP_KERNEL);
 	if (!perfmon)
 		return -ENOMEM;
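The vc4 hunk above is a good illustration of struct_size() from <linux/overflow.h>: for a struct ending in a flexible array member it computes sizeof(struct) plus n * sizeof(element) with saturating arithmetic, so a caller-controlled count cannot wrap the allocation size. A self-contained sketch of the pattern (the example_* type is invented; the real vc4_perfmon has more fields):

#include <linux/overflow.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_perfmon {
	refcount_t refcnt;
	u8 ncounters;
	u64 counters[];		/* flexible array member */
};

static struct example_perfmon *example_alloc(u8 ncounters)
{
	/* struct_size() saturates to SIZE_MAX on overflow, so kzalloc fails */
	struct example_perfmon *pm =
		kzalloc(struct_size(pm, counters, ncounters), GFP_KERNEL);

	if (pm)
		pm->ncounters = ncounters;
	return pm;
}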
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index af92964b6889..b996ac1d4fcc 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -205,14 +205,10 @@ static struct drm_driver driver = {
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = virtio_gpu_debugfs_init,
 #endif
-	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_pin = virtgpu_gem_prime_pin,
 	.gem_prime_unpin = virtgpu_gem_prime_unpin,
-	.gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table,
-	.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = virtgpu_gem_prime_vmap,
 	.gem_prime_vunmap = virtgpu_gem_prime_vunmap,
 	.gem_prime_mmap = virtgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index d577cb76f5ad..3238fdf58eb4 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -354,10 +354,6 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait);
 /* virtgpu_prime.c */
 int virtgpu_gem_prime_pin(struct drm_gem_object *obj);
 void virtgpu_gem_prime_unpin(struct drm_gem_object *obj);
-struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
-	struct drm_device *dev, struct dma_buf_attachment *attach,
-	struct sg_table *sgt);
 void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int virtgpu_gem_prime_mmap(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 86ce0ae93f59..c59ec34c80a5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -39,20 +39,6 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj)
 	WARN_ONCE(1, "not implemented");
 }
 
-struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENODEV);
-}
-
-struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
-	struct drm_device *dev, struct dma_buf_attachment *attach,
-	struct sg_table *table)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENODEV);
-}
-
 void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj)
 {
 	struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
diff --git a/drivers/gpu/drm/vkms/vkms_crc.c b/drivers/gpu/drm/vkms/vkms_crc.c
index 9d9e8146db90..d7b409a3c0f8 100644
--- a/drivers/gpu/drm/vkms/vkms_crc.c
+++ b/drivers/gpu/drm/vkms/vkms_crc.c
@@ -1,4 +1,5 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0+
+
 #include "vkms_drv.h"
 #include <linux/crc32.h>
 #include <drm/drm_atomic.h>
diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c
index d44bfc392491..8a9aeb0a9ea8 100644
--- a/drivers/gpu/drm/vkms/vkms_crtc.c
+++ b/drivers/gpu/drm/vkms/vkms_crtc.c
@@ -1,22 +1,20 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_probe_helper.h>
 
-static void _vblank_handle(struct vkms_output *output)
+static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
 {
+	struct vkms_output *output = container_of(timer, struct vkms_output,
+						  vblank_hrtimer);
 	struct drm_crtc *crtc = &output->crtc;
 	struct vkms_crtc_state *state = to_vkms_crtc_state(crtc->state);
+	u64 ret_overrun;
 	bool ret;
 
 	spin_lock(&output->lock);
+
 	ret = drm_crtc_handle_vblank(crtc);
 	if (!ret)
 		DRM_ERROR("vkms failure on handling vblank");
@@ -37,19 +35,11 @@ static void _vblank_handle(struct vkms_output *output)
 			DRM_WARN("failed to queue vkms_crc_work_handle");
 	}
 
-	spin_unlock(&output->lock);
-}
-
-static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer)
-{
-	struct vkms_output *output = container_of(timer, struct vkms_output,
-						  vblank_hrtimer);
-	int ret_overrun;
-
-	_vblank_handle(output);
-
 	ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer,
 					  output->period_ns);
+	WARN_ON(ret_overrun != 1);
+
+	spin_unlock(&output->lock);
 
 	return HRTIMER_RESTART;
 }
@@ -87,6 +77,9 @@ bool vkms_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe,
 
 	*vblank_time = output->vblank_hrtimer.node.expires;
 
+	if (!in_vblank_irq)
+		*vblank_time -= output->period_ns;
+
 	return true;
 }
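The vkms rework above folds the vblank handling directly into the hrtimer callback, covers it with the output lock, and rearms the timer with hrtimer_forward_now(), which advances the expiry by whole periods and returns how many periods elapsed (1 unless the callback ran late). The general self-rearming pattern, as a minimal sketch (names prefixed example_ are invented):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/kernel.h>

struct example_output {
	struct hrtimer timer;
	u64 period_ns;
};

static enum hrtimer_restart example_tick(struct hrtimer *timer)
{
	struct example_output *out = container_of(timer,
						  struct example_output, timer);
	u64 overruns;

	/* ... do the periodic work (here: the simulated vblank) ... */

	overruns = hrtimer_forward_now(timer, ns_to_ktime(out->period_ns));
	WARN_ON(overruns != 1);	/* more than one period means we ran late */

	return HRTIMER_RESTART;	/* keep the timer running */
}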
diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index b13f99a5c849..738dd6206d85 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -1,9 +1,4 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 /**
  * DOC: vkms (Virtual Kernel Modesetting)
diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
index e4469cd3d254..81f1cfbeb936 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.h
+++ b/drivers/gpu/drm/vkms/vkms_drv.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
 #ifndef _VKMS_DRV_H_
 #define _VKMS_DRV_H_
 
diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c
index 80311daed47a..138b0bb325cf 100644
--- a/drivers/gpu/drm/vkms/vkms_gem.c
+++ b/drivers/gpu/drm/vkms/vkms_gem.c
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include <linux/shmem_fs.h>
 
diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c
index c5b16efed51a..3b162b25312e 100644
--- a/drivers/gpu/drm/vkms/vkms_output.c
+++ b/drivers/gpu/drm/vkms/vkms_output.c
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_atomic_helper.h>
diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c
index 418817600ad1..0e67d2d42f0c 100644
--- a/drivers/gpu/drm/vkms/vkms_plane.c
+++ b/drivers/gpu/drm/vkms/vkms_plane.c
@@ -1,10 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
 
 #include "vkms_drv.h"
 #include <drm/drm_plane_helper.h>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 4638f6791cda..6165fe2c4504 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -26,6 +26,7 @@
  **************************************************************************/
 #include <linux/module.h>
 #include <linux/console.h>
+#include <linux/dma-mapping.h>
 
 #include <drm/drmP.h>
 #include "vmwgfx_drv.h"
@@ -34,7 +35,6 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_module.h>
-#include <linux/intel-iommu.h>
 
 #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
 #define VMWGFX_CHIP_SVGAII 0
@@ -546,6 +546,21 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv)
 }
 
 /**
+ * vmw_assume_iommu - Figure out whether coherent dma-remapping might be
+ * taking place.
+ * @dev: Pointer to the struct drm_device.
+ *
+ * Return: true if iommu present, false otherwise.
+ */
+static bool vmw_assume_iommu(struct drm_device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev->dev);
+
+	return !dma_is_direct(ops) && ops &&
+		ops->map_page != dma_direct_map_page;
+}
+
+/**
 * vmw_dma_select_mode - Determine how DMA mappings should be set up for this
 * system.
 *
@@ -565,55 +580,27 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
 		[vmw_dma_alloc_coherent] = "Using coherent TTM pages.",
 		[vmw_dma_map_populate] = "Keeping DMA mappings.",
 		[vmw_dma_map_bind] = "Giving up DMA mappings early."};
-#ifdef CONFIG_X86
-	const struct dma_map_ops *dma_ops = get_dma_ops(dev_priv->dev->dev);
 
-#ifdef CONFIG_INTEL_IOMMU
-	if (intel_iommu_enabled) {
+	if (vmw_force_coherent)
+		dev_priv->map_mode = vmw_dma_alloc_coherent;
+	else if (vmw_assume_iommu(dev_priv->dev))
 		dev_priv->map_mode = vmw_dma_map_populate;
-		goto out_fixup;
-	}
-#endif
-
-	if (!(vmw_force_iommu || vmw_force_coherent)) {
+	else if (!vmw_force_iommu)
 		dev_priv->map_mode = vmw_dma_phys;
-		DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
-		return 0;
-	}
-
-	dev_priv->map_mode = vmw_dma_map_populate;
-
-	if (dma_ops && dma_ops->sync_single_for_cpu)
+	else if (IS_ENABLED(CONFIG_SWIOTLB) && swiotlb_nr_tbl())
 		dev_priv->map_mode = vmw_dma_alloc_coherent;
-#ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl() == 0)
+	else
 		dev_priv->map_mode = vmw_dma_map_populate;
-#endif
 
-#ifdef CONFIG_INTEL_IOMMU
-out_fixup:
-#endif
-	if (dev_priv->map_mode == vmw_dma_map_populate &&
-	    vmw_restrict_iommu)
+	if (dev_priv->map_mode == vmw_dma_map_populate && vmw_restrict_iommu)
 		dev_priv->map_mode = vmw_dma_map_bind;
 
-	if (vmw_force_coherent)
-		dev_priv->map_mode = vmw_dma_alloc_coherent;
-
-#if !defined(CONFIG_SWIOTLB) && !defined(CONFIG_INTEL_IOMMU)
-	/*
-	 * No coherent page pool
-	 */
-	if (dev_priv->map_mode == vmw_dma_alloc_coherent)
+	/* No TTM coherent page pool? FIXME: Ask TTM instead! */
+	if (!(IS_ENABLED(CONFIG_SWIOTLB) || IS_ENABLED(CONFIG_INTEL_IOMMU)) &&
+	    (dev_priv->map_mode == vmw_dma_alloc_coherent))
 		return -EINVAL;
-#endif
-
-#else /* CONFIG_X86 */
-	dev_priv->map_mode = vmw_dma_map_populate;
-#endif /* CONFIG_X86 */
 
 	DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]);
-
 	return 0;
 }
 
@@ -625,24 +612,20 @@ out_fixup:
 * With 32-bit we can only handle 32 bit PFNs. Optionally set that
 * restriction also for 64-bit systems.
 */
-#ifdef CONFIG_INTEL_IOMMU
 static int vmw_dma_masks(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
+	int ret = 0;
 
-	if (intel_iommu_enabled &&
+	ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64));
+	if (dev_priv->map_mode != vmw_dma_phys &&
 	    (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) {
 		DRM_INFO("Restricting DMA addresses to 44 bits.\n");
-		return dma_set_mask(dev->dev, DMA_BIT_MASK(44));
+		return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44));
 	}
-	return 0;
-}
-#else
-static int vmw_dma_masks(struct vmw_private *dev_priv)
-{
-	return 0;
+
+	return ret;
 }
-#endif
 
 static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 {
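The vmwgfx hunks above replace dma_set_mask() with dma_set_mask_and_coherent(), which updates the streaming and the coherent DMA masks together so the two can no longer diverge. A hedged sketch of the usual pattern, widest mask first and then optionally narrowing (example_* is an invented name; the driver's real policy also depends on its map_mode and a module option):

#include <linux/dma-mapping.h>

static int example_set_dma_masks(struct device *dev, bool restrict_to_44bit)
{
	/* Try the full 64-bit mask first... */
	int ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));

	if (ret)
		return ret;

	/* ...then narrow it where the platform or configuration demands. */
	if (restrict_to_44bit)
		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(44));

	return ret;
}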
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index f2d13a72c05d..88b8178d4687 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3570,7 +3570,7 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 		*p_fence = NULL;
 	}
 
-	return 0;
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b351fb5214d3..ed2f67822f45 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1646,7 +1646,7 @@ static int vmw_kms_check_topology(struct drm_device *dev,
 		struct drm_connector_state *conn_state;
 		struct vmw_connector_state *vmw_conn_state;
 
-		if (!du->pref_active) {
+		if (!du->pref_active && new_crtc_state->enable) {
 			ret = -EINVAL;
 			goto clean;
 		}
@@ -2554,8 +2554,8 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv,
 				      user_fence_rep)
 {
 	struct vmw_fence_obj *fence = NULL;
-	uint32_t handle;
-	int ret;
+	uint32_t handle = 0;
+	int ret = 0;
 
 	if (file_priv || user_fence_rep || vmw_validation_has_bos(ctx) ||
 	    out_fence)
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index d303a2e17f5e..53c376d55fcf 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -235,8 +235,14 @@ static int gem_mmap_obj(struct xen_gem_object *xen_obj,
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_flags |= VM_MIXEDMAP;
 	vma->vm_pgoff = 0;
-	vma->vm_page_prot =
-			pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	/*
+	 * According to Xen on ARM ABI (xen/include/public/arch-arm.h):
+	 * all memory which is shared with other entities in the system
+	 * (including the hypervisor and other guests) must reside in memory
+	 * which is mapped as Normal Inner Write-Back Outer Write-Back
+	 * Inner-Shareable.
+	 */
+	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
 	/*
 	 * vm_operations_struct.fault handler will be called if CPU access
@@ -282,8 +288,9 @@ void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj)
 	if (!xen_obj->pages)
 		return NULL;
 
+	/* Please see comment in gem_mmap_obj on mapping and attributes. */
 	return vmap(xen_obj->pages, xen_obj->num_pages,
-		    VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+		    VM_MAP, PAGE_KERNEL);
 }
 
 void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj,
diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c
index 860da055c6bb..c2955d375394 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_kms.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c
@@ -54,7 +54,7 @@ fb_create(struct drm_device *dev, struct drm_file *filp,
 	  const struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	struct xen_drm_front_drm_info *drm_info = dev->dev_private;
-	static struct drm_framebuffer *fb;
+	struct drm_framebuffer *fb;
 	struct drm_gem_object *gem_obj;
 	int ret;
 
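The final hunk is a one-word fix with real consequences: a function-scope static is a single storage slot shared by every caller, so two concurrent fb_create() ioctls could overwrite each other's framebuffer pointer. A contrived sketch of the bug class (example_alloc_fb() is a made-up helper):

#include <drm/drm_framebuffer.h>

struct drm_framebuffer *example_alloc_fb(void);	/* hypothetical helper */

struct drm_framebuffer *broken_fb_create(void)
{
	/* BUG: one shared slot for ALL callers, not a per-call variable */
	static struct drm_framebuffer *fb;

	fb = example_alloc_fb();
	return fb;	/* a concurrent caller may have overwritten fb */
}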
