summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2013-11-10 18:35:33 +1000
committerDave Airlie <airlied@redhat.com>2013-11-10 18:35:33 +1000
commitab0169bb5cc4a5c86756dde662087f9d12302eb0 (patch)
tree495e668337410f6763480ea1f010213f6399e38c /drivers/gpu
parent8d0a2215931f1ffd77aef65cae2c0becc3f5d560 (diff)
parent13b3a0a77625c09c84825ef6ba81d957ec207841 (diff)
Merge tag 'bdw-stage1-2013-11-08-v2' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
So here's the Broadwell pull request. From a kernel driver pov there's two areas with big changes in Broadwell: - Completely new enumerated interrupt bits. On the plus side it now looks fairly uniform and sane. - Completely new pagetable layout. To ensure minimal impact on existing platforms we've refactored both the irq and low-level gtt handling code a lot in anticipation of the bdw push. So now bdw enabling in these areas just plugs in a bunch of vfuncs. Otherwise it's all fairly harmless adjusting of switch cases and if-ladders to shovel bdw into the right blocks. So minimized impact on existing platforms. I've also merged the bdw-stage1 branch into our -nightly integration branch for the past week to make sure we don't break anything. Note that there's still quite a flurry of patches floating around, but I've figured I'll push this out. I plan to keep the bdw fixes separate from my usual -fixes stream so that you can reject them easily in case it still looks like too much churn. Also, bdw is for now hidden behind the preliminary hw enabling module option. So there's no real pressure to get follow-up patches all into 3.13. 
* tag 'bdw-stage1-2013-11-08-v2' of git://people.freedesktop.org/~danvet/drm-intel: (75 commits) drm/i915: Mask the vblank interrupt on bdw by default drm/i915: Wire up cpu fifo underrun reporting support for bdw drm/i915: Optimize gen8_enable|disable_vblank functions drm/i915: Wire up pipe CRC support for bdw drm/i915: Wire up PCH interrupts for bdw drm/i915: Wire up port A aux channel drm/i915: Fix up the bdw pipe interrupt enable lists drm/i915: Optimize pipe irq handling on bdw drm/i915/bdw: Take render error interrupt out of the mask drm/i915/bdw: Add BDW PCH check first drm/i915: Use hsw_crt_get_config on BDW drm/i915/bdw: Change dp aux timeout to 600us on DDIA drm/i915/bdw: Enable trickle feed on Broadwell drm/i915/bdw: WaSingleSubspanDispatchOnAALinesAndPoints drm/i915/bdw: conservative SBE VUE cache mode drm/i915/bdw: Limit SDE poly depth FIFO to 2 drm/i915/bdw: Sampler power bypass disable ddrm/i915/bdw: Disable centroid pixel perf optimization drm/i915/bdw: BWGTLB clock gate disable drm/i915/bdw: Implement edp PSR workarounds ...
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/i915/i915_debugfs.c109
-rw-r--r--drivers/gpu/drm/i915/i915_drv.c34
-rw-r--r--drivers/gpu/drm/i915/i915_drv.h36
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c3
-rw-r--r--drivers/gpu/drm/i915/i915_gem_context.c3
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c35
-rw-r--r--drivers/gpu/drm/i915/i915_gem_gtt.c504
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c375
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h123
-rw-r--r--drivers/gpu/drm/i915/intel_crt.c11
-rw-r--r--drivers/gpu/drm/i915/intel_ddi.c124
-rw-r--r--drivers/gpu/drm/i915/intel_display.c82
-rw-r--r--drivers/gpu/drm/i915/intel_dp.c66
-rw-r--r--drivers/gpu/drm/i915/intel_hdmi.c2
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c164
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c194
-rw-r--r--drivers/gpu/drm/i915/intel_sprite.c7
-rw-r--r--drivers/gpu/drm/i915/intel_uncore.c58
19 files changed, 1806 insertions, 126 deletions
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 43866221cd4c..6ed45a984230 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -586,7 +586,53 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
if (ret)
return ret;
- if (IS_VALLEYVIEW(dev)) {
+ if (INTEL_INFO(dev)->gen >= 8) {
+ int i;
+ seq_printf(m, "Master Interrupt Control:\t%08x\n",
+ I915_READ(GEN8_MASTER_IRQ));
+
+ for (i = 0; i < 4; i++) {
+ seq_printf(m, "GT Interrupt IMR %d:\t%08x\n",
+ i, I915_READ(GEN8_GT_IMR(i)));
+ seq_printf(m, "GT Interrupt IIR %d:\t%08x\n",
+ i, I915_READ(GEN8_GT_IIR(i)));
+ seq_printf(m, "GT Interrupt IER %d:\t%08x\n",
+ i, I915_READ(GEN8_GT_IER(i)));
+ }
+
+ for_each_pipe(i) {
+ seq_printf(m, "Pipe %c IMR:\t%08x\n",
+ pipe_name(i),
+ I915_READ(GEN8_DE_PIPE_IMR(i)));
+ seq_printf(m, "Pipe %c IIR:\t%08x\n",
+ pipe_name(i),
+ I915_READ(GEN8_DE_PIPE_IIR(i)));
+ seq_printf(m, "Pipe %c IER:\t%08x\n",
+ pipe_name(i),
+ I915_READ(GEN8_DE_PIPE_IER(i)));
+ }
+
+ seq_printf(m, "Display Engine port interrupt mask:\t%08x\n",
+ I915_READ(GEN8_DE_PORT_IMR));
+ seq_printf(m, "Display Engine port interrupt identity:\t%08x\n",
+ I915_READ(GEN8_DE_PORT_IIR));
+ seq_printf(m, "Display Engine port interrupt enable:\t%08x\n",
+ I915_READ(GEN8_DE_PORT_IER));
+
+ seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n",
+ I915_READ(GEN8_DE_MISC_IMR));
+ seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n",
+ I915_READ(GEN8_DE_MISC_IIR));
+ seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n",
+ I915_READ(GEN8_DE_MISC_IER));
+
+ seq_printf(m, "PCU interrupt mask:\t%08x\n",
+ I915_READ(GEN8_PCU_IMR));
+ seq_printf(m, "PCU interrupt identity:\t%08x\n",
+ I915_READ(GEN8_PCU_IIR));
+ seq_printf(m, "PCU interrupt enable:\t%08x\n",
+ I915_READ(GEN8_PCU_IER));
+ } else if (IS_VALLEYVIEW(dev)) {
seq_printf(m, "Display IER:\t%08x\n",
I915_READ(VLV_IER));
seq_printf(m, "Display IIR:\t%08x\n",
@@ -658,7 +704,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
seq_printf(m, "Interrupts received: %d\n",
atomic_read(&dev_priv->irq_received));
for_each_ring(ring, dev_priv, i) {
- if (IS_GEN6(dev) || IS_GEN7(dev)) {
+ if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m,
"Graphics Interrupt mask (%s): %08x\n",
ring->name, I915_READ_IMR(ring));
@@ -1577,7 +1623,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
I915_READ16(C0DRB3));
seq_printf(m, "C1DRB3 = 0x%04x\n",
I915_READ16(C1DRB3));
- } else if (IS_GEN6(dev) || IS_GEN7(dev)) {
+ } else if (INTEL_INFO(dev)->gen >= 6) {
seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n",
I915_READ(MAD_DIMM_C0));
seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n",
@@ -1586,8 +1632,12 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
I915_READ(MAD_DIMM_C2));
seq_printf(m, "TILECTL = 0x%08x\n",
I915_READ(TILECTL));
- seq_printf(m, "ARB_MODE = 0x%08x\n",
- I915_READ(ARB_MODE));
+ if (IS_GEN8(dev))
+ seq_printf(m, "GAMTARBMODE = 0x%08x\n",
+ I915_READ(GAMTARBMODE));
+ else
+ seq_printf(m, "ARB_MODE = 0x%08x\n",
+ I915_READ(ARB_MODE));
seq_printf(m, "DISP_ARB_CTL = 0x%08x\n",
I915_READ(DISP_ARB_CTL));
}
@@ -1596,18 +1646,37 @@ static int i915_swizzle_info(struct seq_file *m, void *data)
return 0;
}
-static int i915_ppgtt_info(struct seq_file *m, void *data)
+/*
+ * Dump the gen8 aliasing PPGTT state to debugfs: the allocated page
+ * directory / page table counts, then the four PDP register values of every
+ * ring. Prints nothing when no aliasing PPGTT is set up.
+ */
+static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev)
{
- struct drm_info_node *node = (struct drm_info_node *) m->private;
- struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ring_buffer *ring;
- int i, ret;
+ struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+ int unused, i;
+ if (!ppgtt)
+ return;
+
+ seq_printf(m, "Page directories: %d\n", ppgtt->num_pd_pages);
+ seq_printf(m, "Page tables: %d\n", ppgtt->num_pt_pages);
+ for_each_ring(ring, dev_priv, unused) {
+ seq_printf(m, "%s\n", ring->name);
+ for (i = 0; i < 4; i++) {
+ /* Each PDP is a pair of 32-bit registers: low dword at
+ * offset, high dword at offset + 4. */
+ u32 offset = 0x270 + i * 8;
+ u64 pdp = I915_READ(ring->mmio_base + offset + 4);
+ pdp <<= 32;
+ pdp |= I915_READ(ring->mmio_base + offset);
+ /* One line per PDP; a nested loop on i here would print
+ * PDP0's value four times and end the outer loop early. */
+ seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp);
+ }
+ }
+}
+
+static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_ring_buffer *ring;
+ int i;
- ret = mutex_lock_interruptible(&dev->struct_mutex);
- if (ret)
- return ret;
if (INTEL_INFO(dev)->gen == 6)
seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE));
@@ -1626,6 +1695,22 @@ static int i915_ppgtt_info(struct seq_file *m, void *data)
seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
}
seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
+}
+
+static int i915_ppgtt_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+
+ int ret = mutex_lock_interruptible(&dev->struct_mutex);
+ if (ret)
+ return ret;
+
+ if (INTEL_INFO(dev)->gen >= 8)
+ gen8_ppgtt_info(m, dev);
+ else if (INTEL_INFO(dev)->gen >= 6)
+ gen6_ppgtt_info(m, dev);
+
mutex_unlock(&dev->struct_mutex);
return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a0804fa1e306..989be12cdd6e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -336,6 +336,24 @@ static const struct intel_device_info intel_haswell_m_info = {
.ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
};
+/*
+ * Device info for the Broadwell variant without .is_mobile set: gen 8, three
+ * pipes, four rings (render, BSD, BLT, VEBOX), LLC and DDI. Flagged
+ * .is_preliminary while Broadwell enabling is still in progress.
+ */
+static const struct intel_device_info intel_broadwell_d_info = {
+ .is_preliminary = 1,
+ .gen = 8, .num_pipes = 3,
+ .need_gfx_hws = 1, .has_hotplug = 1,
+ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+ .has_llc = 1,
+ .has_ddi = 1,
+};
+
+/*
+ * Device info for the mobile Broadwell variant: identical to
+ * intel_broadwell_d_info except that .is_mobile is set.
+ */
+static const struct intel_device_info intel_broadwell_m_info = {
+ .is_preliminary = 1,
+ .gen = 8, .is_mobile = 1, .num_pipes = 3,
+ .need_gfx_hws = 1, .has_hotplug = 1,
+ .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING,
+ .has_llc = 1,
+ .has_ddi = 1,
+};
+
/*
* Make sure any device matches here are from most specific to most
* general. For example, since the Quanta match is based on the subsystem
@@ -367,7 +385,9 @@ static const struct intel_device_info intel_haswell_m_info = {
INTEL_HSW_D_IDS(&intel_haswell_d_info), \
INTEL_HSW_M_IDS(&intel_haswell_m_info), \
INTEL_VLV_M_IDS(&intel_valleyview_m_info), \
- INTEL_VLV_D_IDS(&intel_valleyview_d_info)
+ INTEL_VLV_D_IDS(&intel_valleyview_d_info), \
+ INTEL_BDW_M_IDS(&intel_broadwell_m_info), \
+ INTEL_BDW_D_IDS(&intel_broadwell_d_info)
static const struct pci_device_id pciidlist[] = { /* aka */
INTEL_PCI_IDS,
@@ -428,6 +448,12 @@ void intel_detect_pch(struct drm_device *dev)
DRM_DEBUG_KMS("Found LynxPoint PCH\n");
WARN_ON(!IS_HASWELL(dev));
WARN_ON(IS_ULT(dev));
+ } else if (IS_BROADWELL(dev)) {
+ dev_priv->pch_type = PCH_LPT;
+ dev_priv->pch_id =
+ INTEL_PCH_LPT_LP_DEVICE_ID_TYPE;
+ DRM_DEBUG_KMS("This is Broadwell, assuming "
+ "LynxPoint LP PCH\n");
} else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
dev_priv->pch_type = PCH_LPT;
DRM_DEBUG_KMS("Found LynxPoint LP PCH\n");
@@ -452,6 +478,12 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
if (INTEL_INFO(dev)->gen < 6)
return 0;
+ /* Until we get further testing... */
+ if (IS_GEN8(dev)) {
+ WARN_ON(!i915_preliminary_hw_support);
+ return 0;
+ }
+
if (i915_semaphores >= 0)
return i915_semaphores;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b0dd4ea8133f..8600c315b4c4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -118,6 +118,10 @@ enum intel_display_power_domain {
#define HSW_ALWAYS_ON_POWER_DOMAINS ( \
BIT(POWER_DOMAIN_PIPE_A) | \
BIT(POWER_DOMAIN_TRANSCODER_EDP))
+/* Broadwell always-on power domains: the HSW set plus the pipe A panel
+ * fitter. */
+#define BDW_ALWAYS_ON_POWER_DOMAINS ( \
+ BIT(POWER_DOMAIN_PIPE_A) | \
+ BIT(POWER_DOMAIN_TRANSCODER_EDP) | \
+ BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER))
enum hpd_pin {
HPD_NONE = 0,
@@ -575,10 +579,21 @@ struct i915_gtt {
struct i915_hw_ppgtt {
struct i915_address_space base;
unsigned num_pd_entries;
- struct page **pt_pages;
- uint32_t pd_offset;
- dma_addr_t *pt_dma_addr;
-
+ /* Page-table backing pages. gen8 uses one contiguous allocation
+ * (gen8_pt_pages); pt_pages is the pre-existing array-of-pages form. */
+ union {
+ struct page **pt_pages;
+ struct page *gen8_pt_pages;
+ };
+ /* gen8 page-directory pages, and the number of directory / table pages
+ * actually allocated. */
+ struct page *pd_pages;
+ int num_pd_pages;
+ int num_pt_pages;
+ /* pd_dma_addr[i]: DMA address of gen8 page directory i. */
+ union {
+ uint32_t pd_offset;
+ dma_addr_t pd_dma_addr[4];
+ };
+ /* gen8_pt_dma_addr[i][j]: DMA address of page table j under gen8 page
+ * directory i. */
+ union {
+ dma_addr_t *pt_dma_addr;
+ dma_addr_t *gen8_pt_dma_addr[4];
+ };
int (*enable)(struct drm_device *dev);
};
@@ -1322,7 +1337,10 @@ typedef struct drm_i915_private {
struct mutex dpio_lock;
/** Cached value of IMR to avoid reads in updating the bitfield */
- u32 irq_mask;
+ union {
+ u32 irq_mask;
+ u32 de_irq_mask[I915_MAX_PIPES];
+ };
u32 gt_irq_mask;
u32 pm_irq_mask;
@@ -1733,6 +1751,7 @@ struct drm_i915_file_private {
(dev)->pdev->device == 0x010A)
#define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview)
#define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell)
+#define IS_BROADWELL(dev) (INTEL_INFO(dev)->gen == 8)
#define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile)
#define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \
((dev)->pdev->device & 0xFF00) == 0x0C00)
@@ -1754,6 +1773,7 @@ struct drm_i915_file_private {
#define IS_GEN5(dev) (INTEL_INFO(dev)->gen == 5)
#define IS_GEN6(dev) (INTEL_INFO(dev)->gen == 6)
#define IS_GEN7(dev) (INTEL_INFO(dev)->gen == 7)
+#define IS_GEN8(dev) (INTEL_INFO(dev)->gen == 8)
#define RENDER_RING (1<<RCS)
#define BSD_RING (1<<VCS)
@@ -1790,12 +1810,12 @@ struct drm_i915_file_private {
#define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
#define I915_HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
-#define HAS_IPS(dev) (IS_ULT(dev))
+#define HAS_IPS(dev) (IS_ULT(dev) || IS_BROADWELL(dev))
#define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi)
-#define HAS_POWER_WELL(dev) (IS_HASWELL(dev))
+#define HAS_POWER_WELL(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev))
#define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg)
-#define HAS_PSR(dev) (IS_HASWELL(dev))
+#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev))
#define INTEL_PCH_DEVICE_ID_MASK 0xff00
#define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e7b39d731db6..12bbd5eac70d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2954,6 +2954,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
obj->stride, obj->tiling_mode);
switch (INTEL_INFO(dev)->gen) {
+ case 8:
case 7:
case 6:
case 5:
@@ -4361,6 +4362,8 @@ void i915_gem_init_swizzling(struct drm_device *dev)
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
else if (IS_GEN7(dev))
I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
+ else if (IS_GEN8(dev))
+ I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
else
BUG();
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index cc619c138777..72a3df32292f 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -117,6 +117,9 @@ static int get_context_size(struct drm_device *dev)
else
ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
break;
+ case 8:
+ ret = GEN8_CXT_TOTAL_SIZE;
+ break;
default:
BUG();
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47e4b0f..885d595e0e02 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -212,6 +212,7 @@ static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
struct drm_i915_gem_relocation_entry *reloc)
{
+ struct drm_device *dev = obj->base.dev;
uint32_t page_offset = offset_in_page(reloc->offset);
char *vaddr;
int ret = -EINVAL;
@@ -223,6 +224,19 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj,
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+
+ if (INTEL_INFO(dev)->gen >= 8) {
+ page_offset = offset_in_page(page_offset + sizeof(uint32_t));
+
+ if (page_offset == 0) {
+ kunmap_atomic(vaddr);
+ vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+ (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
+ }
+
+ *(uint32_t *)(vaddr + page_offset) = 0;
+ }
+
kunmap_atomic(vaddr);
return 0;
@@ -253,6 +267,21 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
reloc_entry = (uint32_t __iomem *)
(reloc_page + offset_in_page(reloc->offset));
iowrite32(reloc->delta, reloc_entry);
+
+ if (INTEL_INFO(dev)->gen >= 8) {
+ reloc_entry += 1;
+
+ if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
+ io_mapping_unmap_atomic(reloc_page);
+ reloc_page = io_mapping_map_atomic_wc(
+ dev_priv->gtt.mappable,
+ reloc->offset + sizeof(uint32_t));
+ reloc_entry = reloc_page;
+ }
+
+ iowrite32(0, reloc_entry);
+ }
+
io_mapping_unmap_atomic(reloc_page);
return 0;
@@ -323,7 +352,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
return 0;
/* Check that the relocation address is valid... */
- if (unlikely(reloc->offset > obj->base.size - 4)) {
+ if (unlikely(reloc->offset >
+ obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
DRM_DEBUG("Relocation beyond object bounds: "
"obj %p target %d offset %d size %d.\n",
obj, reloc->target_handle,
@@ -1116,8 +1146,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
- * hsw should have this fixed, but let's be paranoid and do it
- * unconditionally for now. */
+ * hsw should have this fixed, but bdw mucks it up again. */
if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c4c42e7cbd7b..3620a1b0a73c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -30,6 +30,8 @@
#define GEN6_PPGTT_PD_ENTRIES 512
#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+typedef uint64_t gen8_gtt_pte_t;
+typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
/* PPGTT stuff */
#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
@@ -57,6 +59,41 @@
#define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
#define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
+#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
+#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+#define GEN8_LEGACY_PDPS 4
+
+#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
+#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */
+
+/*
+ * Build a 64-bit gen8 GTT PTE for @addr. The present and RW bits are set only
+ * when @valid is true. Any @level other than I915_CACHE_NONE selects
+ * PPAT_CACHED_INDEX; I915_CACHE_NONE selects PPAT_UNCACHED_INDEX.
+ */
+static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
+ enum i915_cache_level level,
+ bool valid)
+{
+ gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
+ pte |= addr;
+ if (level != I915_CACHE_NONE)
+ pte |= PPAT_CACHED_INDEX;
+ else
+ pte |= PPAT_UNCACHED_INDEX;
+ return pte;
+}
+
+/*
+ * Build a gen8 page-directory entry referring to @addr. The entry is always
+ * marked present and writable. Cached levels use PPAT_CACHED_PDE_INDEX,
+ * I915_CACHE_NONE uses PPAT_UNCACHED_INDEX. @dev is not used by the body.
+ */
+static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
+ dma_addr_t addr,
+ enum i915_cache_level level)
+{
+ gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
+ pde |= addr;
+ if (level != I915_CACHE_NONE)
+ pde |= PPAT_CACHED_PDE_INDEX;
+ else
+ pde |= PPAT_UNCACHED_INDEX;
+ return pde;
+}
+
static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
enum i915_cache_level level,
bool valid)
@@ -158,6 +195,257 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
}
+/* Broadwell Page Directory Pointer Descriptors */
+/*
+ * Emit ring commands that load the 64-bit @val into PDP register @entry of
+ * @ring: one MI_LOAD_REGISTER_IMM for the upper dword, then one for the lower
+ * dword. @entry must be below 4. Returns 0, or the error from
+ * intel_ring_begin().
+ */
+static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
+ uint64_t val)
+{
+ int ret;
+
+ BUG_ON(entry >= 4);
+
+ /* Two register loads of three dwords each. */
+ ret = intel_ring_begin(ring, 6);
+ if (ret)
+ return ret;
+
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
+ intel_ring_emit(ring, (u32)(val >> 32));
+ intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+ intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
+ intel_ring_emit(ring, (u32)(val));
+ intel_ring_advance(ring);
+
+ return 0;
+}
+
+/*
+ * Enable PPGTT on every ring, then queue PDP writes that point each ring at
+ * the aliasing PPGTT's page directories. Returns 0, or the first error
+ * returned by gen8_write_pdp().
+ */
+static int gen8_ppgtt_enable(struct drm_device *dev)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_ring_buffer *ring;
+ struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+ int i, j, ret;
+
+ /* bit of a hack to find the actual last used pd */
+ int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+
+ for_each_ring(ring, dev_priv, j) {
+ I915_WRITE(RING_MODE_GEN7(ring),
+ _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+ }
+
+ /* PDPs are written from the highest used directory down to 0. */
+ for (i = used_pd - 1; i >= 0; i--) {
+ dma_addr_t addr = ppgtt->pd_dma_addr[i];
+ for_each_ring(ring, dev_priv, j) {
+ ret = gen8_write_pdp(ring, i, addr);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Overwrite @num_entries PTEs, starting at PTE index @first_entry, with a PTE
+ * for the scratch page. When @use_scratch is false the scratch PTE is encoded
+ * as not valid.
+ */
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+ unsigned first_entry,
+ unsigned num_entries,
+ bool use_scratch)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ gen8_gtt_pte_t *pt_vaddr, scratch_pte;
+ unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
+ unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
+ unsigned last_pte, i;
+
+ scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
+ I915_CACHE_LLC, use_scratch);
+
+ /* Process one page table per iteration. */
+ while (num_entries) {
+ struct page *page_table = &ppgtt->gen8_pt_pages[act_pt];
+
+ /* Clamp the range to the end of the current page table. */
+ last_pte = first_pte + num_entries;
+ if (last_pte > GEN8_PTES_PER_PAGE)
+ last_pte = GEN8_PTES_PER_PAGE;
+
+ pt_vaddr = kmap_atomic(page_table);
+
+ for (i = first_pte; i < last_pte; i++)
+ pt_vaddr[i] = scratch_pte;
+
+ kunmap_atomic(pt_vaddr);
+
+ num_entries -= last_pte - first_pte;
+ first_pte = 0;
+ act_pt++;
+ }
+}
+
+/*
+ * Write a valid PTE for every page of @pages into the PPGTT, starting at PTE
+ * index @first_entry and using @cache_level for each entry. Page tables are
+ * mapped one at a time.
+ */
+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *pages,
+ unsigned first_entry,
+ enum i915_cache_level cache_level)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ gen8_gtt_pte_t *pt_vaddr;
+ unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
+ unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
+ struct sg_page_iter sg_iter;
+
+ pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);
+ for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+ dma_addr_t page_addr;
+
+ page_addr = sg_dma_address(sg_iter.sg) +
+ (sg_iter.sg_pgoffset << PAGE_SHIFT);
+ pt_vaddr[act_pte] = gen8_pte_encode(page_addr, cache_level,
+ true);
+ /* NOTE(review): when the final page fills a table, the next
+ * table is still mapped here although nothing more is written;
+ * confirm act_pt cannot step past the last page table. */
+ if (++act_pte == GEN8_PTES_PER_PAGE) {
+ kunmap_atomic(pt_vaddr);
+ act_pt++;
+ pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]);
+ act_pte = 0;
+
+ }
+ }
+ kunmap_atomic(pt_vaddr);
+}
+
+/*
+ * Tear down a gen8 PPGTT: unmap every page directory and page table that has
+ * a non-zero DMA address, free the per-directory DMA address arrays, then
+ * release the page-table and page-directory pages.
+ */
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+ struct i915_hw_ppgtt *ppgtt =
+ container_of(vm, struct i915_hw_ppgtt, base);
+ int i, j;
+
+ for (i = 0; i < ppgtt->num_pd_pages ; i++) {
+ if (ppgtt->pd_dma_addr[i]) {
+ pci_unmap_page(ppgtt->base.dev->pdev,
+ ppgtt->pd_dma_addr[i],
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+ /* The address array is NULL if its allocation failed
+ * after the directory was mapped; nothing to unmap or
+ * free for this directory then. */
+ if (!ppgtt->gen8_pt_dma_addr[i])
+ continue;
+
+ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
+ if (addr)
+ pci_unmap_page(ppgtt->base.dev->pdev,
+ addr,
+ PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+
+ }
+ }
+ kfree(ppgtt->gen8_pt_dma_addr[i]);
+ }
+
+ /* __free_pages() takes the allocation order, not a byte count. */
+ __free_pages(ppgtt->gen8_pt_pages,
+ get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
+ __free_pages(ppgtt->pd_pages,
+ get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
+}
+
+/**
+ * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
+ * net effect resembling a 2-level page table in normal x86 terms. Each PDP
+ * represents 1GB of memory
+ * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
+ *
+ * Allocates the page-directory and page-table pages for @size bytes of
+ * address space, DMA-maps them, fills in every PDE and points all PTEs at the
+ * scratch page. Returns 0 on success or -ENOMEM on allocation or mapping
+ * failure.
+ *
+ * TODO: Do something with the size parameter
+ **/
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+{
+ struct page *pt_pages;
+ int i, j, ret = -ENOMEM;
+ const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
+ const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+
+ if (size % (1<<30))
+ DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+
+ /* FIXME: split allocation into smaller pieces. For now we only ever do
+ * this once, but with full PPGTT, the multiple contiguous allocations
+ * will be bad.
+ */
+ ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
+ if (!ppgtt->pd_pages)
+ return -ENOMEM;
+
+ pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
+ if (!pt_pages) {
+ __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
+ return -ENOMEM;
+ }
+
+ /* Page counts record what the allocator handed out (a power of two),
+ * which can exceed what was asked for. */
+ ppgtt->gen8_pt_pages = pt_pages;
+ ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
+ ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
+ ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
+ ppgtt->enable = gen8_ppgtt_enable;
+ ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+ ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+ ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+
+ BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
+
+ /*
+ * - Create a mapping for the page directories.
+ * - For each page directory:
+ * allocate space for page table mappings.
+ * map each page table
+ */
+ for (i = 0; i < max_pdp; i++) {
+ dma_addr_t temp;
+ temp = pci_map_page(ppgtt->base.dev->pdev,
+ &ppgtt->pd_pages[i], 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
+ goto err_out;
+
+ ppgtt->pd_dma_addr[i] = temp;
+
+ /* Zeroed allocation: the cleanup on the error path unmaps
+ * every non-zero slot, so slots that were never mapped must
+ * read as 0 rather than stale heap contents. */
+ ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!ppgtt->gen8_pt_dma_addr[i])
+ goto err_out;
+
+ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
+ temp = pci_map_page(ppgtt->base.dev->pdev,
+ p, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
+
+ if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
+ goto err_out;
+
+ ppgtt->gen8_pt_dma_addr[i][j] = temp;
+ }
+ }
+
+ /* For now, the PPGTT helper functions all require that the PDEs are
+ * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
+ * will never need to touch the PDEs again */
+ for (i = 0; i < max_pdp; i++) {
+ gen8_ppgtt_pde_t *pd_vaddr;
+ pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
+ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+ dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
+ pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
+ I915_CACHE_LLC);
+ }
+ kunmap_atomic(pd_vaddr);
+ }
+
+ /* Start with every PTE pointing at the scratch page. */
+ ppgtt->base.clear_range(&ppgtt->base, 0,
+ ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
+ true);
+
+ DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
+ ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
+ DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
+ ppgtt->num_pt_pages,
+ (ppgtt->num_pt_pages - num_pt_pages) +
+ size % (1<<30));
+ return 0;
+
+err_out:
+ ppgtt->base.cleanup(&ppgtt->base);
+ return ret;
+}
+
static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
{
struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
@@ -410,6 +698,8 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
if (INTEL_INFO(dev)->gen < 8)
ret = gen6_ppgtt_init(ppgtt);
+ else if (IS_GEN8(dev))
+ ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
else
BUG();
@@ -573,6 +863,57 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
return 0;
}
+/*
+ * Store the 64-bit @pte at the I/O address @addr. Uses a single writeq() when
+ * the platform defines it; otherwise writes the low dword first, then the
+ * high dword at @addr + 4.
+ */
+static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
+{
+#ifdef writeq
+ writeq(pte, addr);
+#else
+ iowrite32((u32)pte, addr);
+ iowrite32(pte >> 32, addr + 4);
+#endif
+}
+
+/*
+ * Write a valid gen8 PTE into the global GTT for every page of @st, starting
+ * at entry index @first_entry and using cache level @level, then read the
+ * last written entry back and warn if it does not match.
+ */
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+ struct sg_table *st,
+ unsigned int first_entry,
+ enum i915_cache_level level)
+{
+ struct drm_i915_private *dev_priv = vm->dev->dev_private;
+ gen8_gtt_pte_t __iomem *gtt_entries =
+ (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+ int i = 0;
+ struct sg_page_iter sg_iter;
+ dma_addr_t addr;
+
+ for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+ addr = sg_dma_address(sg_iter.sg) +
+ (sg_iter.sg_pgoffset << PAGE_SHIFT);
+ gen8_set_pte(&gtt_entries[i],
+ gen8_pte_encode(addr, level, true));
+ i++;
+ }
+
+ /*
+ * XXX: This serves as a posting read to make sure that the PTE has
+ * actually been updated. There is some concern that even though
+ * registers and PTEs are within the same BAR that they are potentially
+ * of NUMA access patterns. Therefore, even with the way we assume
+ * hardware should work, we must keep this posting read for paranoia.
+ */
+ /* addr is only assigned inside the loop, hence the i != 0 guard.
+ * NOTE(review): readq() is used unconditionally here while
+ * gen8_set_pte() has a fallback for platforms without writeq; confirm
+ * readq is available on every supported build. */
+ if (i != 0)
+ WARN_ON(readq(&gtt_entries[i-1])
+ != gen8_pte_encode(addr, level, true));
+
+#if 0 /* TODO: Still needed on GEN8? */
+ /* This next bit makes the above posting read even more important. We
+ * want to flush the TLBs only after we're certain all the PTE updates
+ * have finished.
+ */
+ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+ POSTING_READ(GFX_FLSH_CNTL_GEN6);
+#endif
+}
+
/*
* Binds an object into the global gtt with the specified cache level. The object
* will be accessible to the GPU via commands whose operands reference offsets
@@ -615,6 +956,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
POSTING_READ(GFX_FLSH_CNTL_GEN6);
}
+/*
+ * Overwrite @num_entries global GTT entries, starting at @first_entry, with a
+ * PTE for the scratch page. When @use_scratch is false the scratch PTE is
+ * encoded as not valid. A request that runs past the end of the GTT triggers
+ * a WARN and is clamped.
+ */
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+ unsigned int first_entry,
+ unsigned int num_entries,
+ bool use_scratch)
+{
+ struct drm_i915_private *dev_priv = vm->dev->dev_private;
+ gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
+ (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+ const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
+ int i;
+
+ if (WARN(num_entries > max_entries,
+ "First entry = %d; Num entries = %d (max=%d)\n",
+ first_entry, num_entries, max_entries))
+ num_entries = max_entries;
+
+ scratch_pte = gen8_pte_encode(vm->scratch.addr,
+ I915_CACHE_LLC,
+ use_scratch);
+ for (i = 0; i < num_entries; i++)
+ gen8_set_pte(&gtt_base[i], scratch_pte);
+ /* Read the first entry back after the writes (presumably a posting
+ * read - confirm). */
+ readl(gtt_base);
+}
+
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
unsigned int first_entry,
unsigned int num_entries,
@@ -638,7 +1003,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
readl(gtt_base);
}
-
static void i915_ggtt_insert_entries(struct i915_address_space *vm,