summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 17:32:42 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 17:32:42 -0800
commit41ecf1404b34d9975eb97f5005d9e4274eaeb76a (patch)
tree8582dec3a644cfbe178132acded5ebb458e0e32e
parent2dc10ad81fc017837037e60439662e1b16bdffb9 (diff)
parentabed7d0710e8f892c267932a9492ccf447674fb8 (diff)
Merge tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
Pull xen updates from David Vrabel: - Improve balloon driver memory hotplug placement. - Use unpopulated hotplugged memory for foreign pages (if supported/enabled). - Support 64 KiB guest pages on arm64. - CPU hotplug support on arm/arm64. * tag 'for-linus-4.4-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (44 commits) xen: fix the check of e_pfn in xen_find_pfn_range x86/xen: add reschedule point when mapping foreign GFNs xen/arm: don't try to re-register vcpu_info on cpu_hotplug. xen, cpu_hotplug: call device_offline instead of cpu_down xen/arm: Enable cpu_hotplug.c xenbus: Support multiple grants ring with 64KB xen/grant-table: Add an helper to iterate over a specific number of grants xen/xenbus: Rename *RING_PAGE* to *RING_GRANT* xen/arm: correct comment in enlighten.c xen/gntdev: use types from linux/types.h in userspace headers xen/gntalloc: use types from linux/types.h in userspace headers xen/balloon: Use the correct sizeof when declaring frame_list xen/swiotlb: Add support for 64KB page granularity xen/swiotlb: Pass addresses rather than frame numbers to xen_arch_need_swiotlb arm/xen: Add support for 64KB page granularity xen/privcmd: Add support for Linux 64KB page granularity net/xen-netback: Make it running on 64KB page granularity net/xen-netfront: Make it running on 64KB page granularity block/xen-blkback: Make it running on 64KB page granularity block/xen-blkfront: Make it running on 64KB page granularity ...
-rw-r--r--arch/arm/include/asm/xen/hypervisor.h10
-rw-r--r--arch/arm/include/asm/xen/page-coherent.h26
-rw-r--r--arch/arm/include/asm/xen/page.h22
-rw-r--r--arch/arm/xen/enlighten.c20
-rw-r--r--arch/arm/xen/mm.c39
-rw-r--r--arch/arm/xen/p2m.c6
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h5
-rw-r--r--arch/x86/include/asm/xen/page.h8
-rw-r--r--arch/x86/xen/enlighten.c15
-rw-r--r--arch/x86/xen/grant-table.c2
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/p2m.c19
-rw-r--r--arch/x86/xen/setup.c9
-rw-r--r--drivers/block/xen-blkback/blkback.c13
-rw-r--r--drivers/block/xen-blkback/common.h17
-rw-r--r--drivers/block/xen-blkback/xenbus.c11
-rw-r--r--drivers/block/xen-blkfront.c560
-rw-r--r--drivers/net/xen-netback/common.h16
-rw-r--r--drivers/net/xen-netback/netback.c167
-rw-r--r--drivers/net/xen-netfront.c122
-rw-r--r--drivers/tty/hvc/hvc_xen.c4
-rw-r--r--drivers/xen/Makefile2
-rw-r--r--drivers/xen/balloon.c341
-rw-r--r--drivers/xen/biomerge.c8
-rw-r--r--drivers/xen/cpu_hotplug.c14
-rw-r--r--drivers/xen/events/events_base.c2
-rw-r--r--drivers/xen/events/events_fifo.c2
-rw-r--r--drivers/xen/grant-table.c56
-rw-r--r--drivers/xen/privcmd.c10
-rw-r--r--drivers/xen/swiotlb-xen.c43
-rw-r--r--drivers/xen/xenbus/xenbus_client.c128
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c3
-rw-r--r--drivers/xen/xlate_mmu.c124
-rw-r--r--include/linux/memory_hotplug.h2
-rw-r--r--include/uapi/xen/gntalloc.h22
-rw-r--r--include/uapi/xen/gntdev.h34
-rw-r--r--include/xen/balloon.h12
-rw-r--r--include/xen/grant_table.h57
-rw-r--r--include/xen/page.h27
-rw-r--r--include/xen/xenbus.h4
-rw-r--r--mm/memory_hotplug.c29
41 files changed, 1365 insertions, 647 deletions
diff --git a/arch/arm/include/asm/xen/hypervisor.h b/arch/arm/include/asm/xen/hypervisor.h
index 04ff8e7b37df..95251512e2c4 100644
--- a/arch/arm/include/asm/xen/hypervisor.h
+++ b/arch/arm/include/asm/xen/hypervisor.h
@@ -26,4 +26,14 @@ void __init xen_early_init(void);
static inline void xen_early_init(void) { return; }
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void xen_arch_register_cpu(int num)
+{
+}
+
+static inline void xen_arch_unregister_cpu(int num)
+{
+}
+#endif
+
#endif /* _ASM_ARM_XEN_HYPERVISOR_H */
diff --git a/arch/arm/include/asm/xen/page-coherent.h b/arch/arm/include/asm/xen/page-coherent.h
index efd562412850..0375c8caa061 100644
--- a/arch/arm/include/asm/xen/page-coherent.h
+++ b/arch/arm/include/asm/xen/page-coherent.h
@@ -35,11 +35,15 @@ static inline void xen_dma_map_page(struct device *hwdev, struct page *page,
dma_addr_t dev_addr, unsigned long offset, size_t size,
enum dma_data_direction dir, struct dma_attrs *attrs)
{
- bool local = PFN_DOWN(dev_addr) == page_to_pfn(page);
- /* Dom0 is mapped 1:1, so if pfn == mfn the page is local otherwise
- * is a foreign page grant-mapped in dom0. If the page is local we
- * can safely call the native dma_ops function, otherwise we call
- * the xen specific function. */
+ bool local = XEN_PFN_DOWN(dev_addr) == page_to_xen_pfn(page);
+ /*
+ * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+ * multiple Xen page, it's not possible to have a mix of local and
+ * foreign Xen page. So if the first xen_pfn == mfn the page is local
+ * otherwise it's a foreign page grant-mapped in dom0. If the page is
+ * local we can safely call the native dma_ops function, otherwise we
+ * call the xen specific function.
+ */
if (local)
__generic_dma_ops(hwdev)->map_page(hwdev, page, offset, size, dir, attrs);
else
@@ -51,10 +55,14 @@ static inline void xen_dma_unmap_page(struct device *hwdev, dma_addr_t handle,
struct dma_attrs *attrs)
{
unsigned long pfn = PFN_DOWN(handle);
- /* Dom0 is mapped 1:1, so calling pfn_valid on a foreign mfn will
- * always return false. If the page is local we can safely call the
- * native dma_ops function, otherwise we call the xen specific
- * function. */
+ /*
+ * Dom0 is mapped 1:1, while the Linux page can be spanned accross
+ * multiple Xen page, it's not possible to have a mix of local and
+ * foreign Xen page. Dom0 is mapped 1:1, so calling pfn_valid on a
+ * foreign mfn will always return false. If the page is local we can
+ * safely call the native dma_ops function, otherwise we call the xen
+ * specific function.
+ */
if (pfn_valid(pfn)) {
if (__generic_dma_ops(hwdev)->unmap_page)
__generic_dma_ops(hwdev)->unmap_page(hwdev, handle, size, dir, attrs);
diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h
index 127956353b00..415dbc6e43fd 100644
--- a/arch/arm/include/asm/xen/page.h
+++ b/arch/arm/include/asm/xen/page.h
@@ -13,9 +13,6 @@
#define phys_to_machine_mapping_valid(pfn) (1)
-#define pte_mfn pte_pfn
-#define mfn_pte pfn_pte
-
/* Xen machine address */
typedef struct xmaddr {
phys_addr_t maddr;
@@ -31,6 +28,17 @@ typedef struct xpaddr {
#define INVALID_P2M_ENTRY (~0UL)
+/*
+ * The pseudo-physical frame (pfn) used in all the helpers is always based
+ * on Xen page granularity (i.e 4KB).
+ *
+ * A Linux page may be split across multiple non-contiguous Xen page so we
+ * have to keep track with frame based on 4KB page granularity.
+ *
+ * PV drivers should never make a direct usage of those helpers (particularly
+ * pfn_to_gfn and gfn_to_pfn).
+ */
+
unsigned long __pfn_to_mfn(unsigned long pfn);
extern struct rb_root phys_to_mach;
@@ -67,8 +75,8 @@ static inline unsigned long bfn_to_pfn(unsigned long bfn)
#define bfn_to_local_pfn(bfn) bfn_to_pfn(bfn)
/* VIRT <-> GUEST conversion */
-#define virt_to_gfn(v) (pfn_to_gfn(virt_to_pfn(v)))
-#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << PAGE_SHIFT))
+#define virt_to_gfn(v) (pfn_to_gfn(virt_to_phys(v) >> XEN_PAGE_SHIFT))
+#define gfn_to_virt(m) (__va(gfn_to_pfn(m) << XEN_PAGE_SHIFT))
/* Only used in PV code. But ARM guests are always HVM. */
static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
@@ -107,8 +115,8 @@ static inline bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
#define xen_unmap(cookie) iounmap((cookie))
bool xen_arch_need_swiotlb(struct device *dev,
- unsigned long pfn,
- unsigned long bfn);
+ phys_addr_t phys,
+ dma_addr_t dev_addr);
unsigned long xen_get_swiotlb_free_pages(unsigned int order);
#endif /* _ASM_ARM_XEN_PAGE_H */
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index eeeab074e154..fc7ea529f462 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -86,16 +86,25 @@ static void xen_percpu_init(void)
int err;
int cpu = get_cpu();
+ /*
+ * VCPUOP_register_vcpu_info cannot be called twice for the same
+ * vcpu, so if vcpu_info is already registered, just get out. This
+ * can happen with cpu-hotplug.
+ */
+ if (per_cpu(xen_vcpu, cpu) != NULL)
+ goto after_register_vcpu_info;
+
pr_info("Xen: initializing cpu%d\n", cpu);
vcpup = per_cpu_ptr(xen_vcpu_info, cpu);
- info.mfn = __pa(vcpup) >> PAGE_SHIFT;
- info.offset = offset_in_page(vcpup);
+ info.mfn = virt_to_gfn(vcpup);
+ info.offset = xen_offset_in_page(vcpup);
err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
BUG_ON(err);
per_cpu(xen_vcpu, cpu) = vcpup;
+after_register_vcpu_info:
enable_percpu_irq(xen_events_irq, 0);
put_cpu();
}
@@ -124,6 +133,9 @@ static int xen_cpu_notification(struct notifier_block *self,
case CPU_STARTING:
xen_percpu_init();
break;
+ case CPU_DYING:
+ disable_percpu_irq(xen_events_irq);
+ break;
default:
break;
}
@@ -213,7 +225,7 @@ static int __init xen_guest_init(void)
xatp.domid = DOMID_SELF;
xatp.idx = 0;
xatp.space = XENMAPSPACE_shared_info;
- xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
+ xatp.gpfn = virt_to_gfn(shared_info_page);
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
BUG();
@@ -284,7 +296,7 @@ void xen_arch_resume(void) { }
void xen_arch_suspend(void) { }
-/* In the hypervisor.S file. */
+/* In the hypercall.S file. */
EXPORT_SYMBOL_GPL(HYPERVISOR_event_channel_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_grant_table_op);
EXPORT_SYMBOL_GPL(HYPERVISOR_xen_version);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 6dd911d1f0ac..7c34f7126b04 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -48,22 +48,22 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
size_t size, enum dma_data_direction dir, enum dma_cache_op op)
{
struct gnttab_cache_flush cflush;
- unsigned long pfn;
+ unsigned long xen_pfn;
size_t left = size;
- pfn = (handle >> PAGE_SHIFT) + offset / PAGE_SIZE;
- offset %= PAGE_SIZE;
+ xen_pfn = (handle >> XEN_PAGE_SHIFT) + offset / XEN_PAGE_SIZE;
+ offset %= XEN_PAGE_SIZE;
do {
size_t len = left;
/* buffers in highmem or foreign pages cannot cross page
* boundaries */
- if (len + offset > PAGE_SIZE)
- len = PAGE_SIZE - offset;
+ if (len + offset > XEN_PAGE_SIZE)
+ len = XEN_PAGE_SIZE - offset;
cflush.op = 0;
- cflush.a.dev_bus_addr = pfn << PAGE_SHIFT;
+ cflush.a.dev_bus_addr = xen_pfn << XEN_PAGE_SHIFT;
cflush.offset = offset;
cflush.length = len;
@@ -79,7 +79,7 @@ static void dma_cache_maint(dma_addr_t handle, unsigned long offset,
HYPERVISOR_grant_table_op(GNTTABOP_cache_flush, &cflush, 1);
offset = 0;
- pfn++;
+ xen_pfn++;
left -= len;
} while (left);
}
@@ -138,10 +138,29 @@ void __xen_dma_sync_single_for_device(struct device *hwdev,
}
bool xen_arch_need_swiotlb(struct device *dev,
- unsigned long pfn,
- unsigned long bfn)
+ phys_addr_t phys,
+ dma_addr_t dev_addr)
{
- return (!hypercall_cflush && (pfn != bfn) && !is_device_dma_coherent(dev));
+ unsigned int xen_pfn = XEN_PFN_DOWN(phys);
+ unsigned int bfn = XEN_PFN_DOWN(dev_addr);
+
+ /*
+ * The swiotlb buffer should be used if
+ * - Xen doesn't have the cache flush hypercall
+ * - The Linux page refers to foreign memory
+ * - The device doesn't support coherent DMA request
+ *
+ * The Linux page may be spanned acrros multiple Xen page, although
+ * it's not possible to have a mix of local and foreign Xen page.
+ * Furthermore, range_straddles_page_boundary is already checking
+ * if buffer is physically contiguous in the host RAM.
+ *
+ * Therefore we only need to check the first Xen page to know if we
+ * require a bounce buffer because the device doesn't support coherent
+ * memory and we are not able to flush the cache.
+ */
+ return (!hypercall_cflush && (xen_pfn != bfn) &&
+ !is_device_dma_coherent(dev));
}
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 887596c67b12..0ed01f2d5ee4 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -93,8 +93,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
if (map_ops[i].status)
continue;
- set_phys_to_machine(map_ops[i].host_addr >> PAGE_SHIFT,
- map_ops[i].dev_bus_addr >> PAGE_SHIFT);
+ set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+ map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT);
}
return 0;
@@ -108,7 +108,7 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
int i;
for (i = 0; i < count; i++) {
- set_phys_to_machine(unmap_ops[i].host_addr >> PAGE_SHIFT,
+ set_phys_to_machine(unmap_ops[i].host_addr >> XEN_PAGE_SHIFT,
INVALID_P2M_ENTRY);
}
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index d866959e5685..8b2d4bea9962 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -57,4 +57,9 @@ static inline bool xen_x2apic_para_available(void)
}
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num);
+void xen_arch_unregister_cpu(int num);
+#endif
+
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 0679e11d2cf7..f5fb840b43e8 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,7 +12,7 @@
#include <asm/pgtable.h>
#include <xen/interface/xen.h>
-#include <xen/grant_table.h>
+#include <xen/interface/grant_table.h>
#include <xen/features.h>
/* Xen machine address */
@@ -43,6 +43,8 @@ extern unsigned long *xen_p2m_addr;
extern unsigned long xen_p2m_size;
extern unsigned long xen_max_p2m_pfn;
+extern int xen_alloc_p2m_entry(unsigned long pfn);
+
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -296,8 +298,8 @@ void make_lowmem_page_readwrite(void *vaddr);
#define xen_unmap(cookie) iounmap((cookie))
static inline bool xen_arch_need_swiotlb(struct device *dev,
- unsigned long pfn,
- unsigned long bfn)
+ phys_addr_t phys,
+ dma_addr_t dev_addr)
{
return false;
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 993b7a71386d..5774800ff583 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -75,6 +75,7 @@
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/pat.h>
+#include <asm/cpu.h>
#ifdef CONFIG_ACPI
#include <linux/acpi.h>
@@ -1899,3 +1900,17 @@ const struct hypervisor_x86 x86_hyper_xen = {
.set_cpu_features = xen_set_cpu_features,
};
EXPORT_SYMBOL(x86_hyper_xen);
+
+#ifdef CONFIG_HOTPLUG_CPU
+void xen_arch_register_cpu(int num)
+{
+ arch_register_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_register_cpu);
+
+void xen_arch_unregister_cpu(int num)
+{
+ arch_unregister_cpu(num);
+}
+EXPORT_SYMBOL(xen_arch_unregister_cpu);
+#endif
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index 1580e7a5a4cf..e079500b17f3 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void)
kfree(pages);
return -ENOMEM;
}
- rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */);
+ rc = alloc_xenballooned_pages(nr_grant_frames, pages);
if (rc) {
pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__,
nr_grant_frames, rc);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 9c479fe40459..ac161db63388 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2888,6 +2888,7 @@ static int do_remap_gfn(struct vm_area_struct *vma,
addr += range;
if (err_ptr)
err_ptr += batch;
+ cond_resched();
}
out:
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 660b3cfef234..cab9f766bb06 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -530,7 +530,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg)
* the new pages are installed with cmpxchg; if we lose the race then
* simply free the page we allocated and use the one that's there.
*/
-static bool alloc_p2m(unsigned long pfn)
+int xen_alloc_p2m_entry(unsigned long pfn)
{
unsigned topidx;
unsigned long *top_mfn_p, *mid_mfn;
@@ -540,6 +540,9 @@ static bool alloc_p2m(unsigned long pfn)
unsigned long addr = (unsigned long)(xen_p2m_addr + pfn);
unsigned long p2m_pfn;
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
ptep = lookup_address(addr, &level);
BUG_ON(!ptep || level != PG_LEVEL_4K);
pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1));
@@ -548,7 +551,7 @@ static bool alloc_p2m(unsigned long pfn)
/* PMD level is missing, allocate a new one */
ptep = alloc_p2m_pmd(addr, pte_pg);
if (!ptep)
- return false;
+ return -ENOMEM;
}
if (p2m_top_mfn && pfn < MAX_P2M_PFN) {
@@ -566,7 +569,7 @@ static bool alloc_p2m(unsigned long pfn)
mid_mfn = alloc_p2m_page();
if (!mid_mfn)
- return false;
+ return -ENOMEM;
p2m_mid_mfn_init(mid_mfn, p2m_missing);
@@ -592,7 +595,7 @@ static bool alloc_p2m(unsigned long pfn)
p2m = alloc_p2m_page();
if (!p2m)
- return false;
+ return -ENOMEM;
if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
p2m_init(p2m);
@@ -625,8 +628,9 @@ static bool alloc_p2m(unsigned long pfn)
HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
}
- return true;
+ return 0;
}
+EXPORT_SYMBOL(xen_alloc_p2m_entry);
unsigned long __init set_phys_range_identity(unsigned long pfn_s,
unsigned long pfn_e)
@@ -688,7 +692,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
bool set_phys_to_machine(unsigned long pfn, unsigned long mfn)
{
if (unlikely(!__set_phys_to_machine(pfn, mfn))) {
- if (!alloc_p2m(pfn))
+ int ret;
+
+ ret = xen_alloc_p2m_entry(pfn);
+ if (ret < 0)
return false;
return __set_phys_to_machine(pfn, mfn);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 63320b6d35bc..7ab29518a3b9 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -212,7 +212,7 @@ static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
e_pfn = PFN_DOWN(entry->addr + entry->size);
/* We only care about E820 after this */
- if (e_pfn < *min_pfn)
+ if (e_pfn <= *min_pfn)
continue;
s_pfn = PFN_UP(entry->addr);
@@ -829,6 +829,8 @@ char * __init xen_memory_setup(void)
addr = xen_e820_map[0].addr;
size = xen_e820_map[0].size;
while (i < xen_e820_map_entries) {
+ bool discard = false;
+
chunk_size = size;
type = xen_e820_map[i].type;
@@ -843,10 +845,11 @@ char * __init xen_memory_setup(void)
xen_add_extra_mem(pfn_s, n_pfns);
xen_max_p2m_pfn = pfn_s + n_pfns;
} else
- type = E820_UNUSABLE;
+ discard = true;
}
- xen_align_and_add_e820_region(addr, chunk_size, type);
+ if (!discard)
+ xen_align_and_add_e820_region(addr, chunk_size, type);
addr += chunk_size;
size -= chunk_size;
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6a685aec6994..f9099940c272 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -87,7 +87,7 @@ MODULE_PARM_DESC(max_persistent_grants,
* Maximum order of pages to be used for the shared ring between front and
* backend, 4KB page granularity is used.
*/
-unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
@@ -961,7 +961,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
seg[n].nsec = segments[i].last_sect -
segments[i].first_sect + 1;
seg[n].offset = (segments[i].first_sect << 9);
- if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ if ((segments[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
(segments[i].last_sect < segments[i].first_sect)) {
rc = -EINVAL;
goto unmap;
@@ -1210,6 +1210,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
req_operation = req->operation == BLKIF_OP_INDIRECT ?
req->u.indirect.indirect_op : req->operation;
+
if ((req->operation == BLKIF_OP_INDIRECT) &&
(req_operation != BLKIF_OP_READ) &&
(req_operation != BLKIF_OP_WRITE)) {
@@ -1268,7 +1269,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
seg[i].nsec = req->u.rw.seg[i].last_sect -
req->u.rw.seg[i].first_sect + 1;
seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
- if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+ if ((req->u.rw.seg[i].last_sect >= (XEN_PAGE_SIZE >> 9)) ||
(req->u.rw.seg[i].last_sect <
req->u.rw.seg[i].first_sect))
goto fail_response;
@@ -1445,10 +1446,10 @@ static int __init xen_blkif_init(void)
if (!xen_domain())
return -ENODEV;
- if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+ if (xen_blkif_max_ring_order > XENBUS_MAX_RING_GRANT_ORDER) {
pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
- xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
- xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+ xen_blkif_max_ring_order, XENBUS_MAX_RING_GRANT_ORDER);
+ xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
}
rc = xen_blkif_interface_init();
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 45a044a53d1e..68e87a037b99 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -39,6 +39,7 @@
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
#include <xen/grant_table.h>
+#include <xen/page.h>
#include <xen/xenbus.h>
#include <xen/interface/io/ring.h>
#include <xen/interface/io/blkif.h>
@@ -51,12 +52,20 @@ extern unsigned int xen_blkif_max_ring_order;
*/
#define MAX_INDIRECT_SEGMENTS 256
-#define SEGS_PER_INDIRECT_FRAME \
- (PAGE_SIZE/sizeof(struct blkif_request_segment))
+/*
+ * Xen use 4K pages. The guest may use different page size (4K or 64K)
+ * Number of Xen pages per segment
+ */
+#define XEN_PAGES_PER_SEGMENT (PAGE_SIZE / XEN_PAGE_SIZE)
+
+#define XEN_PAGES_PER_INDIRECT_FRAME \
+ (XEN_PAGE_SIZE/sizeof(struct blkif_request_segment))
+#define SEGS_PER_INDIRECT_FRAME \
+ (XEN_PAGES_PER_INDIRECT_FRAME / XEN_PAGES_PER_SEGMENT)
+
#define MAX_INDIRECT_PAGES \
((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
-#define INDIRECT_PAGES(_segs) \
- ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) DIV_ROUND_UP(_segs, XEN_PAGES_PER_INDIRECT_FRAME)
/* Not a real protocol. Used to generate ring structs which contain
* the elements common to all protocols only. This way we get a
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 767657565de6..f53cff42f8da 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -176,21 +176,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
{
struct blkif_sring *sring;
sring = (struct blkif_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
+ BACK_RING_INIT(&blkif->blk_rings.native, sring,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
struct blkif_x86_32_sring *sring_x86_32;
sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
+ BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
struct blkif_x86_64_sring *sring_x86_64;
sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
- BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
+ BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64,
+ XEN_PAGE_SIZE * nr_grefs);
break;
}
default:
@@ -826,7 +829,7 @@ again:
static int connect_ring(struct backend_info *be)
{
struct xenbus_device *dev = be->dev;
- unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+ unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
unsigned int evtchn, nr_grefs, ring_page_order;
unsigned int pers_grants;
char protocol[64] = "";
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a69c02dadec0..2fee2eef988d 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -68,7 +68,7 @@ enum blkif_state {
struct grant {
grant_ref_t gref;
- unsigned long pfn;
+ struct page *page;
struct list_head node;
};
@@ -78,6 +78,7 @@ struct blk_shadow {
struct grant **grants_used;
struct grant **indirect_grants;
struct scatterlist *sg;
+ unsigned int num_sg;
};
struct split_bio {
@@ -106,8 +107,12 @@ static unsigned int xen_blkif_max_ring_order;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
-#define BLK_RING_SIZE(info) __CONST_RING_SIZE(blkif, PAGE_SIZE * (info)->nr_ring_pages)
-#define BLK_MAX_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE * XENBUS_MAX_RING_PAGES)
+#define BLK_RING_SIZE(info) \
+ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
+
+#define BLK_MAX_RING_SIZE \
+ __CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * XENBUS_MAX_RING_GRANTS)
+
/*
* ring-ref%i i=(-1UL) would take 11 characters + 'ring-ref' is 8, so 19
* characters are enough. Define to 20 to keep consist with backend.
@@ -128,7 +133,7 @@ struct blkfront_info
int vdevice;
blkif_vdev_t handle;
enum blkif_state connected;
- int ring_ref[XENBUS_MAX_RING_PAGES];
+ int ring_ref[XENBUS_MAX_RING_GRANTS];
unsigned int nr_ring_pages;
struct blkif_front_ring ring;
unsigned int evtchn, irq;
@@ -146,6 +151,7 @@ struct blkfront_info
unsigned int discard_granularity;
unsigned int discard_alignment;
unsigned int feature_persistent:1;
+ /* Number of 4KB segments handled */
unsigned int max_indirect_segments;
int is_ready;
struct blk_mq_tag_set tag_set;
@@ -174,10 +180,23 @@ static DEFINE_SPINLOCK(minor_lock);
#define DEV_NAME "xvd" /* name in /dev */
-#define SEGS_PER_INDIRECT_FRAME \
- (PAGE_SIZE/si