From 62cedb9f135794ec26a93ae29e5f0231ab263c84 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 16:35:49 +0100 Subject: mm: memory hotplug with an existing resource Add add_memory_resource() to add memory using an existing "System RAM" resource. This is useful if the memory region is being located by finding a free resource slot with allocate_resource(). Xen guests will make use of this in their balloon driver to hotplug arbitrary amounts of memory in response to toolstack requests. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper Reviewed-by: Tang Chen --- include/linux/memory_hotplug.h | 2 ++ mm/memory_hotplug.c | 29 +++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f60e899b33c..2ea574ff9714 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,6 +11,7 @@ struct zone; struct pglist_data; struct mem_section; struct memory_block; +struct resource; #ifdef CONFIG_MEMORY_HOTPLUG @@ -266,6 +267,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); +extern int add_memory_resource(int nid, struct resource *resource); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index aa992e2df58a..0780d118d26e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1232,23 +1232,21 @@ int zone_for_memory(int nid, u64 start, u64 size, int zone_default, } /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ -int __ref add_memory(int nid, u64 start, u64 size) +int __ref add_memory_resource(int nid, struct resource *res) { + u64 start, size; pg_data_t *pgdat = NULL; bool new_pgdat; bool new_node; - struct resource *res; int ret; + start = res->start; + size = resource_size(res); + ret = check_hotplug_memory_range(start, size); if (ret) return ret; - res = register_memory_resource(start, size); - ret = -EEXIST; - if (!res) - return ret; - { /* Stupid hack to suppress address-never-null warning */ void *p = NODE_DATA(nid); new_pgdat = !p; @@ -1300,13 +1298,28 @@ error: /* rollback pgdat allocation and others */ if (new_pgdat) rollback_node_hotadd(nid, pgdat); - release_memory_resource(res); memblock_remove(start, size); out: mem_hotplug_done(); return ret; } +EXPORT_SYMBOL_GPL(add_memory_resource); + +int __ref add_memory(int nid, u64 start, u64 size) +{ + struct resource *res; + int ret; + + res = register_memory_resource(start, size); + if (!res) + return -EEXIST; + + ret = add_memory_resource(nid, res); + if (ret < 0) + release_memory_resource(res); + return ret; +} EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE -- cgit v1.2.3 From f6a6cb1afe74d6ccc81aa70aa4ac3953762e7e6e Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 13:18:12 +0100 Subject: xen/balloon: remove scratch page left overs Commit 0bb599fd30108883b00c7d4a226eeb49111e6932 (xen: remove scratch frames for ballooned pages and m2p override) removed the use of the scratch page for ballooned out pages. Remove some left over function definitions. 
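A minimal sketch (not part of this series; the helper name example_hotplug_ram() and its error handling are illustrative only) of how a caller can pair allocate_resource() with the add_memory_resource() interface added by the first patch above, which is the same pattern the Xen balloon driver adopts later in the series:

#include <linux/ioport.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/slab.h>

/* size is assumed to be a multiple of the memory section size. */
static int example_hotplug_ram(phys_addr_t size)
{
        struct resource *res;
        int nid, rc;

        res = kzalloc(sizeof(*res), GFP_KERNEL);
        if (!res)
                return -ENOMEM;

        res->name = "System RAM";
        res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;

        /* Find a free, section-aligned slot anywhere in the physical address space. */
        rc = allocate_resource(&iomem_resource, res, size, 0, -1,
                               PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
        if (rc < 0)
                goto err_free;

        nid = memory_add_physaddr_to_nid(res->start);

        /* Hot-add the region described by the resource just reserved. */
        rc = add_memory_resource(nid, res);
        if (rc < 0)
                goto err_release;

        return 0;

err_release:
        release_resource(res);
err_free:
        kfree(res);
        return rc;
}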
Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- include/xen/balloon.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/xen/balloon.h b/include/xen/balloon.h index a4c1c6a93691..cc2e1a7e44ec 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -29,9 +29,6 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem); void free_xenballooned_pages(int nr_pages, struct page **pages); -struct page *get_balloon_scratch_page(void); -void put_balloon_scratch_page(void); - struct device; #ifdef CONFIG_XEN_SELFBALLOONING extern int register_xen_selfballooning(struct device *dev); -- cgit v1.2.3 From f5775e0b6116b7e2425ccf535243b21768566d87 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 19 Jan 2015 11:08:05 +0000 Subject: x86/xen: discard RAM regions above the maximum reservation During setup, discard RAM regions that are above the maximum reservation (instead of marking them as E820_UNUSABLE). This allows hotplug memory to be placed at these addresses. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- arch/x86/xen/setup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1c30e4ab1022..387b60d9bd0e 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -829,6 +829,8 @@ char * __init xen_memory_setup(void) addr = xen_e820_map[0].addr; size = xen_e820_map[0].size; while (i < xen_e820_map_entries) { + bool discard = false; + chunk_size = size; type = xen_e820_map[i].type; @@ -843,10 +845,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; -- cgit v1.2.3 From 55b3da98a40dbb3776f7454daf0d95dde25c33d2 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 24 Jun 2015 15:58:42 +0100 Subject: xen/balloon: find non-conflicting regions to place hotplugged memory Instead of placing hotplugged memory at the end of RAM (which may conflict with PCI devices or reserved regions) use allocate_resource() to get a new, suitably aligned resource that does not conflict. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- v3: - Remove stale comment. --- drivers/xen/balloon.c | 73 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 20 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index c79329fcfa78..095bb6789731 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include @@ -208,26 +209,56 @@ static bool balloon_is_inflated(void) return false; } -/* - * reserve_additional_memory() adds memory region of size >= credit above - * max_pfn. New region is section aligned and size is modified to be multiple - * of section size. Those features allow optimal use of address space and - * establish proper alignment when this function is called first time after - * boot (last section not fully populated at boot time contains unused memory - * pages with PG_reserved bit not set; online_pages_range() does not allow page - * onlining in whole range if first onlined page does not have PG_reserved - * bit set). Real size of added memory is established at page onlining stage. 
- */ +static struct resource *additional_memory_resource(phys_addr_t size) +{ + struct resource *res; + int ret; + + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + return NULL; + + res->name = "System RAM"; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + ret = allocate_resource(&iomem_resource, res, + size, 0, -1, + PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); + if (ret < 0) { + pr_err("Cannot allocate new System RAM resource\n"); + kfree(res); + return NULL; + } + + return res; +} + +static void release_memory_resource(struct resource *resource) +{ + if (!resource) + return; + + /* + * No need to reset region to identity mapped since we now + * know that no I/O can be in this region + */ + release_resource(resource); + kfree(resource); +} static enum bp_state reserve_additional_memory(long credit) { + struct resource *resource; int nid, rc; - u64 hotplug_start_paddr; - unsigned long balloon_hotplug = credit; + unsigned long balloon_hotplug; + + balloon_hotplug = round_up(credit, PAGES_PER_SECTION); + + resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE); + if (!resource) + goto err; - hotplug_start_paddr = PFN_PHYS(SECTION_ALIGN_UP(max_pfn)); - balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION); - nid = memory_add_physaddr_to_nid(hotplug_start_paddr); + nid = memory_add_physaddr_to_nid(resource->start); #ifdef CONFIG_XEN_HAVE_PVMMU /* @@ -242,21 +273,20 @@ static enum bp_state reserve_additional_memory(long credit) if (!xen_feature(XENFEAT_auto_translated_physmap)) { unsigned long pfn, i; - pfn = PFN_DOWN(hotplug_start_paddr); + pfn = PFN_DOWN(resource->start); for (i = 0; i < balloon_hotplug; i++) { if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { pr_warn("set_phys_to_machine() failed, no memory added\n"); - return BP_ECANCELED; + goto err; } } } #endif - rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT); - + rc = add_memory_resource(nid, resource); if (rc) { pr_warn("Cannot add additional memory (%i)\n", rc); - return BP_ECANCELED; + goto err; } balloon_hotplug -= credit; @@ -265,6 +295,9 @@ static enum bp_state reserve_additional_memory(long credit) balloon_stats.balloon_hotplug = balloon_hotplug; return BP_DONE; + err: + release_memory_resource(resource); + return BP_ECANCELED; } static void xen_online_page(struct page *page) -- cgit v1.2.3 From de5a77d8422fc7ed0b2f4349bceb65a1a639e5b2 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 12:08:20 +0100 Subject: xen/balloon: rationalize memory hotplug stats The stats used for memory hotplug make no sense and are fiddled with in odd ways. Remove them and introduce total_pages to track the total number of pages (both populated and unpopulated) including those within hotplugged regions (note that this includes not yet onlined pages). This will be used in a subsequent commit (xen/balloon: only hotplug additional memory if required) when deciding whether additional memory needs to be hotplugged. 
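In outline, the decision that the follow-up patch bases on total_pages can be sketched as below (illustration only; the helper need_more_hotplug() is not part of the series, and the real code operates on the driver's balloon_stats fields directly):

#include <linux/kernel.h>
#include <linux/mm.h>

/*
 * Hotplug more memory only when the target exceeds everything already
 * added (populated or not); otherwise wait for the pending sections to
 * be onlined. Memory is hotplugged in whole-section units.
 */
static bool need_more_hotplug(unsigned long target_pages,
                              unsigned long total_pages,
                              unsigned long *nr_pages)
{
        long credit = target_pages - total_pages;

        if (credit <= 0)
                return false;

        *nr_pages = round_up(credit, PAGES_PER_SECTION);
        return true;
}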
Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- drivers/xen/balloon.c | 75 +++++++++------------------------------------------ include/xen/balloon.h | 5 +--- 2 files changed, 13 insertions(+), 67 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 095bb6789731..ac8054765127 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -194,21 +194,6 @@ static enum bp_state update_schedule(enum bp_state state) } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -static long current_credit(void) -{ - return balloon_stats.target_pages - balloon_stats.current_pages - - balloon_stats.hotplug_pages; -} - -static bool balloon_is_inflated(void) -{ - if (balloon_stats.balloon_low || balloon_stats.balloon_high || - balloon_stats.balloon_hotplug) - return true; - else - return false; -} - static struct resource *additional_memory_resource(phys_addr_t size) { struct resource *res; @@ -289,10 +274,7 @@ static enum bp_state reserve_additional_memory(long credit) goto err; } - balloon_hotplug -= credit; - - balloon_stats.hotplug_pages += credit; - balloon_stats.balloon_hotplug = balloon_hotplug; + balloon_stats.total_pages += balloon_hotplug; return BP_DONE; err: @@ -308,11 +290,6 @@ static void xen_online_page(struct page *page) __balloon_append(page); - if (balloon_stats.hotplug_pages) - --balloon_stats.hotplug_pages; - else - --balloon_stats.balloon_hotplug; - mutex_unlock(&balloon_mutex); } @@ -329,32 +306,22 @@ static struct notifier_block xen_memory_nb = { .priority = 0 }; #else -static long current_credit(void) +static enum bp_state reserve_additional_memory(long credit) { - unsigned long target = balloon_stats.target_pages; - - target = min(target, - balloon_stats.current_pages + - balloon_stats.balloon_low + - balloon_stats.balloon_high); - - return target - balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages; + return BP_DONE; } +#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ -static bool balloon_is_inflated(void) +static long current_credit(void) { - if (balloon_stats.balloon_low || balloon_stats.balloon_high) - return true; - else - return false; + return balloon_stats.target_pages - balloon_stats.current_pages; } -static enum bp_state reserve_additional_memory(long credit) +static bool balloon_is_inflated(void) { - balloon_stats.target_pages = balloon_stats.current_pages; - return BP_DONE; + return balloon_stats.balloon_low || balloon_stats.balloon_high; } -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ static enum bp_state increase_reservation(unsigned long nr_pages) { @@ -367,15 +334,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages) .domid = DOMID_SELF }; -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { - nr_pages = min(nr_pages, balloon_stats.balloon_hotplug); - balloon_stats.hotplug_pages += nr_pages; - balloon_stats.balloon_hotplug -= nr_pages; - return BP_DONE; - } -#endif - if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -438,15 +396,6 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) .domid = DOMID_SELF }; -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - if (balloon_stats.hotplug_pages) { - nr_pages = min(nr_pages, balloon_stats.hotplug_pages); - balloon_stats.hotplug_pages -= nr_pages; - balloon_stats.balloon_hotplug += nr_pages; - return BP_DONE; - } -#endif - if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); @@ -635,6 +584,8 @@ static void __init 
balloon_add_region(unsigned long start_pfn, don't subtract from it. */ __balloon_append(page); } + + balloon_stats.total_pages += extra_pfn_end - start_pfn; } static int __init balloon_init(void) @@ -652,6 +603,7 @@ static int __init balloon_init(void) balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; + balloon_stats.total_pages = balloon_stats.current_pages; balloon_stats.schedule_delay = 1; balloon_stats.max_schedule_delay = 32; @@ -659,9 +611,6 @@ static int __init balloon_init(void) balloon_stats.max_retry_count = RETRY_UNLIMITED; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - balloon_stats.hotplug_pages = 0; - balloon_stats.balloon_hotplug = 0; - set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); #endif diff --git a/include/xen/balloon.h b/include/xen/balloon.h index cc2e1a7e44ec..c8aee7a8b8d2 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -11,14 +11,11 @@ struct balloon_stats { /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; + unsigned long total_pages; unsigned long schedule_delay; unsigned long max_schedule_delay; unsigned long retry_count; unsigned long max_retry_count; -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG - unsigned long hotplug_pages; - unsigned long balloon_hotplug; -#endif }; extern struct balloon_stats balloon_stats; -- cgit v1.2.3 From b2ac6aa8f71bf57b948ee68cd913c350b932da88 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 12:10:28 +0100 Subject: xen/balloon: only hotplug additional memory if required Now that we track the total number of pages (included hotplugged regions), it is easy to determine if more memory needs to be hotplugged. Add a new BP_WAIT state to signal that the balloon process needs to wait until kicked by the memory add notifier (when the new section is onlined by userspace). Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- v3: - Return BP_WAIT if enough sections are already hotplugged. v2: - New BP_WAIT status after adding new memory sections. --- drivers/xen/balloon.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ac8054765127..ac6391bd8029 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -75,12 +75,14 @@ * balloon_process() state: * * BP_DONE: done or nothing to do, + * BP_WAIT: wait to be rescheduled, * BP_EAGAIN: error, go to sleep, * BP_ECANCELED: error, balloon operation canceled. */ enum bp_state { BP_DONE, + BP_WAIT, BP_EAGAIN, BP_ECANCELED }; @@ -167,6 +169,9 @@ static struct page *balloon_next_page(struct page *page) static enum bp_state update_schedule(enum bp_state state) { + if (state == BP_WAIT) + return BP_WAIT; + if (state == BP_ECANCELED) return BP_ECANCELED; @@ -231,12 +236,22 @@ static void release_memory_resource(struct resource *resource) kfree(resource); } -static enum bp_state reserve_additional_memory(long credit) +static enum bp_state reserve_additional_memory(void) { + long credit; struct resource *resource; int nid, rc; unsigned long balloon_hotplug; + credit = balloon_stats.target_pages - balloon_stats.total_pages; + + /* + * Already hotplugged enough pages? Wait for them to be + * onlined. 
+ */ + if (credit <= 0) + return BP_WAIT; + balloon_hotplug = round_up(credit, PAGES_PER_SECTION); resource = additional_memory_resource(balloon_hotplug * PAGE_SIZE); @@ -276,7 +291,7 @@ static enum bp_state reserve_additional_memory(long credit) balloon_stats.total_pages += balloon_hotplug; - return BP_DONE; + return BP_WAIT; err: release_memory_resource(resource); return BP_ECANCELED; @@ -306,7 +321,7 @@ static struct notifier_block xen_memory_nb = { .priority = 0 }; #else -static enum bp_state reserve_additional_memory(long credit) +static enum bp_state reserve_additional_memory(void) { balloon_stats.target_pages = balloon_stats.current_pages; return BP_DONE; @@ -474,7 +489,7 @@ static void balloon_process(struct work_struct *work) if (balloon_is_inflated()) state = increase_reservation(credit); else - state = reserve_additional_memory(credit); + state = reserve_additional_memory(); } if (credit < 0) -- cgit v1.2.3 From 81b286e0f1fe520f2a96f736ffa7e508ac9139ba Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 13:12:46 +0100 Subject: xen/balloon: make alloc_xenballoon_pages() always allocate low pages All users of alloc_xenballoon_pages() wanted low memory pages, so remove the option for high memory. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- arch/x86/xen/grant-table.c | 2 +- drivers/xen/balloon.c | 21 ++++++++------------- drivers/xen/grant-table.c | 2 +- drivers/xen/privcmd.c | 2 +- drivers/xen/xenbus/xenbus_client.c | 3 +-- include/xen/balloon.h | 3 +-- 6 files changed, 13 insertions(+), 20 deletions(-) diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 1580e7a5a4cf..e079500b17f3 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c @@ -133,7 +133,7 @@ static int __init xlated_setup_gnttab_pages(void) kfree(pages); return -ENOMEM; } - rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); + rc = alloc_xenballooned_pages(nr_grant_frames, pages); if (rc) { pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, nr_grant_frames, rc); diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index ac6391bd8029..7ec933d505d2 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -136,17 +136,16 @@ static void balloon_append(struct page *page) } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ -static struct page *balloon_retrieve(bool prefer_highmem) +static struct page *balloon_retrieve(bool require_lowmem) { struct page *page; if (list_empty(&ballooned_pages)) return NULL; - if (prefer_highmem) - page = list_entry(ballooned_pages.prev, struct page, lru); - else - page = list_entry(ballooned_pages.next, struct page, lru); + page = list_entry(ballooned_pages.next, struct page, lru); + if (require_lowmem && PageHighMem(page)) + return NULL; list_del(&page->lru); if (PageHighMem(page)) @@ -521,24 +520,20 @@ EXPORT_SYMBOL_GPL(balloon_set_new_target); * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get * @pages: pages returned - * @highmem: allow highmem pages * @return 0 on success, error otherwise */ -int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem) +int alloc_xenballooned_pages(int nr_pages, struct page **pages) { int pgno = 0; struct page *page; mutex_lock(&balloon_mutex); while (pgno < nr_pages) { - page = balloon_retrieve(highmem); - if (page && (highmem || !PageHighMem(page))) { + page = balloon_retrieve(true); + if (page) { pages[pgno++] = page; } else { enum bp_state st; - if (page) - balloon_append(page); - st = decrease_reservation(nr_pages - pgno, - highmem ? GFP_HIGHUSER : GFP_USER); + st = decrease_reservation(nr_pages - pgno, GFP_USER); if (st != BP_DONE) goto out_undo; } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 62f591f8763c..a4b702c9ac68 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -687,7 +687,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) int i; int ret; - ret = alloc_xenballooned_pages(nr_pages, pages, false); + ret = alloc_xenballooned_pages(nr_pages, pages); if (ret < 0) return ret; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5e9adac928e6..b199ad3d4587 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -401,7 +401,7 @@ static int alloc_empty_pages(struct vm_area_struct *vma, int numpgs) if (pages == NULL) return -ENOMEM; - rc = alloc_xenballooned_pages(numpgs, pages, 0); + rc = alloc_xenballooned_pages(numpgs, pages); if (rc != 0) { pr_warn("%s Could not alloc %d pfns rc:%d\n", __func__, numpgs, rc); diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 2ba09c1195c8..aa304d05101b 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -614,8 +614,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, if (!node) return -ENOMEM; - err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages, - false /* lowmem */); + err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages); if (err) goto out_err; diff --git a/include/xen/balloon.h b/include/xen/balloon.h index c8aee7a8b8d2..83efdeb243bf 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -22,8 +22,7 @@ extern struct balloon_stats balloon_stats; void balloon_set_new_target(unsigned long target); -int alloc_xenballooned_pages(int nr_pages, struct page **pages, - bool highmem); +int alloc_xenballooned_pages(int nr_pages, struct page **pages); void free_xenballooned_pages(int nr_pages, struct page **pages); struct device; -- cgit v1.2.3 From 1cf6a6c82918c9aad4bb73a7e7379a649e4d8e50 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 16:29:18 +0100 Subject: xen/balloon: use hotplugged pages for foreign mappings etc. alloc_xenballooned_pages() is used to get ballooned pages to back foreign mappings etc. 
Instead of having to balloon out real pages, use (if supported) hotplugged memory. This makes more memory available to the guest and reduces fragmentation in the p2m. This is only enabled if the xen.balloon.hotplug_unpopulated sysctl is set to 1. This sysctl defaults to 0 in case the udev rules to automatically online hotplugged memory do not exist. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- v3: - Add xen.balloon.hotplug_unpopulated sysctl to enable use of hotplug for unpopulated pages. --- drivers/xen/balloon.c | 90 +++++++++++++++++++++++++++++++++++++++++++++------ include/xen/balloon.h | 1 + 2 files changed, 81 insertions(+), 10 deletions(-) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 7ec933d505d2..25fd1bd949d8 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,46 @@ #include #include +static int xen_hotplug_unpopulated; + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + +static int zero; +static int one = 1; + +static struct ctl_table balloon_table[] = { + { + .procname = "hotplug_unpopulated", + .data = &xen_hotplug_unpopulated, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { } +}; + +static struct ctl_table balloon_root[] = { + { + .procname = "balloon", + .mode = 0555, + .child = balloon_table, + }, + { } +}; + +static struct ctl_table xen_root[] = { + { + .procname = "xen", + .mode = 0555, + .child = balloon_root, + }, + { } +}; + +#endif + /* * balloon_process() state: * @@ -99,6 +140,7 @@ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; /* List of ballooned pages, threaded through the mem_map array. */ static LIST_HEAD(ballooned_pages); +static DECLARE_WAIT_QUEUE_HEAD(balloon_wq); /* Main work function, always executed in process context. */ static void balloon_process(struct work_struct *work); @@ -127,6 +169,7 @@ static void __balloon_append(struct page *page) list_add(&page->lru, &ballooned_pages); balloon_stats.balloon_low++; } + wake_up(&balloon_wq); } static void balloon_append(struct page *page) @@ -242,7 +285,8 @@ static enum bp_state reserve_additional_memory(void) int nid, rc; unsigned long balloon_hotplug; - credit = balloon_stats.target_pages - balloon_stats.total_pages; + credit = balloon_stats.target_pages + balloon_stats.target_unpopulated + - balloon_stats.total_pages; /* * Already hotplugged enough pages? 
Wait for them to be @@ -323,7 +367,7 @@ static struct notifier_block xen_memory_nb = { static enum bp_state reserve_additional_memory(void) { balloon_stats.target_pages = balloon_stats.current_pages; - return BP_DONE; + return BP_ECANCELED; } #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ @@ -516,6 +560,28 @@ void balloon_set_new_target(unsigned long target) } EXPORT_SYMBOL_GPL(balloon_set_new_target); +static int add_ballooned_pages(int nr_pages) +{ + enum bp_state st; + + if (xen_hotplug_unpopulated) { + st = reserve_additional_memory(); + if (st != BP_ECANCELED) { + mutex_unlock(&balloon_mutex); + wait_event(balloon_wq, + !list_empty(&ballooned_pages)); + mutex_lock(&balloon_mutex); + return 0; + } + } + + st = decrease_reservation(nr_pages, GFP_USER); + if (st != BP_DONE) + return -ENOMEM; + + return 0; +} + /** * alloc_xenballooned_pages - get pages that have been ballooned out * @nr_pages: Number of pages to get @@ -526,27 +592,28 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages) { int pgno = 0; struct page *page; + int ret; + mutex_lock(&balloon_mutex); + + balloon_stats.target_unpopulated += nr_pages; + while (pgno < nr_pages) { page = balloon_retrieve(true); if (page) { pages[pgno++] = page; } else { - enum bp_state st; - st = decrease_reservation(nr_pages - pgno, GFP_USER); - if (st != BP_DONE) + ret = add_ballooned_pages(nr_pages - pgno); + if (ret < 0) goto out_undo; } } mutex_unlock(&balloon_mutex); return 0; out_undo: - while (pgno) - balloon_append(pages[--pgno]); - /* Free the memory back to the kernel soon */ - schedule_delayed_work(&balloon_worker, 0); mutex_unlock(&balloon_mutex); - return -ENOMEM; + free_xenballooned_pages(pgno, pages); + return ret; } EXPORT_SYMBOL(alloc_xenballooned_pages); @@ -566,6 +633,8 @@ void free_xenballooned_pages(int nr_pages, struct page **pages) balloon_append(pages[i]); } + balloon_stats.target_unpopulated -= nr_pages; + /* The balloon may be too large now. Shrink it if needed. */ if (current_credit()) schedule_delayed_work(&balloon_worker, 0); @@ -623,6 +692,7 @@ static int __init balloon_init(void) #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG set_online_page_callback(&xen_online_page); register_memory_notifier(&xen_memory_nb); + register_sysctl_table(xen_root); #endif /* diff --git a/include/xen/balloon.h b/include/xen/balloon.h index 83efdeb243bf..d1767dfb0d95 100644 --- a/include/xen/balloon.h +++ b/include/xen/balloon.h @@ -8,6 +8,7 @@ struct balloon_stats { /* We aim for 'current allocation' == 'target allocation'. */ unsigned long current_pages; unsigned long target_pages; + unsigned long target_unpopulated; /* Number of pages in high- and low-memory balloons. */ unsigned long balloon_low; unsigned long balloon_high; -- cgit v1.2.3 From 8edfcf882eb91ec9028c7334f90f6ef3db5b0fcf Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Jul 2015 14:48:09 +0100 Subject: x86/xen: export xen_alloc_p2m_entry() Rename alloc_p2m() to xen_alloc_p2m_entry() and export it. This is useful for ensuring that a p2m entry is allocated (i.e., not a shared missing or identity entry) so that subsequent set_phys_to_machine() calls will require no further allocations. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- v3: - Make xen_alloc_p2m_entry() a nop on auto-xlate guests. 
--- arch/x86/include/asm/xen/page.h | 2 ++ arch/x86/xen/p2m.c | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 0679e11d2cf7..b922fa4bb4a1 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -43,6 +43,8 @@ extern unsigned long *xen_p2m_addr; extern unsigned long xen_p2m_size; extern unsigned long xen_max_p2m_pfn; +extern int xen_alloc_p2m_entry(unsigned long pfn); + extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 660b3cfef234..cab9f766bb06 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -530,7 +530,7 @@ static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) * the new pages are installed with cmpxchg; if we lose the race then * simply free the page we allocated and use the one that's there. */ -static bool alloc_p2m(unsigned long pfn) +int xen_alloc_p2m_entry(unsigned long pfn) { unsigned topidx; unsigned long *top_mfn_p, *mid_mfn; @@ -540,6 +540,9 @@ static bool alloc_p2m(unsigned long pfn) unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); unsigned long p2m_pfn; + if (xen_feature(XENFEAT_auto_translated_physmap)) + return 0; + ptep = lookup_address(addr, &level); BUG_ON(!ptep || level != PG_LEVEL_4K); pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); @@ -548,7 +551,7 @@ static bool alloc_p2m(unsigned long pfn) /* PMD level is missing, allocate a new one */ ptep = alloc_p2m_pmd(addr, pte_pg); if (!ptep) - return false; + return -ENOMEM; } if (p2m_top_mfn && pfn < MAX_P2M_PFN) { @@ -566,7 +569,7 @@ static bool alloc_p2m(unsigned long pfn) mid_mfn = alloc_p2m_page(); if (!mid_mfn) - return false; + return -ENOMEM; p2m_mid_mfn_init(mid_mfn, p2m_missing); @@ -592,7 +595,7 @@ static bool alloc_p2m(unsigned long pfn) p2m = alloc_p2m_page(); if (!p2m) - return false; + return -ENOMEM; if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) p2m_init(p2m); @@ -625,8 +628,9 @@ static bool alloc_p2m(unsigned long pfn) HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; } - return true; + return 0; } +EXPORT_SYMBOL(xen_alloc_p2m_entry); unsigned long __init set_phys_range_identity(unsigned long pfn_s, unsigned long pfn_e) @@ -688,7 +692,10 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { if (unlikely(!__set_phys_to_machine(pfn, mfn))) { - if (!alloc_p2m(pfn)) + int ret; + + ret = xen_alloc_p2m_entry(pfn); + if (ret < 0) return false; return __set_phys_to_machine(pfn, mfn); -- cgit v1.2.3 From 4a69c909deb0dd3cae653d14ac0ff52d5440a19c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Wed, 22 Jul 2015 14:50:37 +0100 Subject: xen/balloon: pre-allocate p2m entries for ballooned pages Pages returned by alloc_xenballooned_pages() will be used for grant mapping which will call set_phys_to_machine() (in PV guests). Ballooned pages are set as INVALID_P2M_ENTRY in the p2m and thus may be using the (shared) missing tables and a subsequent set_phys_to_machine() will need to allocate new tables. Since the grant mapping may be done from a context that cannot sleep, the p2m entries must already be allocated. 
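A minimal sketch (a PV x86 guest is assumed; the helper example_prealloc_p2m() is illustrative only) of the pattern this patch applies inside alloc_xenballooned_pages(): populate the p2m tables with xen_alloc_p2m_entry() up front, so that a later set_phys_to_machine() issued from a context that cannot sleep never needs to allocate:

#include <linux/mm.h>
#include <asm/xen/page.h>

static int example_prealloc_p2m(struct page **pages, int nr_pages)
{
        int i, ret;

        for (i = 0; i < nr_pages; i++) {
                /* May sleep, so do this before entering atomic context. */
                ret = xen_alloc_p2m_entry(page_to_pfn(pages[i]));
                if (ret < 0)
                        return ret;
        }

        return 0;
}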
Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper --- drivers/xen/balloon.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 25fd1bd949d8..f56662324a47 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -602,6 +602,11 @@ int alloc_xenballooned_pages(int nr_pages, struct page **pages) page = balloon_retrieve(true); if (page) { pages[pgno++] = page; +#ifdef CONFIG_XEN_HAVE_PVMMU + ret = xen_alloc_p2m_entry(page_to_pfn(page)); + if (ret < 0) + goto out_undo; +#endif } else { ret = add_ballooned_pages(nr_pages - pgno); if (ret < 0) -- cgit v1.2.3 From a0f2e80fcd3b6b1369527828b02caab01af60c36 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 24 Jun 2015 18:03:14 +0100 Subject: net/xen-netback: xenvif_gop_frag_copy: move GSO check out of the loop The skb doesn't change within the function. Therefore it's only necessary to check if we need GSO once at the beginning. Signed-off-by: Julien Grall Acked-by: Wei Liu Signed-off-by: David Vrabel --- drivers/net/xen-netback/netback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index ec98d43916a8..c4e6c025d64d 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -288,6 +288,13 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb unsigned long bytes; int gso_type = XEN_NETIF_GSO_TYPE_NONE; + if (skb_is_gso(skb)) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) + gso_type = XEN_NETIF_GSO_TYPE_TCPV4; + else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + gso_type = XEN_NETIF_GSO_TYPE_TCPV6; + } + /* Data must not cross a page boundary. */ BUG_ON(size + offset > PAGE_SIZE<gso_type & SKB_GSO_TCPV4) - gso_type = XEN_NETIF_GSO_TYPE_TCPV4; - else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) - gso_type = XEN_NETIF_GSO_TYPE_TCPV6; - } - if (*head && ((1 << gso_type) & queue->vif->gso_mask)) queue->rx.req_cons++; -- cgit v1.2.3 From 5031612b5eaf15cb6471c6e936a515090810c8f1 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 21 Jul 2015 17:57:20 +0100 Subject: arm/xen: Drop pte_mfn and mfn_pte They are not used in common code expect in one place in balloon.c which is only compiled when Linux is using PV MMU. It's not the case on ARM. Rather than worrying how to handle the 64KB case, drop them. Signed-off-by: Julien Grall Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/arm/include/asm/xen/page.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/include/asm/xen/page.h b/arch/arm/include/asm/xen/page.h index 127956353b00..98c9fc38b945 100644 --- a/arch/arm/include/asm/xen/page.h +++ b/arch/arm/include/asm/xen/page.h @@ -13,9 +13,6 @@ #define phys_to_machine_mapping_valid(pfn) (1) -#define pte_mfn pte_pfn -#define mfn_pte pfn_pte - /* Xen machine address */ typedef struct xmaddr { phys_addr_t maddr; -- cgit v1.2.3 From 1084b1988d22dc165c9dbbc2b0e057f9248ac4db Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 4 May 2015 15:47:16 +0100 Subject: xen: Add Xen specific page definition The Xen hypercall interface is always using 4K page granularity on ARM and x86 architecture. With the incoming support of 64K page granularity for ARM64 guest, it won't be possible to re-use the Linux page definition in Xen drivers. Introduce Xen page definition helpers based on the Linux page definition. They have exactly the same name but prefixed with XEN_/xen_ prefix. 
Also modify xen_page_to_gfn to use new Xen page definition. Signed-off-by: Julien Grall Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- include/xen/page.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/include/xen/page.h b/include/xen/page.h index 1daae485e336..96294ac93755 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -1,11 +1,36 @@ #ifndef _XEN_PAGE_H #define _XEN_PAGE_H +#include + +/* The hypercall interface supports only 4KB page */ +#define XEN_PAGE_SHIFT 12 +#define XEN_PAGE_SIZE (_AC(1, UL) << XEN_PAGE_SHIFT) +#define XEN_PAGE_MASK (~(XEN_PAGE_SIZE-1)) +#define xen_offset_in_page(p) ((unsigned long)(p) & ~XEN_PAGE_MASK) + +/* + * We assume that PAGE_SIZE is a multiple of XEN_PAGE_SIZE + * XXX: Add a BUILD_BUG_ON? + */ + +#define xen_pfn_to_page(xen_pfn) \ + ((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT))) +#define page_to_xen_pfn(page) \ + (((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT) + +#define XEN_PFN_PER_PAGE (PAGE_SIZE / XEN_PAGE_SIZE) + +#define XEN_PFN_DOWN(x) ((x) >> XEN_PAGE_SHIFT) +#define XEN_PFN_UP(x) (((x) + XEN_PAGE_SIZE-1) >> XEN_PAGE_SHIFT) +#define XEN_PFN_PHYS(x) ((phys_addr_t)(x) << XEN_PAGE_SHIFT) + #include +/* Return the GFN associated to the first 4KB of the page */ static inline unsigned long xen_page_to_gfn(struct page *page) { - return pfn_to_gfn(page_to_pfn(page)); + return pfn_to_gfn(page_to_xen_pfn(page)); } struct xen_memory_region { -- cgit v1.2.3 From 008c320a96d218712043f8db0111d5472697785c Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 19 Jun 2015 17:49:03 +0100 Subject: xen/grant: Introduce helpers to split a page into grant Currently, a grant is always based on the Xen page granularity (i.e 4KB). When Linux is using a different page granularity, a single page will be split between multiple grants. The new helpers will be in charge of splitting the Linux page into grants and call a function given by the caller on each grant. Also provide an helper to count the number of grants within a given contiguous region. Note that the x86/include/asm/xen/page.h is now including xen/interface/grant_table.h rather than xen/grant_table.h. It's necessary because xen/grant_table.h depends on asm/xen/page.h and will break the compilation. Furthermore, only definition in interface/grant_table.h is required. 
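A minimal usage sketch of the new helpers (illustration only: setup_one_grant(), example_grant_buffer() and the pre-claimed refs[] array are hypothetical). With 64KB Linux pages a single page spans XEN_PFN_PER_PAGE == 16 Xen pages, so a full page produces up to 16 callbacks; with 4KB pages it degenerates to a single call:

#include <xen/grant_table.h>
#include <xen/page.h>

struct grant_setup {
        domid_t otherend;
        grant_ref_t *refs;      /* pre-claimed refs, one per grant chunk */
        unsigned int next;
};

/* Called once per 4KB grant-sized chunk of the region. */
static void setup_one_grant(unsigned long gfn, unsigned int offset,
                            unsigned int len, void *data)
{
        struct grant_setup *s = data;

        gnttab_grant_foreign_access_ref(s->refs[s->next++],
                                        s->otherend, gfn, 0);
}

/* Grant the other end access to [offset, offset + len) within one Linux page. */
static unsigned int example_grant_buffer(struct page *page, unsigned int offset,
                                         unsigned int len, struct grant_setup *s)
{
        /* Number of grant refs the region spans; refs[] must hold at least this many. */
        unsigned int nr = gnttab_count_grant(offset, len);

        gnttab_foreach_grant_in_range(page, offset, len, setup_one_grant, s);

        return nr;
}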
Signed-off-by: Julien Grall Reviewed-by: David Vrabel Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- arch/x86/include/asm/xen/page.h | 2 +- drivers/xen/grant-table.c | 26 +++++++++++++++++++++++++ include/xen/grant_table.h | 42 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index b922fa4bb4a1..fe58e3a935de 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -12,7 +12,7 @@ #include #include -#include +#include #include /* Xen machine address */ diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index a4b702c9ac68..dbeaa67dec47 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -776,6 +776,32 @@ void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count) } EXPORT_SYMBOL_GPL(gnttab_batch_copy); +void gnttab_foreach_grant_in_range(struct page *page, + unsigned int offset, + unsigned int len, + xen_grant_fn_t fn, + void *data) +{ + unsigned int goffset; + unsigned int glen; + unsigned long xen_pfn; + + len = min_t(unsigned int, PAGE_SIZE - offset, len); + goffset = xen_offset_in_page(offset); + + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset); + + while (len) { + glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len); + fn(pfn_to_gfn(xen_pfn), goffset, glen, data); + + goffset = 0; + xen_pfn++; + len -= glen; + } +} +EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range); + int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, struct gnttab_map_grant_ref *kmap_ops, struct page **pages, unsigned int count) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 4478f4b4aae2..05b5b08c2afc 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -45,8 +45,10 @@ #include #include +#include #include #include +#include #define GNTTAB_RESERVED_XENSTORE 1 @@ -224,4 +226,44 @@ static inline struct xen_page_foreign *xen_page_foreign(struct page *page) #endif } +/* Split Linux page in chunk of the size of the grant and call fn + * + * Parameters of fn: + * gfn: guest frame number + * offset: offset in the grant + * len: length of the data in the grant. + * data: internal information + */ +typedef void (*xen_grant_fn_t)(unsigned long gfn, unsigned int offset, + unsigned int len, void *data); + +void gnttab_foreach_grant_in_range(struct page *page, + unsigned int offset, + unsigned int len, + xen_grant_fn_t fn, + void *data); + +/* Helper to get to call fn only on the first "grant chunk" */ +static inline void gnttab_for_one_grant(struct page *page, unsigned int offset, + unsigned len, xen_grant_fn_t fn, + void *data) +{ + /* The first request is limited to the size of one grant */ + len = min_t(unsigned int, XEN_PAGE_SIZE - (offset & ~XEN_PAGE_MASK), + len); + + gnttab_foreach_grant_in_range(page, offset, len, fn, data); +} + +/* Get the number of grant in a specified region + * + * start: Offset from the beginning of the first page + * len: total length of data (can cross multiple page) + */ +static inline unsigned int gnttab_count_grant(unsigned int start, + unsigned int len) +{ + return XEN_PFN_UP(xen_offset_in_page(start) + len); +} + #endif /* __ASM_GNTTAB_H__ */ -- cgit v1.2.3 From 3922f32c1e6db2e096ff095a5b8af0b940b97508 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 19 Jun 2015 18:05:06 +0100 Subject: xen/grant: Add helper gnttab_page_grant_foreign_access_ref_one Many PV drivers contain the idiom: pfn = page_to_gfn(...) 
/* Or similar */ gnttab_grant_foreign_access_ref Replace it by a new helper. Note that when Linux is using a different page granularity than Xen, the helper only gives access to the first 4KB grant. This is useful where drivers are allocating a full Linux page for each grant. Also include xen/interface/grant_table.h rather than xen/grant_table.h in asm/page.h for x86 to fix a compilation issue [1]. Only the former is useful in order to get the structure definition. [1] Interdependency between asm/page.h and xen/grant_table.h which result to page_mfn not being defined when necessary. Signed-off-by: Julien Grall Reviewed-by: David Vrabel Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- include/xen/grant_table.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 05b5b08c2afc..e17a4b381a16 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -131,6 +131,15 @@ void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly); +/* Give access to the first 4K of the page */ +static inline void gnttab_page_grant_foreign_access_ref_one( + grant_ref_t ref, domid_t domid, + struct page *page, int readonly) +{ + gnttab_grant_foreign_access_ref(ref, domid, xen_page_to_gfn(page), + readonly); +} + void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, unsigned long pfn); -- cgit v1.2.3 From 33204663ef85d9ce79009b2246afe6a2fef8eb1b Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Mon, 29 Jun 2015 17:35:24 +0100 Subject: block/xen-blkfront: Split blkif_queue_request in 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, blkif_queue_request has 2 distinct execution path: - Send a discard request - Send a read/write request The function is also allocating grants to use for generating the request. Although, this is only used for read/write request. Rather than having a function with 2 distinct execution path, separate the function in 2. This will also remove one level of tabulation. Signed-off-by: Julien Grall Reviewed-by: Roger Pau Monné Signed-off-by: David Vrabel --- drivers/block/xen-blkfront.c | 277 ++++++++++++++++++++++++------------------- 1 file changed, 153 insertions(+), 124 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 611170896b8c..bf9ccfe03f45 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -394,13 +394,35 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode, return 0; } -/* - * Generate a Xen blkfront IO request from a blk layer request. Reads - * and writes are handled as expected. - * - * @req: a request struct - */ -static int blkif_queue_request(struct request *req) +static int blkif_queue_discard_req(struct request *req) +{ + struct blkfront_info *info = req->rq_disk->private_data; + struct blkif_request *ring_req; + unsigned long id; + + /* Fill out a communications ring structure. 
*/ + ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt); + id = get_id_from_freelist(info); + info->shadow[id].request = req; + + ring_req->operation = BLKIF_OP_DISCARD; + ring_req->u.discard.nr_sectors = blk_rq_sectors(req); + ring_req->u.discard.id = id; + ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); + if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) + ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; + else + ring_req->u.discard.flag = 0; + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + info->shadow[id].req = *ring_req; + + return 0; +} + +static int blkif_queue_rw_req(struct request *req) { struct blkfront_info *info = req->rq_disk->private_data; struct blkif_request *ring_req; @@ -420,9 +442,6 @@ static int blkif_queue_request(struct request *req) struct scatterlist *sg; int nseg, max_grefs; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) - return 1; - max_grefs = req->nr_phys_segments; if (max_grefs > BLKIF_MAX_SEGMENTS_PER_REQUEST) /* @@ -452,139 +471,131 @@ static int blkif_queue_request(struct request *req) id = get_id_from_freelist(info); info->shadow[id].request = req; - if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) { - ring_req->operation = BLKIF_OP_DISCARD; - ring_req->u.discard.nr_sectors = blk_rq_sectors(req); - ring_req->u.discard.id = id; - ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req); - if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard) - ring_req->u.discard.flag = BLKIF_DISCARD_SECURE; - else - ring_req->u.discard.flag = 0; + BUG_ON(info->max_indirect_segments == 0 && + req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); + BUG_ON(info->max_indirect_segments && + req->nr_phys_segments > info->max_indirect_segments); + nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); + ring_req->u.rw.id = id; + if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { + /* + * The indirect operation can only be a BLKIF_OP_READ or + * BLKIF_OP_WRITE + */ + BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); + ring_req->operation = BLKIF_OP_INDIRECT; + ring_req->u.indirect.indirect_op = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.indirect.handle = info->handle; + ring_req->u.indirect.nr_segments = nseg; } else { - BUG_ON(info->max_indirect_segments == 0 && - req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - BUG_ON(info->max_indirect_segments && - req->nr_phys_segments > info->max_indirect_segments); - nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg); - ring_req->u.rw.id = id; - if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) { + ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); + ring_req->u.rw.handle = info->handle; + ring_req->operation = rq_data_dir(req) ? + BLKIF_OP_WRITE : BLKIF_OP_READ; + if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { /* - * The indirect operation can only be a BLKIF_OP_READ or - * BLKIF_OP_WRITE + * Ideally we can do an unordered flush-to-disk. + * In case the backend onlysupports barriers, use that. + * A barrier request a superset of FUA, so we can + * implement it the same way. (It's also a FLUSH+FUA, + * since it is guaranteed ordered WRT previous writes.) */ - BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA)); - ring_req->operation = BLKIF_OP_INDIRECT; - ring_req->u.indirect.indirect_op = rq_data_dir(req) ? 
- BLKIF_OP_WRITE : BLKIF_OP_READ; - ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->u.indirect.handle = info->handle; - ring_req->u.indirect.nr_segments = nseg; - } else { - ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req); - ring_req->u.rw.handle = info->handle; - ring_req->operation = rq_data_dir(req) ? - BLKIF_OP_WRITE : BLKIF_OP_READ; - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) { - /* - * Ideally we can do an unordered flush-to-disk. In case the - * backend onlysupports barriers, use that. A barrier request - * a superset of FUA, so we can implement it the same - * way. (It's also a FLUSH+FUA, since it is - * guaranteed ordered WRT previous writes.) - */ - switch (info->feature_flush & - ((REQ_FLUSH|REQ_FUA))) { - case REQ_FLUSH|REQ_FUA: - ring_req->operation = - BLKIF_OP_WRITE_BARRIER; - break; - case REQ_FLUSH: - ring_req->operation = - BLKIF_OP_FLUSH_DISKCACHE; - break; - default: - ring_req->operation = 0; - } + switch (info->feature_flush & + ((REQ_FLUSH|REQ_FUA))) { + case REQ_FLUSH|REQ_FUA: + ring_req->operation = + BLKIF_OP_WRITE_BARRIER; + break; + case REQ_FLUSH: + ring_req->operation = + BLKIF_OP_FLUSH_DISKCACHE; + break; + default: + ring_req->operation = 0; } - ring_req->u.rw.nr_segments = nseg; } - for_each_sg(info->shadow[id].sg, sg, nseg, i) { - fsect = sg->offset >> 9; - lsect = fsect + (sg->length >> 9) - 1; + ring_req->u.rw.nr_segments = nseg; + } + for_each_sg(info->shadow[id].sg, sg, nseg, i) { + fsect = sg->offset >> 9; + lsect = fsect + (sg->length >> 9) - 1; - if ((ring_req->operation == BLKIF_OP_INDIRECT) && - (i % SEGS_PER_INDIRECT_FRAME == 0)) { - unsigned long uninitialized_var(pfn); + if ((ring_req->operation == BLKIF_OP_INDIRECT) && + (i % SEGS_PER_INDIRECT_FRAME == 0)) { + unsigned long uninitialized_var(pfn); - if (segments) - kunmap_atomic(segments); + if (segments) + kunmap_atomic(segments); - n = i / SEGS_PER_INDIRECT_FRAME; - if (!info->feature_persistent) { - struct page *indirect_page; - - /* Fetch a pre-allocated page to use for indirect grefs */ - BUG_ON(list_empty(&info->indirect_pages)); - indirect_page = list_first_entry(&info->indirect_pages, - struct page, lru); - list_del(&indirect_page->lru); - pfn = page_to_pfn(indirect_page); - } - gnt_list_entry = get_grant(&gref_head, pfn, info); - info->shadow[id].indirect_grants[n] = gnt_list_entry; - segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); - ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; + n = i / SEGS_PER_INDIRECT_FRAME; + if (!info->feature_persistent) { + struct page *indirect_page; + + /* + * Fetch a pre-allocated page to use for + * indirect grefs + */ + BUG_ON(list_empty(&info->indirect_pages)); + indirect_page = list_first_entry(&info->indirect_pages, + struct page, lru); + list_del(&indirect_page->lru); + pfn = page_to_pfn(indirect_page); } + gnt_list_entry = get_grant(&gref_head, pfn, info); + info->shadow[id].indirect_grants[n] = gnt_list_entry; + segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; + } - gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info); - ref = gnt_list_entry->gref; + gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info); + ref = gnt_list_entry->gref; - info->shadow[id].grants_used[i] = gnt_list_entry; + info->shadow[id].grants_used[i] = gnt_list_entry; - if (rq_data_dir(req) && info->feature_persistent) { - char *bvec_data; - void *shared_data; + if (rq_data_dir(req) && 
info->feature_persistent) { + char *bvec_data; + void *shared_data; - BUG_ON(sg->offset + sg->length > PAGE_SIZE); + BUG_ON(sg->offset + sg->length > PAGE_SIZE); - shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); - bvec_data = kmap_atomic(sg_page(sg)); + shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + bvec_data = kmap_atomic(sg_page(sg)); - /* - * this does not wipe data stored outside the - * range sg->offset..sg->offset+sg->length. - * Therefore, blkback *could* see data from - * previous requests. This is OK as long as - * persistent grants are shared with just one - * domain. It may need refactoring if this - * changes - */ - memcpy(shared_data + sg->offset, - bvec_data + sg->offset, - sg->length); + /* + * this does not wipe data stored outside the + * range sg->offset..sg->offset+sg->length. + * Therefore, blkback *could* see data from + * previous requests. This is OK as long as + * persistent grants are shared with just one + * domain. It may need refactoring if this + * changes + */ + memcpy(shared_data + sg->offset, + bvec_data + sg->offset, + sg->length); - kunmap_atomic(bvec_data); - kunmap_atomic(shared_data); - } - if (ring_req->operation != BLKIF_OP_INDIRECT) { - ring_req->u.rw.seg[i] = - (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; - } else { - n = i % SEGS_PER_INDIRECT_FRAME; - segments[n] = + kunmap_atomic(bvec_data); + kunmap_atomic(shared_data); + } + if (ring_req->operation != BLKIF_OP_INDIRECT) { + ring_req->u.rw.seg[i] = (struct blkif_request_segment) { - .gref = ref, - .first_sect = fsect, - .last_sect = lsect }; - } + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + } else { + n = i % SEGS_PER_INDIRECT_FRAME; + segments[n] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; } - if (segments) - kunmap_atomic(segments); } + if (segments) + kunmap_atomic(segments); info->ring.req_prod_pvt++; @@ -597,6 +608,24 @@ static int blkif_queue_request(struct request *req) return 0; } +/* + * Generate a Xen blkfront IO request from a blk layer request. Reads + * and writes are handled as expected. + * + * @req: a request struct + */ +static int blkif_queue_request(struct request *req) +{ + struct blkfront_info *info = req->rq_disk->private_data; + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) + return blkif_queue_discard_req(req); + else + return blkif_queue_rw_req(req); +} static inline void flush_requests(struct blkfront_info *info) { -- cgit v1.2.3 From a7a6df222351de23791bb64165f14c21ff4d1653 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Tue, 30 Jun 2015 11:58:51 +0100 Subject: block/xen-blkfront: Store a page rather a pfn in the grant structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All the usage of the field pfn are done using the same idiom: pfn_to_page(grant->pfn) This will return always the same page. Store directly the page in the grant to clean up the code. 
Signed-off-by: Julien Grall Acked-by: Roger Pau Monné Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/block/xen-blkfront.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index bf9ccfe03f45..7552ca8f8ffe 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -68,7 +68,7 @@ enum blkif_state { struct grant { grant_ref_t gref; - unsigned long pfn; + struct page *page; struct list_head node; }; @@ -221,7 +221,7 @@ static int fill_grant_buffer(struct blkfront_info *info, int num) kfree(gnt_list_entry); goto out_of_memory; } - gnt_list_entry->pfn = page_to_pfn(granted_page); + gnt_list_entry->page = granted_page; } gnt_list_entry->gref = GRANT_INVALID_REF; @@ -236,7 +236,7 @@ out_of_memory: &info->grants, node) { list_del(&gnt_list_entry->node); if (info->feature_persistent) - __free_page(pfn_to_page(gnt_list_entry->pfn)); + __free_page(gnt_list_entry->page); kfree(gnt_list_entry); i--; } @@ -245,8 +245,8 @@ out_of_memory: } static struct grant *get_grant(grant_ref_t *gref_head, - unsigned long pfn, - struct blkfront_info *info) + struct page *page, + struct blkfront_info *info) { struct grant *gnt_list_entry; unsigned long buffer_gfn; @@ -265,10 +265,10 @@ static struct grant *get_grant(grant_ref_t *gref_head, gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head); BUG_ON(gnt_list_entry->gref == -ENOSPC); if (!info->feature_persistent) { - BUG_ON(!pfn); - gnt_list_entry->pfn = pfn; + BUG_ON(!page); + gnt_list_entry->page = page; } - buffer_gfn = pfn_to_gfn(gnt_list_entry->pfn); + buffer_gfn = xen_page_to_gfn(gnt_list_entry->page); gnttab_grant_foreign_access_ref(gnt_list_entry->gref, info->xbdev->otherend_id, buffer_gfn, 0); @@ -524,7 +524,7 @@ static int blkif_queue_rw_req(struct request *req) if ((ring_req->operation == BLKIF_OP_INDIRECT) && (i % SEGS_PER_INDIRECT_FRAME == 0)) { - unsigned long uninitialized_var(pfn); + struct page *uninitialized_var(page); if (segments) kunmap_atomic(segments); @@ -541,15 +541,15 @@ static int blkif_queue_rw_req(struct request *req) indirect_page = list_first_entry(&info->indirect_pages, struct page, lru); list_del(&indirect_page->lru); - pfn = page_to_pfn(indirect_page); + page = indirect_page; } - gnt_list_entry = get_grant(&gref_head, pfn, info); + gnt_list_entry = get_grant(&gref_head, page, info); info->shadow[id].indirect_grants[n] = gnt_list_entry; - segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + segments = kmap_atomic(gnt_list_entry->page); ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref; } - gnt_list_entry = get_grant(&gref_head, page_to_pfn(sg_page(sg)), info); + gnt_list_entry = get_grant(&gref_head, sg_page(sg), info); ref = gnt_list_entry->gref; info->shadow[id].grants_used[i] = gnt_list_entry; @@ -560,7 +560,7 @@ static int blkif_queue_rw_req(struct request *req) BUG_ON(sg->offset + sg->length > PAGE_SIZE); - shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn)); + shared_data = kmap_atomic(gnt_list_entry->page); bvec_data = kmap_atomic(sg_page(sg)); /* @@ -1001,7 +1001,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) info->persistent_gnts_c--; } if (info->feature_persistent) - __free_page(pfn_to_page(persistent_gnt->pfn)); + __free_page(persistent_gnt->page); kfree(persistent_gnt); } } @@ -1036,7 +1036,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) persistent_gnt = info->shadow[i].grants_used[j]; 
gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); if (info->feature_persistent) - __free_page(pfn_to_page(persistent_gnt->pfn)); + __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1050,7 +1050,7 @@ static void blkif_free(struct blkfront_info *info, int suspend) for (j = 0; j < INDIRECT_GREFS(segs); j++) { persistent_gnt = info->shadow[i].indirect_grants[j]; gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); - __free_page(pfn_to_page(persistent_gnt->pfn)); + __free_page(persistent_gnt->page); kfree(persistent_gnt); } @@ -1101,8 +1101,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, if (bret->operation == BLKIF_OP_READ && info->feature_persistent) { for_each_sg(s->sg, sg, nseg, i) { BUG_ON(sg->offset + sg->length > PAGE_SIZE); - shared_data = kmap_atomic( - pfn_to_page(s->grants_used[i]->pfn)); + shared_data = kmap_atomic(s->grants_used[i]->page); bvec_data = kmap_atomic(sg_page(sg)); memcpy(bvec_data + sg->offset, shared_data + sg->offset, @@ -1154,7 +1153,7 @@ static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, * available pages for indirect grefs. */ if (!info->feature_persistent) { - indirect_page = pfn_to_page(s->indirect_grants[i]->pfn); + indirect_page = s->indirect_grants[i]->page; list_add(&indirect_page->lru, &info->indirect_pages); } s->indirect_grants[i]->gref = GRANT_INVALID_REF; -- cgit v1.2.3 From 4f503fbdf319e4411aa48852b8922c93a9cc0c5d Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Wed, 1 Jul 2015 14:10:38 +0100 Subject: block/xen-blkfront: split get_grant in 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare the code to support 64KB page granularity. The first implementation will use a full Linux page per indirect and p