From 144c61ad355a4f31a45a2cd91c808020c4322374 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 23 Jan 2015 08:37:01 +0000 Subject: xen/tmem: mark xen_tmem_init() __init As a module_init() function, this should have been this way from the beginning. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel --- drivers/xen/tmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index 83b5c53bec6b..8a65423bc696 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -374,7 +374,7 @@ static struct frontswap_ops tmem_frontswap_ops = { }; #endif -static int xen_tmem_init(void) +static int __init xen_tmem_init(void) { if (!xen_domain()) return 0; -- cgit v1.2.3 From 853d0289340026b30f93fd0e768340221d4e605c Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 5 Jan 2015 14:13:41 +0000 Subject: xen/grant-table: pre-populate kernel unmap ops for xen_gnttab_unmap_refs() When unmapping grants, instead of converting the kernel map ops to unmap ops on the fly, pre-populate the set of unmap ops. This allows the grant unmap for the kernel mappings to be trivially batched in the future. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 20 ++++++++++++++------ drivers/xen/grant-table.c | 4 ++-- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 073b4a19a8b0..6444172f2842 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -91,6 +91,7 @@ struct grant_map { struct gnttab_map_grant_ref *map_ops; struct gnttab_unmap_grant_ref *unmap_ops; struct gnttab_map_grant_ref *kmap_ops; + struct gnttab_unmap_grant_ref *kunmap_ops; struct page **pages; }; @@ -124,6 +125,7 @@ static void gntdev_free_map(struct grant_map *map) kfree(map->map_ops); kfree(map->unmap_ops); kfree(map->kmap_ops); + kfree(map->kunmap_ops); kfree(map); } @@ -140,11 +142,13 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->map_ops = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL); add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL); add->kmap_ops = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL); + add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL); add->pages = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL); if (NULL == add->grants || NULL == add->map_ops || NULL == add->unmap_ops || NULL == add->kmap_ops || + NULL == add->kunmap_ops || NULL == add->pages) goto err; @@ -155,6 +159,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) add->map_ops[i].handle = -1; add->unmap_ops[i].handle = -1; add->kmap_ops[i].handle = -1; + add->kunmap_ops[i].handle = -1; } add->index = 0; @@ -280,6 +285,8 @@ static int map_grant_pages(struct grant_map *map) map->flags | GNTMAP_host_map, map->grants[i].ref, map->grants[i].domid); + gnttab_set_unmap_op(&map->kunmap_ops[i], address, + map->flags | GNTMAP_host_map, -1); } } @@ -290,13 +297,14 @@ static int map_grant_pages(struct grant_map *map) return err; for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status) + if (map->map_ops[i].status) { err = -EINVAL; - else { - BUG_ON(map->map_ops[i].handle == -1); - map->unmap_ops[i].handle = map->map_ops[i].handle; - pr_debug("map handle=%d\n", map->map_ops[i].handle); + continue; } + + map->unmap_ops[i].handle = map->map_ops[i].handle; + if (use_ptemod) + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; } return err; } @@ -316,7 +324,7 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } err = gnttab_unmap_refs(map->unmap_ops + offset, - use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset, + use_ptemod ? map->kunmap_ops + offset : NULL, map->pages + offset, pages); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 7786291ba229..999d7abdbcec 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -738,7 +738,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct gnttab_map_grant_ref *kmap_ops, + struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { int ret; @@ -747,7 +747,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, if (ret) return ret; - return clear_foreign_p2m_mapping(unmap_ops, kmap_ops, pages, count); + return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count); } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -- cgit v1.2.3 From 0bb599fd30108883b00c7d4a226eeb49111e6932 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 5 Jan 2015 17:06:01 +0000 Subject: xen: remove scratch frames for ballooned pages and m2p override The scratch frame mappings for ballooned pages and the m2p override are broken. Remove them in preparation for replacing them with simpler mechanisms that works. The scratch pages did not ensure that the page was not in use. In particular, the foreign page could still be in use by hardware. If the guest reused the frame the hardware could read or write that frame. The m2p override did not handle the same frame being granted by two different grant references. Trying an M2P override lookup in this case is impossible. With the m2p override removed, the grant map/unmap for the kernel mappings (for x86 PV) can be easily batched in set_foreign_p2m_mapping() and clear_foreign_p2m_mapping(). Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/balloon.c | 86 ++------------------------------------------------- 1 file changed, 2 insertions(+), 84 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 3860d02729dc..0b52d92cb2e5 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -92,7 +92,6 @@ EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static xen_pfn_t frame_list[PAGE_SIZE / sizeof(unsigned long)]; -static DEFINE_PER_CPU(struct page *, balloon_scratch_page); /* List of ballooned pages, threaded through the mem_map array. */ @@ -423,22 +422,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) page = pfn_to_page(pfn); #ifdef CONFIG_XEN_HAVE_PVMMU - /* - * Ballooned out frames are effectively replaced with - * a scratch frame. Ensure direct mappings and the - * p2m are consistent. - */ if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!PageHighMem(page)) { - struct page *scratch_page = get_balloon_scratch_page(); - ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), - pfn_pte(page_to_pfn(scratch_page), - PAGE_KERNEL_RO), 0); + __pte_ma(0), 0); BUG_ON(ret); - - put_balloon_scratch_page(); } __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } @@ -500,18 +489,6 @@ static void balloon_process(struct work_struct *work) mutex_unlock(&balloon_mutex); } -struct page *get_balloon_scratch_page(void) -{ - struct page *ret = get_cpu_var(balloon_scratch_page); - BUG_ON(ret == NULL); - return ret; -} - -void put_balloon_scratch_page(void) -{ - put_cpu_var(balloon_scratch_page); -} - /* Resets the Xen limit, sets new target, and kicks off processing. */ void balloon_set_new_target(unsigned long target) { @@ -605,61 +582,13 @@ static void __init balloon_add_region(unsigned long start_pfn, } } -static int alloc_balloon_scratch_page(int cpu) -{ - if (per_cpu(balloon_scratch_page, cpu) != NULL) - return 0; - - per_cpu(balloon_scratch_page, cpu) = alloc_page(GFP_KERNEL); - if (per_cpu(balloon_scratch_page, cpu) == NULL) { - pr_warn("Failed to allocate balloon_scratch_page for cpu %d\n", cpu); - return -ENOMEM; - } - - return 0; -} - - -static int balloon_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - switch (action) { - case CPU_UP_PREPARE: - if (alloc_balloon_scratch_page(cpu)) - return NOTIFY_BAD; - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block balloon_cpu_notifier = { - .notifier_call = balloon_cpu_notify, -}; - static int __init balloon_init(void) { - int i, cpu; + int i; if (!xen_domain()) return -ENODEV; - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - register_cpu_notifier(&balloon_cpu_notifier); - - get_online_cpus(); - for_each_online_cpu(cpu) { - if (alloc_balloon_scratch_page(cpu)) { - put_online_cpus(); - unregister_cpu_notifier(&balloon_cpu_notifier); - return -ENOMEM; - } - } - put_online_cpus(); - } - pr_info("Initialising balloon driver\n"); balloon_stats.current_pages = xen_pv_domain() @@ -696,15 +625,4 @@ static int __init balloon_init(void) subsys_initcall(balloon_init); -static int __init balloon_clear(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - per_cpu(balloon_scratch_page, cpu) = NULL; - - return 0; -} -early_initcall(balloon_clear); - MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 0ae65f49af64d68f0daca37b83383115cae5e690 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Wed, 24 Dec 2014 14:03:16 +0000 Subject: x86/xen: require ballooned pages for grant maps Ballooned pages are always used for grant maps which means the original frame does not need to be saved in page->index nor restored after the grant unmap. This allows the workaround in netback for the conflicting use of the (unionized) page->index and page->pfmemalloc to be removed. Signed-off-by: Jennifer Herbert Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/net/xen-netback/netback.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 908e65e9b821..64413189ad06 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1241,12 +1241,6 @@ static void xenvif_fill_frags(struct xenvif_queue *queue, struct sk_buff *skb) /* Take an extra reference to offset network stack's put_page */ get_page(queue->mmap_pages[pending_idx]); } - /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc - * overlaps with "index", and "mapping" is not set. I think mapping - * should be set. If delivered to local stack, it would drop this - * skb in sk_filter unless the socket has the right to use it. - */ - skb->pfmemalloc = false; } static int xenvif_get_extras(struct xenvif_queue *queue, -- cgit v1.2.3 From ff4b156f166b3931894d2a8b5cdba6cdf4da0618 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 8 Jan 2015 18:06:01 +0000 Subject: xen/grant-table: add helpers for allocating pages Add gnttab_alloc_pages() and gnttab_free_pages() to allocate/free pages suitable to for granted maps. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/block/xen-blkback/blkback.c | 8 ++++---- drivers/net/xen-netback/interface.c | 7 +++---- drivers/xen/gntdev.c | 4 ++-- drivers/xen/grant-table.c | 29 +++++++++++++++++++++++++++++ drivers/xen/xen-scsiback.c | 6 +++--- 5 files changed, 41 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 63fc7f06a014..908e630240bd 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -100,7 +100,7 @@ module_param(log_stats, int, 0644); #define BLKBACK_INVALID_HANDLE (~0) -/* Number of free pages to remove on each call to free_xenballooned_pages */ +/* Number of free pages to remove on each call to gnttab_free_pages */ #define NUM_BATCH_FREE_PAGES 10 static inline int get_free_page(struct xen_blkif *blkif, struct page **page) @@ -111,7 +111,7 @@ static inline int get_free_page(struct xen_blkif *blkif, struct page **page) if (list_empty(&blkif->free_pages)) { BUG_ON(blkif->free_pages_num != 0); spin_unlock_irqrestore(&blkif->free_pages_lock, flags); - return alloc_xenballooned_pages(1, page, false); + return gnttab_alloc_pages(1, page); } BUG_ON(blkif->free_pages_num == 0); page[0] = list_first_entry(&blkif->free_pages, struct page, lru); @@ -151,14 +151,14 @@ static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num) blkif->free_pages_num--; if (++num_pages == NUM_BATCH_FREE_PAGES) { spin_unlock_irqrestore(&blkif->free_pages_lock, flags); - free_xenballooned_pages(num_pages, page); + gnttab_free_pages(num_pages, page); spin_lock_irqsave(&blkif->free_pages_lock, flags); num_pages = 0; } } spin_unlock_irqrestore(&blkif->free_pages_lock, flags); if (num_pages != 0) - free_xenballooned_pages(num_pages, page); + gnttab_free_pages(num_pages, page); } #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page))) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 9259a732e8a4..2e07f8433412 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -483,9 +483,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) * better enable it. The long term solution would be to use just a * bunch of valid page descriptors, without dependency on ballooning */ - err = alloc_xenballooned_pages(MAX_PENDING_REQS, - queue->mmap_pages, - false); + err = gnttab_alloc_pages(MAX_PENDING_REQS, + queue->mmap_pages); if (err) { netdev_err(queue->vif->dev, "Could not reserve mmap_pages\n"); return -ENOMEM; @@ -662,7 +661,7 @@ void xenvif_disconnect(struct xenvif *vif) */ void xenvif_deinit_queue(struct xenvif_queue *queue) { - free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); + gnttab_free_pages(MAX_PENDING_REQS, queue->mmap_pages); } void xenvif_free(struct xenvif *vif) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 6444172f2842..8cc3f069a10f 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -119,7 +119,7 @@ static void gntdev_free_map(struct grant_map *map) return; if (map->pages) - free_xenballooned_pages(map->count, map->pages); + gnttab_free_pages(map->count, map->pages); kfree(map->pages); kfree(map->grants); kfree(map->map_ops); @@ -152,7 +152,7 @@ static struct grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count) NULL == add->pages) goto err; - if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */)) + if (gnttab_alloc_pages(count, add->pages)) goto err; for (i = 0; i < count; i++) { diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 999d7abdbcec..b4f93c490f83 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -671,6 +672,34 @@ void gnttab_free_auto_xlat_frames(void) } EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); +/** + * gnttab_alloc_pages - alloc pages suitable for grant mapping into + * @nr_pages: number of pages to alloc + * @pages: returns the pages + */ +int gnttab_alloc_pages(int nr_pages, struct page **pages) +{ + int ret; + + ret = alloc_xenballooned_pages(nr_pages, pages, false); + if (ret < 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(gnttab_alloc_pages); + +/** + * gnttab_free_pages - free pages allocated by gnttab_alloc_pages() + * @nr_pages; number of pages to free + * @pages: the pages + */ +void gnttab_free_pages(int nr_pages, struct page **pages) +{ + free_xenballooned_pages(nr_pages, pages); +} +EXPORT_SYMBOL(gnttab_free_pages); + /* Handling of paged out grant targets (GNTST_eagain) */ #define MAX_DELAY 256 static inline void diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index e999496eda3e..ecd540a7a562 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -227,7 +227,7 @@ static void put_free_pages(struct page **page, int num) return; if (i > scsiback_max_buffer_pages) { n = min(num, i - scsiback_max_buffer_pages); - free_xenballooned_pages(n, page + num - n); + gnttab_free_pages(n, page + num - n); n = num - n; } spin_lock_irqsave(&free_pages_lock, flags); @@ -244,7 +244,7 @@ static int get_free_page(struct page **page) spin_lock_irqsave(&free_pages_lock, flags); if (list_empty(&scsiback_free_pages)) { spin_unlock_irqrestore(&free_pages_lock, flags); - return alloc_xenballooned_pages(1, page, false); + return gnttab_alloc_pages(1, page); } page[0] = list_first_entry(&scsiback_free_pages, struct page, lru); list_del(&page[0]->lru); @@ -2106,7 +2106,7 @@ static void __exit scsiback_exit(void) while (free_pages_num) { if (get_free_page(&page)) BUG(); - free_xenballooned_pages(1, &page); + gnttab_free_pages(1, &page); } scsiback_deregister_configfs(); xenbus_unregister_driver(&scsiback_driver); -- cgit v1.2.3 From 8da7633f168b5428e2cfb7342408b2c44088f5df Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Wed, 24 Dec 2014 14:17:06 +0000 Subject: xen: mark grant mapped pages as foreign Use the "foreign" page flag to mark pages that have a grant map. Use page->private to store information of the grant (the granting domain and the grant reference). Signed-off-by: Jennifer Herbert Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/xen/grant-table.c | 43 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index b4f93c490f83..89dcca448bb6 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -679,12 +679,27 @@ EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames); */ int gnttab_alloc_pages(int nr_pages, struct page **pages) { + int i; int ret; ret = alloc_xenballooned_pages(nr_pages, pages, false); if (ret < 0) return ret; + for (i = 0; i < nr_pages; i++) { +#if BITS_PER_LONG < 64 + struct xen_page_foreign *foreign; + + foreign = kzalloc(sizeof(*foreign), GFP_KERNEL); + if (!foreign) { + gnttab_free_pages(nr_pages, pages); + return -ENOMEM; + } + set_page_private(pages[i], (unsigned long)foreign); +#endif + SetPagePrivate(pages[i]); + } + return 0; } EXPORT_SYMBOL(gnttab_alloc_pages); @@ -696,6 +711,16 @@ EXPORT_SYMBOL(gnttab_alloc_pages); */ void gnttab_free_pages(int nr_pages, struct page **pages) { + int i; + + for (i = 0; i < nr_pages; i++) { + if (PagePrivate(pages[i])) { +#if BITS_PER_LONG < 64 + kfree((void *)page_private(pages[i])); +#endif + ClearPagePrivate(pages[i]); + } + } free_xenballooned_pages(nr_pages, pages); } EXPORT_SYMBOL(gnttab_free_pages); @@ -756,12 +781,22 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, if (ret) return ret; - /* Retry eagain maps */ - for (i = 0; i < count; i++) + for (i = 0; i < count; i++) { + /* Retry eagain maps */ if (map_ops[i].status == GNTST_eagain) gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, map_ops + i, &map_ops[i].status, __func__); + if (map_ops[i].status == GNTST_okay) { + struct xen_page_foreign *foreign; + + SetPageForeign(pages[i]); + foreign = xen_page_foreign(pages[i]); + foreign->domid = map_ops[i].dom; + foreign->gref = map_ops[i].ref; + } + } + return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count); } EXPORT_SYMBOL_GPL(gnttab_map_refs); @@ -770,12 +805,16 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, struct gnttab_unmap_grant_ref *kunmap_ops, struct page **pages, unsigned int count) { + unsigned int i; int ret; ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count); if (ret) return ret; + for (i = 0; i < count; i++) + ClearPageForeign(pages[i]); + return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count); } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); -- cgit v1.2.3 From c2677a6fc4dee765fff8f7ac3d61f657dc295650 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 14:45:10 +0000 Subject: xen-netback: use foreign page information from the pages themselves Use the foreign page flag in netback to get the domid and grant ref needed for the grant copy. This signficiantly simplifies the netback code and makes netback work with foreign pages from other backends (e.g., blkback). This allows blkback to use iSCSI disks provided by domUs running on the same host. Signed-off-by: Jennifer Herbert Acked-by: Ian Campbell Acked-by: David S. Miller Signed-off-by: David Vrabel --- drivers/net/xen-netback/netback.c | 100 ++++---------------------------------- 1 file changed, 9 insertions(+), 91 deletions(-) (limited to 'drivers') diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 64413189ad06..ae3ab3752ea8 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -314,9 +314,7 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue, static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb, struct netrx_pending_operations *npo, struct page *page, unsigned long size, - unsigned long offset, int *head, - struct xenvif_queue *foreign_queue, - grant_ref_t foreign_gref) + unsigned long offset, int *head) { struct gnttab_copy *copy_gop; struct xenvif_rx_meta *meta; @@ -333,6 +331,8 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb offset &= ~PAGE_MASK; while (size > 0) { + struct xen_page_foreign *foreign; + BUG_ON(offset >= PAGE_SIZE); BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET); @@ -361,9 +361,10 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb copy_gop->flags = GNTCOPY_dest_gref; copy_gop->len = bytes; - if (foreign_queue) { - copy_gop->source.domid = foreign_queue->vif->domid; - copy_gop->source.u.ref = foreign_gref; + foreign = xen_page_foreign(page); + if (foreign) { + copy_gop->source.domid = foreign->domid; + copy_gop->source.u.ref = foreign->gref; copy_gop->flags |= GNTCOPY_source_gref; } else { copy_gop->source.domid = DOMID_SELF; @@ -405,35 +406,6 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb } } -/* - * Find the grant ref for a given frag in a chain of struct ubuf_info's - * skb: the skb itself - * i: the frag's number - * ubuf: a pointer to an element in the chain. It should not be NULL - * - * Returns a pointer to the element in the chain where the page were found. If - * not found, returns NULL. - * See the definition of callback_struct in common.h for more details about - * the chain. - */ -static const struct ubuf_info *xenvif_find_gref(const struct sk_buff *const skb, - const int i, - const struct ubuf_info *ubuf) -{ - struct xenvif_queue *foreign_queue = ubuf_to_queue(ubuf); - - do { - u16 pending_idx = ubuf->desc; - - if (skb_shinfo(skb)->frags[i].page.p == - foreign_queue->mmap_pages[pending_idx]) - break; - ubuf = (struct ubuf_info *) ubuf->ctx; - } while (ubuf); - - return ubuf; -} - /* * Prepare an SKB to be transmitted to the frontend. * @@ -459,8 +431,6 @@ static int xenvif_gop_skb(struct sk_buff *skb, int head = 1; int old_meta_prod; int gso_type; - const struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg; - const struct ubuf_info *const head_ubuf = ubuf; old_meta_prod = npo->meta_prod; @@ -507,68 +477,16 @@ static int xenvif_gop_skb(struct sk_buff *skb, len = skb_tail_pointer(skb) - data; xenvif_gop_frag_copy(queue, skb, npo, - virt_to_page(data), len, offset, &head, - NULL, - 0); + virt_to_page(data), len, offset, &head); data += len; } for (i = 0; i < nr_frags; i++) { - /* This variable also signals whether foreign_gref has a real - * value or not. - */ - struct xenvif_queue *foreign_queue = NULL; - grant_ref_t foreign_gref; - - if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) && - (ubuf->callback == &xenvif_zerocopy_callback)) { - const struct ubuf_info *const startpoint = ubuf; - - /* Ideally ubuf points to the chain element which - * belongs to this frag. Or if frags were removed from - * the beginning, then shortly before it. - */ - ubuf = xenvif_find_gref(skb, i, ubuf); - - /* Try again from the beginning of the list, if we - * haven't tried from there. This only makes sense in - * the unlikely event of reordering the original frags. - * For injected local pages it's an unnecessary second - * run. - */ - if (unlikely(!ubuf) && startpoint != head_ubuf) - ubuf = xenvif_find_gref(skb, i, head_ubuf); - - if (likely(ubuf)) { - u16 pending_idx = ubuf->desc; - - foreign_queue = ubuf_to_queue(ubuf); - foreign_gref = - foreign_queue->pending_tx_info[pending_idx].req.gref; - /* Just a safety measure. If this was the last - * element on the list, the for loop will - * iterate again if a local page were added to - * the end. Using head_ubuf here prevents the - * second search on the chain. Or the original - * frags changed order, but that's less likely. - * In any way, ubuf shouldn't be NULL. - */ - ubuf = ubuf->ctx ? - (struct ubuf_info *) ubuf->ctx : - head_ubuf; - } else - /* This frag was a local page, added to the - * array after the skb left netback. - */ - ubuf = head_ubuf; - } xenvif_gop_frag_copy(queue, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, - &head, - foreign_queue, - foreign_queue ? foreign_gref : UINT_MAX); + &head); } return npo->meta_prod - old_meta_prod; -- cgit v1.2.3 From 3f9f1c67572f5e5e6dc84216d48d1480f3c4fcf6 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Tue, 9 Dec 2014 18:28:37 +0000 Subject: xen/grant-table: add a mechanism to safely unmap pages that are in use Introduce gnttab_unmap_refs_async() that can be used to safely unmap pages that may be in use (ref count > 1). If the pages are in use the unmap is deferred and retried later. This polling is not very clever but it should be good enough if the cases where the delay is necessary are rare. The initial delay is 5 ms and is increased linearly on each subsequent retry (to reduce load if the page is in use for a long time). This is needed to allow block backends using grant mapping to safely use network storage (block or filesystem based such as iSCSI or NFS). The network storage driver may complete a block request whilst there is a queued network packet retry (because the ack from the remote end races with deciding to queue the retry). The pages for the retried packet would be grant unmapped and the network driver (or hardware) would access the unmapped page. Signed-off-by: Jennifer Herbert Acked-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/xen/grant-table.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'drivers') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 89dcca448bb6..17972fbacddc 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -819,6 +820,49 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +#define GNTTAB_UNMAP_REFS_DELAY 5 + +static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item); + +static void gnttab_unmap_work(struct work_struct *work) +{ + struct gntab_unmap_queue_data + *unmap_data = container_of(work, + struct gntab_unmap_queue_data, + gnttab_work.work); + if (unmap_data->age != UINT_MAX) + unmap_data->age++; + __gnttab_unmap_refs_async(unmap_data); +} + +static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) +{ + int ret; + int pc; + + for (pc = 0; pc < item->count; pc++) { + if (page_count(item->pages[pc]) > 1) { + unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1); + schedule_delayed_work(&item->gnttab_work, + msecs_to_jiffies(delay)); + return; + } + } + + ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops, + item->pages, item->count); + item->done(ret, item); +} + +void gnttab_unmap_refs_async(struct gntab_unmap_queue_data* item) +{ + INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work); + item->age = 0; + + __gnttab_unmap_refs_async(item); +} +EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async); + static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes) { int rc; -- cgit v1.2.3 From 1401c00e59ea021c575f74612fe2dbba36d6a4ee Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Fri, 9 Jan 2015 18:06:12 +0000 Subject: xen/gntdev: convert priv->lock to a mutex Unmapping may require sleeping and we unmap while holding priv->lock, so convert it to a mutex. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 8cc3f069a10f..3c2534433b30 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -67,7 +67,7 @@ struct gntdev_priv { * Only populated if populate_freeable_maps == 1 */ struct list_head freeable_maps; /* lock protects maps and freeable_maps */ - spinlock_t lock; + struct mutex lock; struct mm_struct *mm; struct mmu_notifier mn; }; @@ -221,9 +221,9 @@ static void gntdev_put_map(struct gntdev_priv *priv, struct grant_map *map) } if (populate_freeable_maps && priv) { - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_del(&map->next); - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } if (map->pages && !use_ptemod) @@ -395,9 +395,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma) * not do any unmapping, since that has been done prior to * closing the vma, but it may still iterate the unmap_ops list. */ - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map->vma = NULL; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } vma->vm_private_data = NULL; gntdev_put_map(priv, map); @@ -441,14 +441,14 @@ static void mn_invl_range_start(struct mmu_notifier *mn, struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn); struct grant_map *map; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { unmap_if_in_range(map, start, end); } list_for_each_entry(map, &priv->freeable_maps, next) { unmap_if_in_range(map, start, end); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static void mn_invl_page(struct mmu_notifier *mn, @@ -465,7 +465,7 @@ static void mn_release(struct mmu_notifier *mn, struct grant_map *map; int err; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { if (!map->vma) continue; @@ -484,7 +484,7 @@ static void mn_release(struct mmu_notifier *mn, err = unmap_grant_pages(map, /* offset */ 0, map->count); WARN_ON(err); } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); } static struct mmu_notifier_ops gntdev_mmu_ops = { @@ -506,7 +506,7 @@ static int gntdev_open(struct inode *inode, struct file *flip) INIT_LIST_HEAD(&priv->maps); INIT_LIST_HEAD(&priv->freeable_maps); - spin_lock_init(&priv->lock); + mutex_init(&priv->lock); if (use_ptemod) { priv->mm = get_task_mm(current); @@ -580,10 +580,10 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv, return -EFAULT; } - spin_lock(&priv->lock); + mutex_lock(&priv->lock); gntdev_add_map(priv, map); op.index = map->index << PAGE_SHIFT; - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (copy_to_user(u, &op, sizeof(op)) != 0) return -EFAULT; @@ -602,7 +602,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, return -EFAULT; pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count); if (map) { list_del(&map->next); @@ -610,7 +610,7 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv, list_add_tail(&map->next, &priv->freeable_maps); err = 0; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (map) gntdev_put_map(priv, map); return err; @@ -678,7 +678,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) out_flags = op.action; out_event = op.event_channel_port; - spin_lock(&priv->lock); + mutex_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { uint64_t begin = map->index << PAGE_SHIFT; @@ -706,7 +706,7 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) rc = 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); /* Drop the reference to the event channel we did not save in the map */ if (out_flags & UNMAP_NOTIFY_SEND_EVENT) @@ -756,7 +756,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) pr_debug("map %d+%d at %lx (pgoff %lx)\n", index, count, vma->vm_start, vma->vm_pgoff); - spin_lock(&priv->lock); + mutex_lock(&priv->lock); map = gntdev_find_map_index(priv, index, count); if (!map) goto unlock_out; @@ -791,7 +791,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) map->flags |= GNTMAP_readonly; } - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); if (use_ptemod) { err = apply_to_page_range(vma->vm_mm, vma->vm_start, @@ -819,11 +819,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) return 0; unlock_out: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); return err; out_unlock_put: - spin_unlock(&priv->lock); + mutex_unlock(&priv->lock); out_put_map: if (use_ptemod) map->vma = NULL; -- cgit v1.2.3 From 745282256c754ac5ed3dbe2fbef6471dc1373417 Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 15:07:46 +0000 Subject: xen/gntdev: safely unmap grants in case they are still in use Use gnttab_unmap_refs_async() to wait until the mapped pages are no longer in use before unmapping them. This allows userspace programs to safely use Direct I/O and AIO to a network filesystem which may retain refs to pages in queued skbs after the filesystem I/O has completed. Signed-off-by: Jennifer Herbert Reviewed-by: Stefano Stabellini Signed-off-by: David Vrabel --- drivers/xen/gntdev.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 3c2534433b30..bccc54a80559 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -309,9 +309,30 @@ static int map_grant_pages(struct grant_map *map) return err; } +struct unmap_grant_pages_callback_data +{ + struct completion completion; + int result; +}; + +static void unmap_grant_callback(int result, + struct gntab_unmap_queue_data *data) +{ + struct unmap_grant_pages_callback_data* d = data->data; + + d->result = result; + complete(&d->completion); +} + static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) { int i, err = 0; + struct gntab_unmap_queue_data unmap_data; + struct unmap_grant_pages_callback_data data; + + init_completion(&data.completion); + unmap_data.data = &data; + unmap_data.done= &unmap_grant_callback; if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { int pgno = (map->notify.addr >> PAGE_SHIFT); @@ -323,11 +344,16 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, - use_ptemod ? map->kunmap_ops + offset : NULL, map->pages + offset, - pages); - if (err) - return err; + unmap_data.unmap_ops = map->unmap_ops + offset; + unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL; + unmap_data.pages = map->pages + offset; + unmap_data.count = pages; + + gnttab_unmap_refs_async(&unmap_data); + + wait_for_completion(&data.completion); + if (data.result) + return data.result; for (i = 0; i < pages; i++) { if (map->unmap_ops[offset+i].status) -- cgit v1.2.3 From c43cf3ea838541ea9f066f4f1aa7b197cba6276e Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Mon, 5 Jan 2015 16:49:22 +0000 Subject: xen-blkback: safely unmap grants in case they are still in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use gnttab_unmap_refs_async() to wait until the mapped pages are no longer in use before unmapping them. This allows blkback to use network storage which may retain refs to pages in queued skbs after the block I/O has completed. Signed-off-by: Jennifer Herbert Acked-by: Roger Pau Monné Acked-by: Jens Axboe Signed-off-by: David Vrabel --- drivers/block/xen-blkback/blkback.c | 169 +++++++++++++++++++++++++----------- drivers/block/xen-blkback/common.h | 3 + 2 files changed, 122 insertions(+), 50 deletions(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 908e630240bd..2a04d341e598 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "common.h" /* @@ -262,6 +263,17 @@ static void put_persistent_gnt(struct xen_blkif *blkif, atomic_dec(&blkif->persistent_gnt_in_use); } +static void free_persistent_gnts_unmap_callback(int result, + struct gntab_unmap_queue_data *data) +{ + struct completion *c = data->data; + + /* BUG_ON used to reproduce existing behaviour, + but is this the best way to deal with this? */ + BUG_ON(result); + complete(c); +} + static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, unsigned int num) { @@ -269,8 +281,17 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct persistent_gnt *persistent_gnt; struct rb_node *n; - int ret = 0; int segs_to_unmap = 0; + struct gntab_unmap_queue_data unmap_data; + struct completion unmap_completion; + + init_completion(&unmap_completion); + + unmap_data.data = &unmap_completion; + unmap_data.done = &free_persistent_gnts_unmap_callback; + unmap_data.pages = pages; + unmap_data.unmap_ops = unmap; + unmap_data.kunmap_ops = NULL; foreach_grant_safe(persistent_gnt, n, root, node) { BUG_ON(persistent_gnt->handle == @@ -285,9 +306,11 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root, if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || !rb_next(&persistent_gnt->node)) { - ret = gnttab_unmap_refs(unmap, NULL, pages, - segs_to_unmap); - BUG_ON(ret); + + unmap_data.count = segs_to_unmap; + gnttab_unmap_refs_async(&unmap_data); + wait_for_completion(&unmap_completion); + put_free_pages(blkif, pages, segs_to_unmap); segs_to_unmap = 0; } @@ -653,18 +676,14 @@ void xen_blkbk_free_caches(struct xen_blkif *blkif) shrink_free_pagepool(blkif, 0 /* All */); } -/* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. - */ -static void xen_blkbk_unmap(struct xen_blkif *blkif, - struct grant_page *pages[], - int num) +static unsigned int xen_blkbk_unmap_prepare( + struct xen_blkif *blkif, + struct grant_page **pages, + unsigned int num, + struct gnttab_unmap_grant_ref *unmap_ops, + struct page **unmap_pages) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; - int ret; for (i = 0; i < num; i++) { if (pages[i]->persistent_gnt != NULL) { @@ -674,21 +693,95 @@ static void xen_blkbk_unmap(struct xen_blkif *blkif, if (pages[i]->handle == BLKBACK_INVALID_HANDLE) continue; unmap_pages[invcount] = pages[i]->page; - gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page), + gnttab_set_unmap_op(&unmap_ops[invcount], vaddr(pages[i]->page), GNTMAP_host_map, pages[i]->handle); pages[i]->handle = BLKBACK_INVALID_HANDLE; - if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, - invcount); + invcount++; + } + + return invcount; +} + +static void xen_blkbk_unmap_and_respond_callback(int result, struct gntab_unmap_queue_data *data) +{ + struct pending_req* pending_req = (struct pending_req*) (data->data); + struct xen_blkif *blkif = pending_req->blkif; + + /* BUG_ON used to reproduce existing behaviour, + but is this the best way to deal with this? */ + BUG_ON(result); + + put_free_pages(blkif, data->pages, data->count); + make_response(blkif, pending_req->id, + pending_req->operation, pending_req->status); + free_req(blkif, pending_req); + /* + * Make sure the request is freed before releasing blkif, + * or there could be a race between free_req and the + * cleanup done in xen_blkif_free during shutdown. + * + * NB: The fact that we might try to wake up pending_free_wq + * before drain_complete (in case there's a drain going on) + * it's not a problem with our current implementation + * because we can assure there's no thread waiting on + * pending_free_wq if there's a drain going on, but it has + * to be taken into account if the current model is changed. + */ + if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { + complete(&blkif->drain_complete); + } + xen_blkif_put(blkif); +} + +static void xen_blkbk_unmap_and_respond(struct pending_req *req) +{ + struct gntab_unmap_queue_data* work = &req->gnttab_unmap_data; + struct xen_blkif *blkif = req->blkif; + struct grant_page **pages = req->segments; + unsigned int invcount; + + invcount = xen_blkbk_unmap_prepare(blkif, pages, req->nr_pages, + req->unmap, req->unmap_pages); + + work->data = req; + work->done = xen_blkbk_unmap_and_respond_callback; + work->unmap_ops = req->unmap; + work->kunmap_ops = NULL; + work->pages = req->unmap_pages; + work->count = invcount; + + gnttab_unmap_refs_async(&req->gnttab_unmap_data); +} + + +/* + * Unmap the grant references. + * + * This could accumulate ops up to the batch size to reduce the number + * of hypercalls, but since this is only used in error paths there's + * no real need. + */ +static void xen_blkbk_unmap(struct xen_blkif *blkif, + struct grant_page *pages[], + int num) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int invcount = 0; + int ret; + + while (num) { + unsigned int batch = min(num, BLKIF_MAX_SEGMENTS_PER_REQUEST); + + invcount = xen_blkbk_unmap_prepare(blkif, pages, batch, + unmap, unmap_pages); + if (invcount) { + ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); BUG_ON(ret); put_free_pages(blkif, unmap_pages, invcount); - invcount = 0; } - } - if (invcount) { - ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount); - BUG_ON(ret); - put_free_pages(blkif, unmap_pages, invcount); + pages += batch; + num -= batch; } } @@ -982,32 +1075,8 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the grant references associated with 'request' and provide * the proper response on the ring. */ - if (atomic_dec_and_test(&pending_req->pendcnt)) { - struct xen_blkif *blkif = pending_req->blkif; - - xen_blkbk_unmap(blkif, - pending_req->segments, - pending_req->nr_pages); - make_response(blkif, pending_req->id, - pending_req->operation, pending_req->status); - free_req(blkif, pending_req); - /* - * Make sure the request is freed before releasing blkif, - * or there could be a race between free_req and the - * cleanup done in xen_blkif_free during shutdown. - * - * NB: The fact that we might try to wake up pending_free_wq - * before drain_complete (in case there's a drain going on) - * it's not a problem with our current implementation - * because we can assure there's no thread waiting on - * pending_free_wq if there's a drain going on, but it has - * to be taken into account if the current model is changed. - */ - if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { - complete(&blkif->drain_complete); - } - xen_blkif_put(blkif); - } + if (atomic_dec_and_test(&pending_req->pendcnt)) + xen_blkbk_unmap_and_respond(pending_req); } /* diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index f65b807e3236..cc90a840e616 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -350,6 +350,9 @@ struct pending_req { struct grant_page *indirect_pages[MAX_INDIRECT_PAGES]; struct seg_buf seg[MAX_INDIRECT_SEGMENTS]; struct bio *biolist[MAX_INDIRECT_SEGMENTS]; + struct gnttab_unmap_grant_ref unmap[MAX_INDIRECT_SEGMENTS]; + struct page *unmap_pages[MAX_INDIRECT_SEGMENTS]; + struct gntab_unmap_queue_data gnttab_unmap_data; }; -- cgit v1.2.3 From 923b2919e2c318ee1c360a2119a14889fd0fcce4 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:56:54 +0000 Subject: xen/gntdev: mark userspace PTEs as special on x86 PV guests In an x86 PV guest, get_user_pages_fast() on a userspace address range containing foreign mappings does not work correctly because the M2P lookup of the MFN from a userspace PTE may return the wrong page. Force get_user_pages_fast() to fail on such addresses by marking the PTEs as special. If Xen has XENFEAT_gnttab_map_avail_bits (available since at least 4.0), we can do so efficiently in the grant map hypercall. Otherwise, it needs to be done afterwards. This is both inefficient and racy (the mapping is visible to the task before we fixup the PTEs), but will be fine for well-behaved applications that do not use the mapping until after the mmap() system call returns. Guests with XENFEAT_auto_translated_physmap (ARM and x86 HVM or PVH) do not need this since get_user_pages() has always worked correctly for them. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index bccc54a80559..20c65771017d 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -244,6 +244,14 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, BUG_ON(pgnr >= map->count); pte_maddr = arbitrary_virt_to_machine(pte).maddr; + /* + * Set the PTE as special to force get_user_pages_fast() fall + * back to the slow path. If this is not supported as part of + * the grant map, it will be done afterwards. + */ + if (xen_feature(XENFEAT_gnttab_map_avail_bits)) + flags |= (1 << _GNTMAP_guest_avail0); + gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags, map->grants[pgnr].ref, map->grants[pgnr].domid); @@ -252,6 +260,15 @@ static int find_grant_ptes(pte_t *pte, pgtable_t token, return 0; } +#ifdef CONFIG_X86 +static int set_grant_ptes_as_special(pte_t *pte, pgtable_t token, + unsigned long addr, void *data) +{ + set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte)); + return 0; +} +#endif + static int map_grant_pages(struct grant_map *map) { int i, err = 0; @@ -840,6 +857,23 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) if (err) goto out_put_map; } + } else { +#ifdef CONFIG_X86 + /* + * If the PTEs were not made special by the grant map + * hypercall, do so here. + * + * This is racy since the mapping is already visible + * to userspace but userspace should be well-behaved + * enough to not touch it until the mmap() call + * returns. + */ + if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) { + apply_to_page_range(vma->vm_mm, vma->vm_start, + vma->vm_end - vma->vm_start, + set_grant_ptes_as_special, NULL); + } +#endif } return 0; -- cgit v1.2.3 From dab069c61aa386f6a46c620f3a1075a4818f285b Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 18 Dec 2014 14:59:07 +0000 Subject: xen/gntdev: provide find_special_page VMA operation For a PV guest, use the find_special_page op to find the right page. To handle VMAs being split, remember the start of the original VMA so the correct index in the pages array can be calculated. Signed-off-by: David Vrabel Reviewed-by: Stefano Stabellini --- drivers/xen/gntdev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 20c65771017d..d5bb1a33d0a3 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -93,6 +93,7 @@ struct grant_map { struct gnttab_map_grant_ref *kmap_ops; struct gnttab_unmap_grant_ref *kunmap_ops; struct page **pages; + unsigned long pages_vm_start; }; static int unmap_grant_pages(struct grant_map *map, int offset, int pages); @@ -446,9 +447,18 @@ static void gntdev_vma_close(struct vm_area_struct *vma) gntdev_put_map(priv, map); } +static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma, + unsigned long addr) +{ + struct grant_map *map = vma->vm_private_data; + + return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT]; +} + static struct vm_operations_struct gntdev_vmops = { .open = gntdev_vma_open, .close = gntdev_vma_close, + .find_special_page = gntdev_vma_find_special_page, }; /* ------------------------------------------------------------------ */ @@ -874,6 +884,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) set_grant_ptes_as_special, NULL); } #endif + map->pages_vm_start = vma->vm_start; } return 0; -- cgit v1.2.3 From a2e75bc2ee207351e6806e77a5379c6c1dd4598a Mon Sep 17 00:00:00 2001 From: Jennifer Herbert Date: Thu, 5 Feb 2015 14:45:40 +0000 Subject: xenbus: Add proper handling of XS_ERROR from Xenbus for transactions. If Xenstore sends back a XS_ERROR for TRANSACTION_END, the driver BUGs because it cannot find the matching transaction in the list. For TRANSACTION_START, it leaks memory. Check the message as returned from xenbus_dev_request_and_reply(), and clean up for TRANSACTION_START or discard the error for TRANSACTION_END. Signed-off-by: Jennifer Herbert Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_dev_frontend.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 85534ea63555..9433e46518c8 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -326,10 +326,13 @@ static int xenbus_write_transaction(unsigned msg_type, } if (msg_type == XS_TRANSACTION_START) { - trans->handle.id = simple_strtoul(reply, NULL, 0); - - list_add(&trans->list, &u->transactions); - } else if (msg_type == XS_TRANSACTION_END) { + if (u->u.msg.type == XS_ERROR) + kfree(trans); + else { + trans->handle.id = simple_strtoul(reply, NULL, 0); + list_add(&trans->list, &u->transactions); + } + } else if (u->u.msg.type == XS_TRANSACTION_END) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; -- cgit v1.2.3 From 72978b2fe2f2cdf9f319c6c6dcdbe92b38de2be2 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Mon, 19 Jan 2015 13:19:38 +0000 Subject: xen/manage: Fix USB interaction issues when resuming Commit 61a734d305e1 ("xen/manage: Always freeze/thaw processes when suspend/resuming") ensured that userspace processes were always frozen before suspending to reduce interaction issues when resuming devices. However, freeze_processes() does not freeze kernel threads. Freeze kernel threads as well to prevent deadlocks with the khubd thread when resuming devices. This is what native suspend and resume does. Example deadlock: [ 7279.648010] [] ? xen_poll_irq_timeout+0x3e/0x50 [ 7279.648010] [] xen_poll_irq+0x10/0x20 [ 7279.648010] [] xen_lock_spinning+0xb3/0x120 [ 7279.648010] [] __raw_callee_save_xen_lock_spinning+0x11/0x20 [ 7279.648010] [] ? usb_control_msg+0xe6/0x120 [ 7279.648010] [] ? _raw_spin_lock_irq+0x50/0x60 [ 7279.648010] [] wait_for_completion+0xac/0x160 [ 7279.648010] [] ? try_to_wake_up+0x2c0/0x2c0 [ 7279.648010] [] dpm_wait+0x32/0x40 [ 7279.648010] [] device_resume+0x90/0x210 [ 7279.648010] [] dpm_resume+0x121/0x250 [ 7279.648010] [] ? xenbus_dev_request_and_reply+0xc0/0xc0 [ 7279.648010] [] dpm_resume_end+0x15/0x30 [ 7279.648010] [] do_suspend+0x10a/0x200 [ 7279.648010] [] ? xen_pre_suspend+0x20/0x20 [ 7279.648010] [] shutdown_handler+0x120/0x150 [ 7279.648010] [] xenwatch_thread+0x9f/0x160 [ 7279.648010] [] ? finish_wait+0x80/0x80 [ 7279.648010] [] kthread+0xc9/0xe0 [ 7279.648010] [] ? flush_kthread_worker+0x80/0x80 [ 7279.648010] [] ret_from_fork+0x7c/0xb0 [ 7279.648010] [] ? flush_kthread_worker+0x80/0x80 [ 7441.216287] INFO: task khubd:89 blocked for more than 120 seconds. [ 7441.219457] Tainted: G X 3.13.11-ckt12.kz #1 [ 7441.222176] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 7441.225827] khubd D ffff88003f433440 0 89 2 0x00000000 [ 7441.229258] ffff88003ceb9b98 0000000000000046 ffff88003ce83000 0000000000013440 [ 7441.232959] ffff88003ceb9fd8 0000000000013440 ffff88003cd13000 ffff88003ce83000 [ 7441.236658] 0000000000000286 ffff88003d3e0000 ffff88003ceb9bd0 00000001001aa01e [ 7441.240415] Call Trace: [ 7441.241614] [] schedule+0x29/0x70 [ 7441.243930] [] schedule_timeout+0x166/0x2c0 [ 7441.246681] [] ? call_timer_fn+0x110/0x110 [ 7441.249339] [] schedule_timeout_uninterruptible+0x1e/0x20 [ 7441.252644] [] msleep+0x20/0x30 [ 7441.254812] [] hub_port_reset+0xf0/0x580 [ 7441.257400] [] hub_port_init+0x75/0xb40 [ 7441.259981] [] ? update_autosuspend+0x39/0x60 [ 7441.262817] [] ? pm_runtime_set_autosuspend_delay+0x50/0xa0 [ 7441.266212] [] hub_thread+0x71a/0x1750 [ 7441.268728] [] ? finish_wait+0x80/0x80 [ 7441.271272] [] ? usb_port_resume+0x670/0x670 [ 7441.274067] [] kthread+0xc9/0xe0 [ 7441.276305] [] ? flush_kthread_worker+0x80/0x80 [ 7441.279131] [] ret_from_fork+0x7c/0xb0 [ 7441.281659] [] ? flush_kthread_worker+0x80/0x80 Signed-off-by: Ross Lagerwall Cc: stable@vger.kernel.org Signed-off-by: David Vrabel --- drivers/xen/manage.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index f8bb36f9d9ce..bf1940706422 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -105,10 +105,16 @@ static void do_suspend(void) err = freeze_processes(); if (err) { - pr_err("%s: freeze failed %d\n", __func__, err); + pr_err("%s: freeze processes failed %d\n", __func__, err); goto out; } + err = freeze_kernel_threads(); + if (err) { + pr_err("%s: freeze kernel threads failed %d\n", __func__, err); + goto out_thaw; + } + err = dpm_suspend_start(PMSG_FREEZE); if (err) { pr_err("%s: dpm_suspend_start %d\n", __func__, err); -- cgit v1.2.3