From 8ca19a8937ad91703cfefccf13bd8017b39510cd Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:48 -0400 Subject: xen/gntalloc: Change gref_lock to a mutex The event channel release function cannot be called under a spinlock because it can attempt to acquire a mutex due to the event channel reference acquired when setting up unmap notifications. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index f6832f46aea4..439352d094db 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " "the gntalloc device"); static LIST_HEAD(gref_list); -static DEFINE_SPINLOCK(gref_lock); +static DEFINE_MUTEX(gref_mutex); static int gref_size; struct notify_info { @@ -143,15 +143,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, } /* Add to gref lists. */ - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); list_splice_tail(&queue_gref, &gref_list); list_splice_tail(&queue_file, &priv->list); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return 0; undo: - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref_size -= (op->count - i); list_for_each_entry(gref, &queue_file, next_file) { @@ -167,7 +167,7 @@ undo: */ if (unlikely(!list_empty(&queue_gref))) list_splice_tail(&queue_gref, &gref_list); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rc; } @@ -251,7 +251,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp) pr_debug("%s: priv %p\n", __func__, priv); - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); while (!list_empty(&priv->list)) { gref = list_entry(priv->list.next, struct gntalloc_gref, next_file); @@ -261,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp) __del_gref(gref); } kfree(priv); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return 0; } @@ -286,21 +286,21 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, goto out; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); /* Clean up pages that were at zero (local) users but were still mapped * by remote domains. Since those pages count towards the limit that we * are about to enforce, removing them here is a good idea. */ do_cleanup(); if (gref_size + op.count > limit) { - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); rc = -ENOSPC; goto out_free; } gref_size += op.count; op.index = priv->index; priv->index += op.count * PAGE_SIZE; - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); rc = add_grefs(&op, gref_ids, priv); if (rc < 0) @@ -343,7 +343,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, goto dealloc_grant_out; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, op.index, op.count); if (gref) { /* Remove from the file list only, and decrease reference count. @@ -363,7 +363,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, do_cleanup(); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); dealloc_grant_out: return rc; } @@ -383,7 +383,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, index = op.index & ~(PAGE_SIZE - 1); pgoff = op.index & (PAGE_SIZE - 1); - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, index, 1); if (!gref) { @@ -400,8 +400,9 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, gref->notify.pgoff = pgoff; gref->notify.event = op.event_channel_port; rc = 0; + unlock_out: - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rc; } @@ -433,9 +434,9 @@ static void gntalloc_vma_open(struct vm_area_struct *vma) if (!gref) return; - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref->users++; - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); } static void gntalloc_vma_close(struct vm_area_struct *vma) @@ -444,11 +445,11 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) if (!gref) return; - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref->users--; if (gref->users == 0) __del_gref(gref); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); } static struct vm_operations_struct gntalloc_vmops = { @@ -471,7 +472,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); if (gref == NULL) { rv = -ENOENT; @@ -499,7 +500,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) rv = 0; out_unlock: - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rv; } -- cgit v1.2.3 From 0cc678f850f2cba0cedbd133fcbbf175554cd6c6 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:49 -0400 Subject: xen/gnt{dev,alloc}: reserve event channels for notify When using the unmap notify ioctl, the event channel used for notification needs to be reserved to avoid it being deallocated prior to sending the notification. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 21 ++++++++++++++++++++- drivers/xen/gntdev.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 439352d094db..c95181f43a6a 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -178,8 +178,10 @@ static void __del_gref(struct gntalloc_gref *gref) tmp[gref->notify.pgoff] = 0; kunmap(gref->page); } - if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(gref->notify.event); + evtchn_put(gref->notify.event); + } gref->notify.flags = 0; @@ -396,6 +398,23 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, goto unlock_out; } + /* We need to grab a reference to the event channel we are going to use + * to send the notify before releasing the reference we may already have + * (if someone has called this ioctl twice). This is required so that + * it is possible to change the clear_byte part of the notification + * without disturbing the event channel part, which may now be the last + * reference to that event channel. + */ + if (op.action & UNMAP_NOTIFY_SEND_EVENT) { + if (evtchn_get(op.event_channel_port)) { + rc = -EINVAL; + goto unlock_out; + } + } + + if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + evtchn_put(gref->notify.event); + gref->notify.flags = op.action; gref->notify.pgoff = pgoff; gref->notify.event = op.event_channel_port; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 39871326afa2..a7308559a26a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -193,8 +193,10 @@ static void gntdev_put_map(struct grant_map *map) atomic_sub(map->count, &pages_mapped); - if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); + evtchn_put(map->notify.event); + } if (map->pages) { if (!use_ptemod) @@ -599,6 +601,8 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) struct ioctl_gntdev_unmap_notify op; struct grant_map *map; int rc; + int out_flags; + unsigned int out_event; if (copy_from_user(&op, u, sizeof(op))) return -EFAULT; @@ -606,6 +610,21 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) return -EINVAL; + /* We need to grab a reference to the event channel we are going to use + * to send the notify before releasing the reference we may already have + * (if someone has called this ioctl twice). This is required so that + * it is possible to change the clear_byte part of the notification + * without disturbing the event channel part, which may now be the last + * reference to that event channel. + */ + if (op.action & UNMAP_NOTIFY_SEND_EVENT) { + if (evtchn_get(op.event_channel_port)) + return -EINVAL; + } + + out_flags = op.action; + out_event = op.event_channel_port; + spin_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { @@ -624,12 +643,22 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) goto unlock_out; } + out_flags = map->notify.flags; + out_event = map->notify.event; + map->notify.flags = op.action; map->notify.addr = op.index - (map->index << PAGE_SHIFT); map->notify.event = op.event_channel_port; + rc = 0; + unlock_out: spin_unlock(&priv->lock); + + /* Drop the reference to the event channel we did not save in the map */ + if (out_flags & UNMAP_NOTIFY_SEND_EVENT) + evtchn_put(out_event); + return rc; } -- cgit v1.2.3 From 420eb554d5ee6daad743d8190383219f757dd66c Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:47 -0400 Subject: xen/event: Add reference counting to event channels Event channels exposed to userspace by the evtchn module may be used by other modules in an asynchronous manner, which requires that reference counting be used to prevent the event channel from being closed before the signals are delivered. The reference count on new event channels defaults to -1 which indicates the event channel is not referenced outside the kernel; evtchn_get fails if called on such an event channel. The event channels made visible to userspace by evtchn have a normal reference count. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++- drivers/xen/evtchn.c | 2 +- 2 files changed, 74 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 6e075cdd0c6b..a3bcd6175f4a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -87,6 +87,7 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; + int refcnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned short evtchn; /* event channel */ @@ -406,6 +407,7 @@ static void xen_irq_init(unsigned irq) panic("Unable to allocate metadata for IRQ%d\n", irq); info->type = IRQT_UNBOUND; + info->refcnt = -1; irq_set_handler_data(irq, info); @@ -469,6 +471,8 @@ static void xen_free_irq(unsigned irq) irq_set_handler_data(irq, NULL); + WARN_ON(info->refcnt > 0); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -637,7 +641,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", irq, gsi); - goto out; /* XXX need refcount? */ + goto out; } irq = xen_allocate_irq_gsi(gsi); @@ -939,9 +943,16 @@ static void unbind_from_irq(unsigned int irq) { struct evtchn_close close; int evtchn = evtchn_from_irq(irq); + struct irq_info *info = irq_get_handler_data(irq); mutex_lock(&irq_mapping_update_lock); + if (info->refcnt > 0) { + info->refcnt--; + if (info->refcnt != 0) + goto done; + } + if (VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) @@ -970,6 +981,7 @@ static void unbind_from_irq(unsigned int irq) xen_free_irq(irq); + done: mutex_unlock(&irq_mapping_update_lock); } @@ -1065,6 +1077,66 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id) } EXPORT_SYMBOL_GPL(unbind_from_irqhandler); +int evtchn_make_refcounted(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + struct irq_info *info; + + if (irq == -1) + return -ENOENT; + + info = irq_get_handler_data(irq); + + if (!info) + return -ENOENT; + + WARN_ON(info->refcnt != -1); + + info->refcnt = 1; + + return 0; +} +EXPORT_SYMBOL_GPL(evtchn_make_refcounted); + +int evtchn_get(unsigned int evtchn) +{ + int irq; + struct irq_info *info; + int err = -ENOENT; + + mutex_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + if (irq == -1) + goto done; + + info = irq_get_handler_data(irq); + + if (!info) + goto done; + + err = -EINVAL; + if (info->refcnt <= 0) + goto done; + + info->refcnt++; + err = 0; + done: + mutex_unlock(&irq_mapping_update_lock); + + return err; +} +EXPORT_SYMBOL_GPL(evtchn_get); + +void evtchn_put(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + if (WARN_ON(irq == -1)) + return; + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(evtchn_put); + void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { int irq = per_cpu(ipi_to_irq, cpu)[vector]; diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index dbc13e94b612..b1f60a0c0bea 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); if (rc >= 0) - rc = 0; + rc = evtchn_make_refcounted(port); return rc; } -- cgit v1.2.3 From 0f9f5a9588468cddeccc9146b86798492c7cd4f5 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:58:06 +0800 Subject: xen/granttable: Introducing grant table V2 stucture This patch introduces new structures of grant table V2, grant table V2 is an extension from V1. Grant table is shared between guest and Xen, and Xen is responsible to do corresponding work for grant operations, such as: figure out guest's grant table version, perform different actions based on different grant table version, etc. Although full-page structure of V2 is different from V1, it play the same role as V1. Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 181 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 141 insertions(+), 40 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index bf1c094f4ebf..18355a53763f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -53,7 +53,7 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry_v1)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -64,7 +64,63 @@ static DEFINE_SPINLOCK(gnttab_list_lock); unsigned long xen_hvm_resume_frames; EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); -static struct grant_entry *shared; +static union { + struct grant_entry_v1 *v1; + void *addr; +} gnttab_shared; + +/*This is a structure of function pointers for grant table*/ +struct gnttab_ops { + /* + * Mapping a list of frames for storing grant entries. First input + * parameter is used to storing grant table address when grant table + * being setup, second parameter is the number of frames to map grant + * table. Returning GNTST_okay means success and negative value means + * failure. + */ + int (*map_frames)(unsigned long *, unsigned int); + /* + * Release a list of frames which are mapped in map_frames for grant + * entry status. + */ + void (*unmap_frames)(void); + /* + * Introducing a valid entry into the grant table, granting the frame + * of this grant entry to domain for accessing, or transfering, or + * transitively accessing. First input parameter is reference of this + * introduced grant entry, second one is domid of granted domain, third + * one is the frame to be granted, and the last one is status of the + * grant entry to be updated. + */ + void (*update_entry)(grant_ref_t, domid_t, unsigned long, unsigned); + /* + * Stop granting a grant entry to domain for accessing. First input + * parameter is reference of a grant entry whose grant access will be + * stopped, second one is not in use now. If the grant entry is + * currently mapped for reading or writing, just return failure(==0) + * directly and don't tear down the grant access. Otherwise, stop grant + * access for this entry and return success(==1). + */ + int (*end_foreign_access_ref)(grant_ref_t, int); + /* + * Stop granting a grant entry to domain for transfer. If tranfer has + * not started, just reclaim the grant entry and return failure(==0). + * Otherwise, wait for the transfer to complete and then return the + * frame. + */ + unsigned long (*end_foreign_transfer_ref)(grant_ref_t); + /* + * Query the status of a grant entry. Input parameter is reference of + * queried grant entry, return value is the status of queried entry. + * Detailed status(writing/reading) can be gotten from the return value + * by bit operations. + */ + int (*query_foreign_access)(grant_ref_t); +}; + +static struct gnttab_ops *gnttab_interface; + +static int grant_table_version; static struct gnttab_free_callback *gnttab_free_callback_list; @@ -142,23 +198,23 @@ static void put_free_entry(grant_ref_t ref) spin_unlock_irqrestore(&gnttab_list_lock, flags); } -static void update_grant_entry(grant_ref_t ref, domid_t domid, - unsigned long frame, unsigned flags) +/* + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + */ +static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) { - /* - * Introducing a valid entry into the grant table: - * 1. Write ent->domid. - * 2. Write ent->frame: - * GTF_permit_access: Frame to which access is permitted. - * GTF_accept_transfer: Pseudo-phys frame slot being filled by new - * frame, or zero if none. - * 3. Write memory barrier (WMB). - * 4. Write ent->flags, inc. valid type. - */ - shared[ref].frame = frame; - shared[ref].domid = domid; + gnttab_shared.v1[ref].domid = domid; + gnttab_shared.v1[ref].frame = frame; wmb(); - shared[ref].flags = flags; + gnttab_shared.v1[ref].flags = flags; } /* @@ -167,7 +223,7 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid, void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { - update_grant_entry(ref, domid, frame, + gnttab_interface->update_entry(ref, domid, frame, GTF_permit_access | (readonly ? GTF_readonly : 0)); } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); @@ -187,31 +243,37 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -int gnttab_query_foreign_access(grant_ref_t ref) +static int gnttab_query_foreign_access_v1(grant_ref_t ref) { - u16 nflags; - - nflags = shared[ref].flags; + return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); +} - return nflags & (GTF_reading|GTF_writing); +int gnttab_query_foreign_access(grant_ref_t ref) +{ + return gnttab_interface->query_foreign_access(ref); } EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; - nflags = shared[ref].flags; + nflags = gnttab_shared.v1[ref].flags; do { flags = nflags; if (flags & (GTF_reading|GTF_writing)) { printk(KERN_ALERT "WARNING: g.e. still in use!\n"); return 0; } - } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); + } while ((nflags = sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0)) != flags); return 1; } + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + return gnttab_interface->end_foreign_access_ref(ref, readonly); +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); void gnttab_end_foreign_access(grant_ref_t ref, int readonly, @@ -246,11 +308,11 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { - update_grant_entry(ref, domid, pfn, GTF_accept_transfer); + gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer); } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); -unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) { unsigned long frame; u16 flags; @@ -259,24 +321,29 @@ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) + while (!((flags = gnttab_shared.v1[ref].flags) & GTF_transfer_committed)) { + if (sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = shared[ref].flags; + flags = gnttab_shared.v1[ref].flags; cpu_relax(); } rmb(); /* Read the frame number /after/ reading completion status. */ - frame = shared[ref].frame; + frame = gnttab_shared.v1[ref].frame; BUG_ON(frame == 0); return frame; } + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + return gnttab_interface->end_foreign_transfer_ref(ref); +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) @@ -520,6 +587,23 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) +{ + int rc; + + rc = arch_gnttab_map_shared(frames, nr_gframes, + gnttab_max_grant_frames(), + &gnttab_shared.addr); + BUG_ON(rc); + + return 0; +} + +static void gnttab_unmap_frames_v1(void) +{ + arch_gnttab_unmap_shared(gnttab_shared.addr, nr_grant_frames); +} + static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; @@ -567,19 +651,35 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) BUG_ON(rc || setup.status); - rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(), - &shared); - BUG_ON(rc); + rc = gnttab_interface->map_frames(frames, nr_gframes); kfree(frames); - return 0; + return rc; +} + +static struct gnttab_ops gnttab_v1_ops = { + .map_frames = gnttab_map_frames_v1, + .unmap_frames = gnttab_unmap_frames_v1, + .update_entry = gnttab_update_entry_v1, + .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, + .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, + .query_foreign_access = gnttab_query_foreign_access_v1, +}; + +static void gnttab_request_version(void) +{ + grant_table_version = 1; + gnttab_interface = &gnttab_v1_ops; + printk(KERN_INFO "Grant tables using version %d layout.\n", + grant_table_version); } int gnttab_resume(void) { unsigned int max_nr_gframes; + gnttab_request_version(); max_nr_gframes = gnttab_max_grant_frames(); if (max_nr_gframes < nr_grant_frames) return -ENOSYS; @@ -587,9 +687,10 @@ int gnttab_resume(void) if (xen_pv_domain()) return gnttab_map(0, nr_grant_frames - 1); - if (!shared) { - shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes); - if (shared == NULL) { + if (gnttab_shared.addr == NULL) { + gnttab_shared.addr = ioremap(xen_hvm_resume_frames, + PAGE_SIZE * max_nr_gframes); + if (gnttab_shared.addr == NULL) { printk(KERN_WARNING "Failed to ioremap gnttab share frames!"); return -ENOMEM; @@ -603,7 +704,7 @@ int gnttab_resume(void) int gnttab_suspend(void) { - arch_gnttab_unmap_shared(shared, nr_grant_frames); + gnttab_interface->unmap_frames(); return 0; } -- cgit v1.2.3 From b1e495b2fae578b1bd3ab1906cb15aac43f96fee Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:58:47 +0800 Subject: xen/granttable: Refactor some code Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 18355a53763f..0518d0404942 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -257,15 +257,17 @@ EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; + u16 *pflags; - nflags = gnttab_shared.v1[ref].flags; + pflags = &gnttab_shared.v1[ref].flags; + nflags = *pflags; do { flags = nflags; if (flags & (GTF_reading|GTF_writing)) { printk(KERN_ALERT "WARNING: g.e. still in use!\n"); return 0; } - } while ((nflags = sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0)) != flags); + } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); return 1; } @@ -316,20 +318,23 @@ static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) { unsigned long frame; u16 flags; + u16 *pflags; + + pflags = &gnttab_shared.v1[ref].flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = gnttab_shared.v1[ref].flags) & GTF_transfer_committed)) { - if (sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0) == flags) + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (sync_cmpxchg(pflags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = gnttab_shared.v1[ref].flags; + flags = *pflags; cpu_relax(); } -- cgit v1.2.3 From 85ff6acb075a484780b3d763fdf41596d8fc0970 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:59:21 +0800 Subject: xen/granttable: Grant tables V2 implementation Receiver-side copying of packets is based on this implementation, it gives better performance and better CPU accounting. It totally supports three types: full-page, sub-page and transitive grants. However this patch does not cover sub-page and transitive grants, it mainly focus on Full-page part and implements grant table V2 interfaces corresponding to what already exists in grant table V1, such as: grant table V2 initialization, mapping, releasing and exported interfaces. Each guest can only supports one type of grant table type, every entry in grant table should be the same version. It is necessary to set V1 or V2 version before initializing the grant table. Grant table exported interfaces of V2 are same with those of V1, Xen is responsible to judge what grant table version guests are using in every grant operation. V2 fulfills the same role of V1, and it is totally backwards compitable with V1. If dom0 support grant table V2, the guests runing on it can run with either V1 or V2. Acked-by: Ian Campbell Signed-off-by: Annie Li [v1: Modified alloc_vm_area call (new parameters), indentation, and cleanpatch warnings] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 171 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 167 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0518d0404942..301869f60dc7 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,10 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry_v1)) +#define GREFS_PER_GRANT_FRAME \ +(grant_table_version == 1 ? \ +(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \ +(PAGE_SIZE / sizeof(union grant_entry_v2))) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -66,6 +70,7 @@ EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); static union { struct grant_entry_v1 *v1; + union grant_entry_v2 *v2; void *addr; } gnttab_shared; @@ -120,6 +125,9 @@ struct gnttab_ops { static struct gnttab_ops *gnttab_interface; +/*This reflects status of grant entries, so act as a global value*/ +static grant_status_t *grstatus; + static int grant_table_version; static struct gnttab_free_callback *gnttab_free_callback_list; @@ -127,6 +135,7 @@ static struct gnttab_free_callback *gnttab_free_callback_list; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define SPP (PAGE_SIZE / sizeof(grant_status_t)) static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) { @@ -199,6 +208,7 @@ static void put_free_entry(grant_ref_t ref) } /* + * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: @@ -217,6 +227,15 @@ static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid, gnttab_shared.v1[ref].flags = flags; } +static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) +{ + gnttab_shared.v2[ref].hdr.domid = domid; + gnttab_shared.v2[ref].full_page.frame = frame; + wmb(); + gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags; +} + /* * Public grant-issuing interface functions */ @@ -248,6 +267,11 @@ static int gnttab_query_foreign_access_v1(grant_ref_t ref) return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); } +static int gnttab_query_foreign_access_v2(grant_ref_t ref) +{ + return grstatus[ref] & (GTF_reading|GTF_writing); +} + int gnttab_query_foreign_access(grant_ref_t ref) { return gnttab_interface->query_foreign_access(ref); @@ -272,6 +296,29 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) return 1; } +static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) +{ + gnttab_shared.v2[ref].hdr.flags = 0; + mb(); + if (grstatus[ref] & (GTF_reading|GTF_writing)) { + return 0; + } else { + /* The read of grstatus needs to have acquire + semantics. On x86, reads already have + that, and we just need to protect against + compiler reorderings. On other + architectures we may need a full + barrier. */ +#ifdef CONFIG_X86 + barrier(); +#else + mb(); +#endif + } + + return 1; +} + int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) { return gnttab_interface->end_foreign_access_ref(ref, readonly); @@ -345,6 +392,37 @@ static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) return frame; } +static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + u16 *pflags; + + pflags = &gnttab_shared.v2[ref].hdr.flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (sync_cmpxchg(pflags, flags, 0) == flags) + return 0; + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = *pflags; + cpu_relax(); + } + + rmb(); /* Read the frame number /after/ reading completion status. */ + frame = gnttab_shared.v2[ref].full_page.frame; + BUG_ON(frame == 0); + + return frame; +} + unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { return gnttab_interface->end_foreign_transfer_ref(ref); @@ -592,6 +670,11 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +static unsigned nr_status_frames(unsigned nr_grant_frames) +{ + return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP; +} + static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) { int rc; @@ -606,7 +689,56 @@ static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) static void gnttab_unmap_frames_v1(void) { - arch_gnttab_unmap_shared(gnttab_shared.addr, nr_grant_frames); + arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); +} + +static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes) +{ + uint64_t *sframes; + unsigned int nr_sframes; + struct gnttab_get_status_frames getframes; + int rc; + + nr_sframes = nr_status_frames(nr_gframes); + + /* No need for kzalloc as it is initialized in following hypercall + * GNTTABOP_get_status_frames. + */ + sframes = kmalloc(nr_sframes * sizeof(uint64_t), GFP_ATOMIC); + if (!sframes) + return -ENOMEM; + + getframes.dom = DOMID_SELF; + getframes.nr_frames = nr_sframes; + set_xen_guest_handle(getframes.frame_list, sframes); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, + &getframes, 1); + if (rc == -ENOSYS) { + kfree(sframes); + return -ENOSYS; + } + + BUG_ON(rc || getframes.status); + + rc = arch_gnttab_map_status(sframes, nr_sframes, + nr_status_frames(gnttab_max_grant_frames()), + &grstatus); + BUG_ON(rc); + kfree(sframes); + + rc = arch_gnttab_map_shared(frames, nr_gframes, + gnttab_max_grant_frames(), + &gnttab_shared.addr); + BUG_ON(rc); + + return 0; +} + +static void gnttab_unmap_frames_v2(void) +{ + arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); + arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames)); } static int gnttab_map(unsigned int start_idx, unsigned int end_idx) @@ -640,6 +772,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return rc; } + /* No need for kzalloc as it is initialized in following hypercall + * GNTTABOP_setup_table. + */ frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); if (!frames) return -ENOMEM; @@ -672,10 +807,38 @@ static struct gnttab_ops gnttab_v1_ops = { .query_foreign_access = gnttab_query_foreign_access_v1, }; +static struct gnttab_ops gnttab_v2_ops = { + .map_frames = gnttab_map_frames_v2, + .unmap_frames = gnttab_unmap_frames_v2, + .update_entry = gnttab_update_entry_v2, + .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, + .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, + .query_foreign_access = gnttab_query_foreign_access_v2, +}; + static void gnttab_request_version(void) { - grant_table_version = 1; - gnttab_interface = &gnttab_v1_ops; + int rc; + struct gnttab_set_version gsv; + + gsv.version = 2; + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); + if (rc == 0) { + grant_table_version = 2; + gnttab_interface = &gnttab_v2_ops; + } else if (grant_table_version == 2) { + /* + * If we've already used version 2 features, + * but then suddenly discover that they're not + * available (e.g. migrating to an older + * version of Xen), almost unbounded badness + * can happen. + */ + panic("we need grant tables version 2, but only version 1 is available"); + } else { + grant_table_version = 1; + gnttab_interface = &gnttab_v1_ops; + } printk(KERN_INFO "Grant tables using version %d layout.\n", grant_table_version); } -- cgit v1.2.3 From c123799a41bf466ce5b199331aac4c1f28f67ec3 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:59:56 +0800 Subject: xen/granttable: Keep code format clean Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 301869f60dc7..bd325fd0000b 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -50,7 +50,6 @@ #include #include - /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff @@ -598,8 +597,8 @@ unsigned int gnttab_max_grant_frames(void) EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; pte_t *pte; @@ -649,7 +648,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count) { int i, ret; -- cgit v1.2.3 From 9dbc71d53ce4e0260d0a8307838cd9ebddc07a12 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:13:57 +0800 Subject: xen/granttable: Improve comments for function pointers Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 48 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index bd325fd0000b..1589ea1a2445 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -76,50 +76,50 @@ static union { /*This is a structure of function pointers for grant table*/ struct gnttab_ops { /* - * Mapping a list of frames for storing grant entries. First input - * parameter is used to storing grant table address when grant table - * being setup, second parameter is the number of frames to map grant - * table. Returning GNTST_okay means success and negative value means - * failure. + * Mapping a list of frames for storing grant entries. Frames parameter + * is used to store grant table address when grant table being setup, + * nr_gframes is the number of frames to map grant table. Returning + * GNTST_okay means success and negative value means failure. */ - int (*map_frames)(unsigned long *, unsigned int); + int (*map_frames)(unsigned long *frames, unsigned int nr_gframes); /* * Release a list of frames which are mapped in map_frames for grant * entry status. */ void (*unmap_frames)(void); /* - * Introducing a valid entry into the grant table, granting the frame - * of this grant entry to domain for accessing, or transfering, or - * transitively accessing. First input parameter is reference of this - * introduced grant entry, second one is domid of granted domain, third - * one is the frame to be granted, and the last one is status of the - * grant entry to be updated. + * Introducing a valid entry into the grant table, granting the frame of + * this grant entry to domain for accessing or transfering. Ref + * parameter is reference of this introduced grant entry, domid is id of + * granted domain, frame is the page frame to be granted, and flags is + * status of the grant entry to be updated. */ - void (*update_entry)(grant_ref_t, domid_t, unsigned long, unsigned); + void (*update_entry)(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags); /* - * Stop granting a grant entry to domain for accessing. First input - * parameter is reference of a grant entry whose grant access will be - * stopped, second one is not in use now. If the grant entry is + * Stop granting a grant entry to domain for accessing. Ref parameter is + * reference of a grant entry whose grant access will be stopped, + * readonly is not in use in this function. If the grant entry is * currently mapped for reading or writing, just return failure(==0) * directly and don't tear down the grant access. Otherwise, stop grant * access for this entry and return success(==1). */ - int (*end_foreign_access_ref)(grant_ref_t, int); + int (*end_foreign_access_ref)(grant_ref_t ref, int readonly); /* - * Stop granting a grant entry to domain for transfer. If tranfer has - * not started, just reclaim the grant entry and return failure(==0). - * Otherwise, wait for the transfer to complete and then return the - * frame. + * Stop granting a grant entry to domain for transfer. Ref parameter is + * reference of a grant entry whose grant transfer will be stopped. If + * tranfer has not started, just reclaim the grant entry and return + * failure(==0). Otherwise, wait for the transfer to complete and then + * return the frame. */ - unsigned long (*end_foreign_transfer_ref)(grant_ref_t); + unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); /* - * Query the status of a grant entry. Input parameter is reference of + * Query the status of a grant entry. Ref parameter is reference of * queried grant entry, return value is the status of queried entry. * Detailed status(writing/reading) can be gotten from the return value * by bit operations. */ - int (*query_foreign_access)(grant_ref_t); + int (*query_foreign_access)(grant_ref_t ref); }; static struct gnttab_ops *gnttab_interface; -- cgit v1.2.3 From 6666754b11297526c699f8df63c52d50c24fe946 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:14:42 +0800 Subject: xen/granttable: Support sub-page grants - They can't be used to map the page (so can only be used in a GNTTABOP_copy hypercall). - It's possible to grant access with a finer granularity than whole pages. - Xen guarantees that they can be revoked quickly (a normal map grant can only be revoked with the cooperation of the domain which has been granted access). Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1589ea1a2445..c8312c7056f1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -120,6 +120,17 @@ struct gnttab_ops { * by bit operations. */ int (*query_foreign_access)(grant_ref_t ref); + /* + * Grant a domain to access a range of bytes within the page referred by + * an available grant entry. Ref parameter is reference of a grant entry + * which will be sub-page accessed, domid is id of grantee domain, frame + * is frame address of subpage grant, flags is grant type and flag + * information, page_off is offset of the range of bytes, and length is + * length of bytes to be accessed. + */ + void (*update_subpage_entry)(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, unsigned length); }; static struct gnttab_ops *gnttab_interface; @@ -261,6 +272,66 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); +void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + gnttab_shared.v2[ref].sub_page.frame = frame; + gnttab_shared.v2[ref].sub_page.page_off = page_off; + gnttab_shared.v2[ref].sub_page.length = length; + gnttab_shared.v2[ref].hdr.domid = domid; + wmb(); + gnttab_shared.v2[ref].hdr.flags = + GTF_permit_access | GTF_sub_page | flags; +} + +int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + if (flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_transitive)) + return -EPERM; + + if (gnttab_interface->update_subpage_entry == NULL) + return -ENOSYS; + + gnttab_interface->update_subpage_entry(ref, domid, frame, flags, + page_off, length); + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref); + +int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, + int flags, unsigned page_off, + unsigned length) +{ + int ref, rc; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags, + page_off, length); + if (rc < 0) { + put_free_entry(ref); + return rc; + } + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage); + +bool gnttab_subpage_grants_available(void) +{ + return gnttab_interface->update_subpage_entry != NULL; +} +EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); + static int gnttab_query_foreign_access_v1(grant_ref_t ref) { return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); @@ -813,6 +884,7 @@ static struct gnttab_ops gnttab_v2_ops = { .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, .query_foreign_access = gnttab_query_foreign_access_v2, + .update_subpage_entry = gnttab_update_subpage_entry_v2, }; static void gnttab_request_version(void) -- cgit v1.2.3 From 9438ce9dbbf512fd717051284f568d5cb35e5cf1 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:15:07 +0800 Subject: xen/granttable: Support transitive grants These allow a domain A which has been granted access on a page of domain B's memory to issue domain C with a copy-grant on the same page. This is useful e.g. for forwarding packets between domains. Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c8312c7056f1..a3d0e1e278c1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -131,6 +131,18 @@ struct gnttab_ops { void (*update_subpage_entry)(grant_ref_t ref, domid_t domid, unsigned long frame, int flags, unsigned page_off, unsigned length); + /* + * Redirect an available grant entry on domain A to another grant + * reference of domain B, then allow domain C to use grant reference + * of domain B transitively. Ref parameter is an available grant entry + * reference on domain A, domid is id of domain C which accesses grant + * entry transitively, flags is grant type and flag information, + * trans_domid is id of domain B whose grant entry is finally accessed + * transitively, trans_gref is grant entry transitive reference of + * domain B. + */ + void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags, + domid_t trans_domid, grant_ref_t trans_gref); }; static struct gnttab_ops *gnttab_interface; @@ -332,6 +344,63 @@ bool gnttab_subpage_grants_available(void) } EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); +void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) +{ + gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; + gnttab_shared.v2[ref].transitive.gref = trans_gref; + gnttab_shared.v2[ref].hdr.domid = domid; + wmb(); + gnttab_shared.v2[ref].hdr.flags = + GTF_permit_access | GTF_transitive | flags; +} + +int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) +{ + if (flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page)) + return -EPERM; + + if (gnttab_interface->update_trans_entry == NULL) + return -ENOSYS; + + gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid, + trans_gref); + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref); + +int gnttab_grant_foreign_access_trans(domid_t domid, int flags, + domid_t trans_domid, + grant_ref_t trans_gref) +{ + int ref, rc; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags, + trans_domid, trans_gref); + if (rc < 0) { + put_free_entry(ref); + return rc; + } + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans); + +bool gnttab_trans_grants_available(void) +{ + return gnttab_interface->update_trans_entry != NULL; +} +EXPORT_SYMBOL_GPL(gnttab_trans_grants_available); + static int gnttab_query_foreign_access_v1(grant_ref_t ref) { return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); @@ -885,6 +954,7 @@ static struct gnttab_ops gnttab_v2_ops = { .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, .query_foreign_access = gnttab_query_foreign_access_v2, .update_subpage_entry = gnttab_update_subpage_entry_v2, + .update_trans_entry = gnttab_update_trans_entry_v2, }; static void gnttab_request_version(void) -- cgit v1.2.3 From c3b3f16d1bceb5ac5f21528f889810b4ac5a3596 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:09 -0500 Subject: xen/events: prevent calling evtchn_get on invalid channels The event channel number provided to evtchn_get can be provided by userspace, so needs to be checked against the maximum number of event channels prior to using it to index into evtchn_to_irq. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index a3bcd6175f4a..e5e5812a1014 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1104,6 +1104,9 @@ int evtchn_get(unsigned int evtchn) struct irq_info *info; int err = -ENOENT; + if (evtchn >= NR_EVENT_CHANNELS) + return -EINVAL; + mutex_lock(&irq_mapping_update_lock); irq = evtchn_to_irq[evtchn]; -- cgit v1.2.3 From 0105d2b4fbc24c2fb6ca9bae650784dd7ddf0b12 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:10 -0500 Subject: xen/gntalloc: release grant references on page free gnttab_end_foreign_access_ref does not return the grant reference it is passed to the free list; gnttab_free_grant_reference needs to be explicitly called. While gnttab_end_foreign_access provides a wrapper for this, it is unsuitable because it does not return errors. Reported-by: Anil Madhavapeddy Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index c95181f43a6a..f330a4b8b685 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -191,6 +191,8 @@ static void __del_gref(struct gntalloc_gref *gref) if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) return; + + gnttab_free_grant_reference(gref->gref_id); } gref_size--; -- cgit v1.2.3 From 243082e0d59f169a1fa502f51ee5a820889fae93 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:11 -0500 Subject: xen/gntalloc: fix reference counts on multi-page mappings When a multi-page mapping of gntalloc is created, the reference counts of all pages in the vma are incremented. However, the vma open/close operations only adjusted the reference count of the first page in the mapping, leaking the other pages. Store a struct in the vm_private_data to track the original page count to properly free the pages when the last reference to the vma is closed. Reported-by: Anil Madhavapeddy Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 56 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 13 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index f330a4b8b685..e8ea56583b4c 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -99,6 +99,12 @@ struct gntalloc_file_private_data { uint64_t index; }; +struct gntalloc_vma_private_data { + struct gntalloc_gref *gref; + int users; + int count; +}; + static void __del_gref(struct gntalloc_gref *gref); static void do_cleanup(void) @@ -451,25 +457,39 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, static void gntalloc_vma_open(struct vm_area_struct *vma) { - struct gntalloc_gref *gref = vma->vm_private_data; - if (!gref) + struct gntalloc_vma_private_data *priv = vma->vm_private_data; + + if (!priv) return; mutex_lock(&gref_mutex); - gref->users++; + priv->users++; mutex_unlock(&gref_mutex); } static void gntalloc_vma_close(struct vm_area_struct *vma) { - struct gntalloc_gref *gref = vma->vm_private_data; - if (!gref) + struct gntalloc_vma_private_data *priv = vma->vm_private_data; + struct gntalloc_gref *gref, *next; + int i; + + if (!priv) return; mutex_lock(&gref_mutex); - gref->users--; - if (gref->users == 0) - __del_gref(gref); + priv->users--; + if (priv->users == 0) { + gref = priv->gref; + for (i = 0; i < priv->count; i++) { + gref->users--; + next = list_entry(gref->next_gref.next, + struct gntalloc_gref, next_gref); + if (gref->users == 0) + __del_gref(gref); + gref = next; + } + kfree(priv); + } mutex_unlock(&gref_mutex); } @@ -481,19 +501,25 @@ static struct vm_operations_struct gntalloc_vmops = { static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) { struct gntalloc_file_private_data *priv = filp->private_data; + struct gntalloc_vma_private_data *vm_priv; struct gntalloc_gref *gref; int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; int rv, i; - pr_debug("%s: priv %p, page %lu+%d\n", __func__, - priv, vma->vm_pgoff, count); - if (!(vma->vm_flags & VM_SHARED)) { printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); return -EINVAL; } + vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL); + if (!vm_priv) + return -ENOMEM; + mutex_lock(&gref_mutex); + + pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__, + priv, vm_priv, vma->vm_pgoff, count); + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); if (gref == NULL) { rv = -ENOENT; @@ -502,9 +528,13 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) goto out_unlock; } - vma->vm_private_data = gref; + vm_priv->gref = gref; + vm_priv->users = 1; + vm_priv->count = count; + + vma->vm_private_data = vm_priv; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; vma->vm_ops = &gntalloc_vmops; -- cgit v1.2.3 From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Fri, 16 Dec 2011 11:34:33 -0500 Subject: xen: Add privcmd device driver Access to arbitrary hypercalls is currently provided via xenfs. This adds a standard character device to handle this. The support in xenfs remains for backward compatibility and uses the device driver code. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 7 + drivers/xen/Makefile | 2 + drivers/xen/privcmd.c | 437 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/privcmd.h | 3 + drivers/xen/xenfs/Makefile | 2 +- drivers/xen/xenfs/privcmd.c | 400 ---------------------------------------- drivers/xen/xenfs/super.c | 3 +- drivers/xen/xenfs/xenfs.h | 1 - 8 files changed, 452 insertions(+), 403 deletions(-) create mode 100644 drivers/xen/privcmd.c create mode 100644 drivers/xen/privcmd.h delete mode 100644 drivers/xen/xenfs/privcmd.c (limited to 'drivers/xen') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8795480c2350..a1ced521cf74 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -86,6 +86,7 @@ config XEN_BACKEND config XENFS tristate "Xen filesystem" + select XEN_PRIVCMD default y help The xen filesystem provides a way for domains to share @@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND xen-pciback.hide=(03:00.0)(04:00.0) If in doubt, say m. + +config XEN_PRIVCMD + tristate + depends on XEN + default m + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 974fffdf22b2..aa31337192cc 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ +obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o +xen-privcmd-y := privcmd.o diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c new file mode 100644 index 000000000000..4e8d3da89ad5 --- /dev/null +++ b/drivers/xen/privcmd.c @@ -0,0 +1,437 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "privcmd.h" + +MODULE_LICENSE("GPL"); + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); +#endif + +static long privcmd_ioctl_hypercall(void __user *udata) +{ + struct privcmd_hypercall hypercall; + long ret; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + + ret = privcmd_call(hypercall.op, + hypercall.arg[0], hypercall.arg[1], + hypercall.arg[2], hypercall.arg[3], + hypercall.arg[4]); + + return ret; +} + +static void free_page_list(struct list_head *pages) +{ + struct page *p, *n; + + list_for_each_entry_safe(p, n, pages, lru) + __free_page(p); + + INIT_LIST_HEAD(pages); +} + +/* + * Given an array of items in userspace, return a list of pages + * containing the data. If copying fails, either because of memory + * allocation failure or a problem reading user memory, return an + * error code; its up to the caller to dispose of any partial list. + */ +static int gather_array(struct list_head *pagelist, + unsigned nelem, size_t size, + void __user *data) +{ + unsigned pageidx; + void *pagedata; + int ret; + + if (size > PAGE_SIZE) + return 0; + + pageidx = PAGE_SIZE; + pagedata = NULL; /* quiet, gcc */ + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page = alloc_page(GFP_KERNEL); + + ret = -ENOMEM; + if (page == NULL) + goto fail; + + pagedata = page_address(page); + + list_add_tail(&page->lru, pagelist); + pageidx = 0; + } + + ret = -EFAULT; + if (copy_from_user(pagedata + pageidx, data, size)) + goto fail; + + data += size; + pageidx += size; + } + + ret = 0; + +fail: + return ret; +} + +/* + * Call function "fn" on each element of the array fragmented + * over a list of pages. + */ +static int traverse_pages(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret = 0; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + pagedata = NULL; /* hush, gcc */ + + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + pageidx = 0; + } + + ret = (*fn)(pagedata + pageidx, state); + if (ret) + break; + pageidx += size; + } + + return ret; +} + +struct mmap_mfn_state { + unsigned long va; + struct vm_area_struct *vma; + domid_t domain; +}; + +static int mmap_mfn_range(void *data, void *state) +{ + struct privcmd_mmap_entry *msg = data; + struct mmap_mfn_state *st = state; + struct vm_area_struct *vma = st->vma; + int rc; + + /* Do not allow range to wrap the address space. */ + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) + return -EINVAL; + + /* Range chunks must be contiguous in va space. */ + if ((msg->va != st->va) || + ((msg->va+(msg->npages< vma->vm_end)) + return -EINVAL; + + rc = xen_remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, msg->npages, + vma->vm_page_prot, + st->domain); + if (rc < 0) + return rc; + + st->va += msg->npages << PAGE_SHIFT; + + return 0; +} + +static long privcmd_ioctl_mmap(void __user *udata) +{ + struct privcmd_mmap mmapcmd; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + LIST_HEAD(pagelist); + struct mmap_mfn_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + rc = gather_array(&pagelist, + mmapcmd.num, sizeof(struct privcmd_mmap_entry), + mmapcmd.entry); + + if (rc || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + { + struct page *page = list_first_entry(&pagelist, + struct page, lru); + struct privcmd_mmap_entry *msg = page_address(page); + + vma = find_vma(mm, msg->va); + rc = -EINVAL; + + if (!vma || (msg->va != vma->vm_start) || + !privcmd_enforce_singleshot_mapping(vma)) + goto out_up; + } + + state.va = vma->vm_start; + state.vma = vma; + state.domain = mmapcmd.dom; + + rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), + &pagelist, + mmap_mfn_range, &state); + + +out_up: + up_write(&mm->mmap_sem); + +out: + free_page_list(&pagelist); + + return rc; +} + +struct mmap_batch_state { + domid_t domain; + unsigned long va; + struct vm_area_struct *vma; + int err; + + xen_pfn_t __user *user; +}; + +static int mmap_batch_fn(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain) < 0) { + *mfnp |= 0xf0000000U; + st->err++; + } + st->va += PAGE_SIZE; + + return 0; +} + +static int mmap_return_errors(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + return put_user(*mfnp, st->user++); +} + +static struct vm_operations_struct privcmd_vm_ops; + +static long privcmd_ioctl_mmap_batch(void __user *udata) +{ + int ret; + struct privcmd_mmapbatch m; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long nr_pages; + LIST_HEAD(pagelist); + struct mmap_batch_state state; + + if (!xen_initial_domain()) + return -E