summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 20:58:12 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 20:58:12 -0800
commit848b81415c42ff3dc9a4204749087b015c37ef66 (patch)
tree391da3a73aea48632248220d2d6b8d45a88f7eae /drivers/block
parent992956189de58cae9f2be40585bc25105cd7c5ad (diff)
parent6fd59a83b9261fa53eaf98fb5514abba504a3ea3 (diff)
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton: "Incoming: - lots of misc stuff - backlight tree updates - lib/ updates - Oleg's percpu-rwsem changes - checkpatch - rtc - aoe - more checkpoint/restart support I still have a pile of MM stuff pending - Pekka should be merging later today after which that is good to go. A number of other things are twiddling thumbs awaiting maintainer merges." * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (180 commits) scatterlist: don't BUG when we can trivially return a proper error. docs: update documentation about /proc/<pid>/fdinfo/<fd> fanotify output fs, fanotify: add @mflags field to fanotify output docs: add documentation about /proc/<pid>/fdinfo/<fd> output fs, notify: add procfs fdinfo helper fs, exportfs: add exportfs_encode_inode_fh() helper fs, exportfs: escape nil dereference if no s_export_op present fs, epoll: add procfs fdinfo helper fs, eventfd: add procfs fdinfo helper procfs: add ability to plug in auxiliary fdinfo providers tools/testing/selftests/kcmp/kcmp_test.c: print reason for failure in kcmp_test breakpoint selftests: print failure status instead of cause make error kcmp selftests: print fail status instead of cause make error kcmp selftests: make run_tests fix mem-hotplug selftests: print failure status instead of cause make error cpu-hotplug selftests: print failure status instead of cause make error mqueue selftests: print failure status instead of cause make error vm selftests: print failure status instead of cause make error ubifs: use prandom_bytes mtd: nandsim: use prandom_bytes ...
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/aoe/aoe.h57
-rw-r--r--drivers/block/aoe/aoeblk.c104
-rw-r--r--drivers/block/aoe/aoechr.c7
-rw-r--r--drivers/block/aoe/aoecmd.c715
-rw-r--r--drivers/block/aoe/aoedev.c243
-rw-r--r--drivers/block/aoe/aoemain.c2
-rw-r--r--drivers/block/aoe/aoenet.c15
7 files changed, 822 insertions, 321 deletions
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index d2ed7f18d1ac..175649468c95 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,5 +1,5 @@
/* Copyright (c) 2012 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "50"
+#define VERSION "81"
#define AOE_MAJOR 152
#define DEVICE_NAME "aoe"
@@ -10,7 +10,7 @@
#define AOE_PARTITIONS (16)
#endif
-#define WHITESPACE " \t\v\f\n"
+#define WHITESPACE " \t\v\f\n,"
enum {
AOECMD_ATA,
@@ -73,21 +73,29 @@ enum {
DEVFL_TKILL = (1<<1), /* flag for timer to know when to kill self */
DEVFL_EXT = (1<<2), /* device accepts lba48 commands */
DEVFL_GDALLOC = (1<<3), /* need to alloc gendisk */
- DEVFL_KICKME = (1<<4), /* slow polling network card catch */
- DEVFL_NEWSIZE = (1<<5), /* need to update dev size in block layer */
+ DEVFL_GD_NOW = (1<<4), /* allocating gendisk */
+ DEVFL_KICKME = (1<<5), /* slow polling network card catch */
+ DEVFL_NEWSIZE = (1<<6), /* need to update dev size in block layer */
+ DEVFL_FREEING = (1<<7), /* set when device is being cleaned up */
+ DEVFL_FREED = (1<<8), /* device has been cleaned up */
};
enum {
DEFAULTBCNT = 2 * 512, /* 2 sectors */
MIN_BUFS = 16,
- NTARGETS = 8,
+ NTARGETS = 4,
NAOEIFS = 8,
NSKBPOOLMAX = 256,
NFACTIVE = 61,
TIMERTICK = HZ / 10,
- MINTIMER = HZ >> 2,
- MAXTIMER = HZ << 1,
+ RTTSCALE = 8,
+ RTTDSCALE = 3,
+ RTTAVG_INIT = USEC_PER_SEC / 4 << RTTSCALE,
+ RTTDEV_INIT = RTTAVG_INIT / 4,
+
+ HARD_SCORN_SECS = 10, /* try another remote port after this */
+ MAX_TAINT = 1000, /* cap on aoetgt taint */
};
struct buf {
@@ -100,10 +108,17 @@ struct buf {
struct request *rq;
};
+enum frame_flags {
+ FFL_PROBE = 1,
+};
+
struct frame {
struct list_head head;
u32 tag;
+ struct timeval sent; /* high-res time packet was sent */
+ u32 sent_jiffs; /* low-res jiffies-based sent time */
ulong waited;
+ ulong waited_total;
struct aoetgt *t; /* parent target I belong to */
sector_t lba;
struct sk_buff *skb; /* command skb freed on module exit */
@@ -112,6 +127,7 @@ struct frame {
struct bio_vec *bv;
ulong bcnt;
ulong bv_off;
+ char flags;
};
struct aoeif {
@@ -122,28 +138,31 @@ struct aoeif {
struct aoetgt {
unsigned char addr[6];
- ushort nframes;
+ ushort nframes; /* cap on frames to use */
struct aoedev *d; /* parent device I belong to */
struct list_head ffree; /* list of free frames */
struct aoeif ifs[NAOEIFS];
struct aoeif *ifp; /* current aoeif in use */
- ushort nout;
- ushort maxout;
- ulong falloc;
- ulong lastwadj; /* last window adjustment */
+ ushort nout; /* number of AoE commands outstanding */
+ ushort maxout; /* current value for max outstanding */
+ ushort next_cwnd; /* incr maxout after decrementing to zero */
+ ushort ssthresh; /* slow start threshold */
+ ulong falloc; /* number of allocated frames */
+ int taint; /* how much we want to avoid this aoetgt */
int minbcnt;
int wpkts, rpkts;
+ char nout_probes;
};
struct aoedev {
struct aoedev *next;
ulong sysminor;
ulong aoemajor;
+ u32 rttavg; /* scaled AoE round trip time average */
+ u32 rttdev; /* scaled round trip time mean deviation */
u16 aoeminor;
u16 flags;
u16 nopen; /* (bd_openers isn't available without sleeping) */
- u16 rttavg; /* round trip average of requests/responses */
- u16 mintimer;
u16 fw_ver; /* version of blade's firmware */
u16 lasttag; /* last tag sent */
u16 useme;
@@ -151,7 +170,7 @@ struct aoedev {
struct work_struct work;/* disk create work struct */
struct gendisk *gd;
struct request_queue *blkq;
- struct hd_geometry geo;
+ struct hd_geometry geo;
sector_t ssize;
struct timer_list timer;
spinlock_t lock;
@@ -164,11 +183,12 @@ struct aoedev {
} ip;
ulong maxbcnt;
struct list_head factive[NFACTIVE]; /* hash of active frames */
- struct aoetgt *targets[NTARGETS];
+ struct list_head rexmitq; /* deferred retransmissions */
+ struct aoetgt **targets;
+ ulong ntargets; /* number of allocated aoetgt pointers */
struct aoetgt **tgt; /* target in use when working */
- struct aoetgt *htgt; /* target needing rexmit assistance */
- ulong ntargets;
ulong kicked;
+ char ident[512];
};
/* kthread tracking */
@@ -195,6 +215,7 @@ void aoecmd_cfg(ushort aoemajor, unsigned char aoeminor);
struct sk_buff *aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(struct work_struct *);
+void aoecmd_wreset(struct aoetgt *t);
void aoecmd_cleanslate(struct aoedev *);
void aoecmd_exit(void);
int aoecmd_init(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 00dfc5008ad4..a129f8c8073d 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -16,11 +16,19 @@
#include <linux/netdevice.h>
#include <linux/mutex.h>
#include <linux/export.h>
+#include <linux/moduleparam.h>
+#include <scsi/sg.h>
#include "aoe.h"
static DEFINE_MUTEX(aoeblk_mutex);
static struct kmem_cache *buf_pool_cache;
+/* GPFS needs a larger value than the default. */
+static int aoe_maxsectors;
+module_param(aoe_maxsectors, int, 0644);
+MODULE_PARM_DESC(aoe_maxsectors,
+ "When nonzero, set the maximum number of sectors per I/O request");
+
static ssize_t aoedisk_show_state(struct device *dev,
struct device_attribute *attr, char *page)
{
@@ -59,7 +67,7 @@ static ssize_t aoedisk_show_netif(struct device *dev,
nd = nds;
ne = nd + ARRAY_SIZE(nds);
t = d->targets;
- te = t + NTARGETS;
+ te = t + d->ntargets;
for (; t < te && *t; t++) {
ifp = (*t)->ifs;
e = ifp + NAOEIFS;
@@ -91,6 +99,14 @@ static ssize_t aoedisk_show_fwver(struct device *dev,
return snprintf(page, PAGE_SIZE, "0x%04x\n", (unsigned int) d->fw_ver);
}
+static ssize_t aoedisk_show_payload(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct gendisk *disk = dev_to_disk(dev);
+ struct aoedev *d = disk->private_data;
+
+ return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
+}
static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
@@ -99,12 +115,14 @@ static struct device_attribute dev_attr_firmware_version = {
.attr = { .name = "firmware-version", .mode = S_IRUGO },
.show = aoedisk_show_fwver,
};
+static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
static struct attribute *aoe_attrs[] = {
&dev_attr_state.attr,
&dev_attr_mac.attr,
&dev_attr_netif.attr,
&dev_attr_firmware_version.attr,
+ &dev_attr_payload.attr,
NULL,
};
@@ -129,9 +147,18 @@ aoeblk_open(struct block_device *bdev, fmode_t mode)
struct aoedev *d = bdev->bd_disk->private_data;
ulong flags;
+ if (!virt_addr_valid(d)) {
+ pr_crit("aoe: invalid device pointer in %s\n",
+ __func__);
+ WARN_ON(1);
+ return -ENODEV;
+ }
+ if (!(d->flags & DEVFL_UP) || d->flags & DEVFL_TKILL)
+ return -ENODEV;
+
mutex_lock(&aoeblk_mutex);
spin_lock_irqsave(&d->lock, flags);
- if (d->flags & DEVFL_UP) {
+ if (d->flags & DEVFL_UP && !(d->flags & DEVFL_TKILL)) {
d->nopen++;
spin_unlock_irqrestore(&d->lock, flags);
mutex_unlock(&aoeblk_mutex);
@@ -195,9 +222,38 @@ aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
+static int
+aoeblk_ioctl(struct block_device *bdev, fmode_t mode, uint cmd, ulong arg)
+{
+ struct aoedev *d;
+
+ if (!arg)
+ return -EINVAL;
+
+ d = bdev->bd_disk->private_data;
+ if ((d->flags & DEVFL_UP) == 0) {
+ pr_err("aoe: disk not up\n");
+ return -ENODEV;
+ }
+
+ if (cmd == HDIO_GET_IDENTITY) {
+ if (!copy_to_user((void __user *) arg, &d->ident,
+ sizeof(d->ident)))
+ return 0;
+ return -EFAULT;
+ }
+
+ /* udev calls scsi_id, which uses SG_IO, resulting in noise */
+ if (cmd != SG_IO)
+ pr_info("aoe: unknown ioctl 0x%x\n", cmd);
+
+ return -ENOTTY;
+}
+
static const struct block_device_operations aoe_bdops = {
.open = aoeblk_open,
.release = aoeblk_release,
+ .ioctl = aoeblk_ioctl,
.getgeo = aoeblk_getgeo,
.owner = THIS_MODULE,
};
@@ -212,6 +268,18 @@ aoeblk_gdalloc(void *vp)
struct request_queue *q;
enum { KB = 1024, MB = KB * KB, READ_AHEAD = 2 * MB, };
ulong flags;
+ int late = 0;
+
+ spin_lock_irqsave(&d->lock, flags);
+ if (d->flags & DEVFL_GDALLOC
+ && !(d->flags & DEVFL_TKILL)
+ && !(d->flags & DEVFL_GD_NOW))
+ d->flags |= DEVFL_GD_NOW;
+ else
+ late = 1;
+ spin_unlock_irqrestore(&d->lock, flags);
+ if (late)
+ return;
gd = alloc_disk(AOE_PARTITIONS);
if (gd == NULL) {
@@ -231,23 +299,24 @@ aoeblk_gdalloc(void *vp)
if (q == NULL) {
pr_err("aoe: cannot allocate block queue for %ld.%d\n",
d->aoemajor, d->aoeminor);
- mempool_destroy(mp);
- goto err_disk;
+ goto err_mempool;
}
- d->blkq = blk_alloc_queue(GFP_KERNEL);
- if (!d->blkq)
- goto err_mempool;
- d->blkq->backing_dev_info.name = "aoe";
- if (bdi_init(&d->blkq->backing_dev_info))
- goto err_blkq;
spin_lock_irqsave(&d->lock, flags);
- blk_queue_max_hw_sectors(d->blkq, BLK_DEF_MAX_SECTORS);
+ WARN_ON(!(d->flags & DEVFL_GD_NOW));
+ WARN_ON(!(d->flags & DEVFL_GDALLOC));
+ WARN_ON(d->flags & DEVFL_TKILL);
+ WARN_ON(d->gd);
+ WARN_ON(d->flags & DEVFL_UP);
+ blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
+ q->backing_dev_info.name = "aoe";
q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
d->bufpool = mp;
d->blkq = gd->queue = q;
q->queuedata = d;
d->gd = gd;
+ if (aoe_maxsectors)
+ blk_queue_max_hw_sectors(q, aoe_maxsectors);
gd->major = AOE_MAJOR;
gd->first_minor = d->sysminor;
gd->fops = &aoe_bdops;
@@ -263,18 +332,21 @@ aoeblk_gdalloc(void *vp)
add_disk(gd);
aoedisk_add_sysfs(d);
+
+ spin_lock_irqsave(&d->lock, flags);
+ WARN_ON(!(d->flags & DEVFL_GD_NOW));
+ d->flags &= ~DEVFL_GD_NOW;
+ spin_unlock_irqrestore(&d->lock, flags);
return;
-err_blkq:
- blk_cleanup_queue(d->blkq);
- d->blkq = NULL;
err_mempool:
- mempool_destroy(d->bufpool);
+ mempool_destroy(mp);
err_disk:
put_disk(gd);
err:
spin_lock_irqsave(&d->lock, flags);
- d->flags &= ~DEVFL_GDALLOC;
+ d->flags &= ~DEVFL_GD_NOW;
+ schedule_work(&d->work);
spin_unlock_irqrestore(&d->lock, flags);
}
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index ed57a890c643..42e67ad6bd20 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -39,6 +39,11 @@ struct ErrMsg {
};
static DEFINE_MUTEX(aoechr_mutex);
+
+/* A ring buffer of error messages, to be read through
+ * "/dev/etherd/err". When no messages are present,
+ * readers will block waiting for messages to appear.
+ */
static struct ErrMsg emsgs[NMSG];
static int emsgs_head_idx, emsgs_tail_idx;
static struct completion emsgs_comp;
@@ -282,7 +287,7 @@ aoechr_init(void)
int n, i;
n = register_chrdev(AOE_MAJOR, "aoechr", &aoe_fops);
- if (n < 0) {
+ if (n < 0) {
printk(KERN_ERR "aoe: can't register char device\n");
return n;
}
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 9fe4f1865558..25ef5c014fca 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -22,6 +22,7 @@
#define MAXIOC (8192) /* default meant to avoid most soft lockups */
static void ktcomplete(struct frame *, struct sk_buff *);
+static int count_targets(struct aoedev *d, int *untainted);
static struct buf *nextbuf(struct aoedev *);
@@ -29,7 +30,7 @@ static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
-static int aoe_maxout = 16;
+static int aoe_maxout = 64;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
"Only aoe_maxout outstanding packets for every MAC on eX.Y.");
@@ -43,6 +44,8 @@ static struct {
spinlock_t lock;
} iocq;
+static struct page *empty_page;
+
static struct sk_buff *
new_skb(ulong len)
{
@@ -59,6 +62,23 @@ new_skb(ulong len)
}
static struct frame *
+getframe_deferred(struct aoedev *d, u32 tag)
+{
+ struct list_head *head, *pos, *nx;
+ struct frame *f;
+
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head) {
+ f = list_entry(pos, struct frame, head);
+ if (f->tag == tag) {
+ list_del(pos);
+ return f;
+ }
+ }
+ return NULL;
+}
+
+static struct frame *
getframe(struct aoedev *d, u32 tag)
{
struct frame *f;
@@ -162,8 +182,10 @@ aoe_freetframe(struct frame *f)
t = f->t;
f->buf = NULL;
+ f->lba = 0;
f->bv = NULL;
f->r_skb = NULL;
+ f->flags = 0;
list_add(&f->head, &t->ffree);
}
@@ -217,20 +239,25 @@ newframe(struct aoedev *d)
struct frame *f;
struct aoetgt *t, **tt;
int totout = 0;
+ int use_tainted;
+ int has_untainted;
- if (d->targets[0] == NULL) { /* shouldn't happen, but I'm paranoid */
+ if (!d->targets || !d->targets[0]) {
printk(KERN_ERR "aoe: NULL TARGETS!\n");
return NULL;
}
tt = d->tgt; /* last used target */
- for (;;) {
+ for (use_tainted = 0, has_untainted = 0;;) {
tt++;
- if (tt >= &d->targets[NTARGETS] || !*tt)
+ if (tt >= &d->targets[d->ntargets] || !*tt)
tt = d->targets;
t = *tt;
- totout += t->nout;
+ if (!t->taint) {
+ has_untainted = 1;
+ totout += t->nout;
+ }
if (t->nout < t->maxout
- && t != d->htgt
+ && (use_tainted || !t->taint)
&& t->ifp->nd) {
f = newtframe(d, t);
if (f) {
@@ -239,8 +266,12 @@ newframe(struct aoedev *d)
return f;
}
}
- if (tt == d->tgt) /* we've looped and found nada */
- break;
+ if (tt == d->tgt) { /* we've looped and found nada */
+ if (!use_tainted && !has_untainted)
+ use_tainted = 1;
+ else
+ break;
+ }
}
if (totout == 0) {
d->kicked++;
@@ -277,21 +308,68 @@ fhash(struct frame *f)
list_add_tail(&f->head, &d->factive[n]);
}
+static void
+ata_rw_frameinit(struct frame *f)
+{
+ struct aoetgt *t;
+ struct aoe_hdr *h;
+ struct aoe_atahdr *ah;
+ struct sk_buff *skb;
+ char writebit, extbit;
+
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb_mac_header(skb);
+ ah = (struct aoe_atahdr *) (h + 1);
+ skb_put(skb, sizeof(*h) + sizeof(*ah));
+ memset(h, 0, skb->len);
+
+ writebit = 0x10;
+ extbit = 0x4;
+
+ t = f->t;
+ f->tag = aoehdr_atainit(t->d, t, h);
+ fhash(f);
+ t->nout++;
+ f->waited = 0;
+ f->waited_total = 0;
+ if (f->buf)
+ f->lba = f->buf->sector;
+
+ /* set up ata header */
+ ah->scnt = f->bcnt >> 9;
+ put_lba(ah, f->lba);
+ if (t->d->flags & DEVFL_EXT) {
+ ah->aflags |= AOEAFL_EXT;
+ } else {
+ extbit = 0;
+ ah->lba3 &= 0x0f;
+ ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
+ }
+ if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
+ skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+ ah->aflags |= AOEAFL_WRITE;
+ skb->len += f->bcnt;
+ skb->data_len = f->bcnt;
+ skb->truesize += f->bcnt;
+ t->wpkts++;
+ } else {
+ t->rpkts++;
+ writebit = 0;
+ }
+
+ ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+ skb->dev = t->ifp->nd;
+}
+
static int
aoecmd_ata_rw(struct aoedev *d)
{
struct frame *f;
- struct aoe_hdr *h;
- struct aoe_atahdr *ah;
struct buf *buf;
struct aoetgt *t;
struct sk_buff *skb;
struct sk_buff_head queue;
ulong bcnt, fbcnt;
- char writebit, extbit;
-
- writebit = 0x10;
- extbit = 0x4;
buf = nextbuf(d);
if (buf == NULL)
@@ -326,50 +404,18 @@ aoecmd_ata_rw(struct aoedev *d)
} while (fbcnt);
/* initialize the headers & frame */
- skb = f->skb;
- h = (struct aoe_hdr *) skb_mac_header(skb);
- ah = (struct aoe_atahdr *) (h+1);
- skb_put(skb, sizeof *h + sizeof *ah);
- memset(h, 0, skb->len);
- f->tag = aoehdr_atainit(d, t, h);
- fhash(f);
- t->nout++;
- f->waited = 0;
f->buf = buf;
f->bcnt = bcnt;
- f->lba = buf->sector;
-
- /* set up ata header */
- ah->scnt = bcnt >> 9;
- put_lba(ah, buf->sector);
- if (d->flags & DEVFL_EXT) {
- ah->aflags |= AOEAFL_EXT;
- } else {
- extbit = 0;
- ah->lba3 &= 0x0f;
- ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
- }
- if (bio_data_dir(buf->bio) == WRITE) {
- skb_fillup(skb, f->bv, f->bv_off, bcnt);
- ah->aflags |= AOEAFL_WRITE;
- skb->len += bcnt;
- skb->data_len = bcnt;
- skb->truesize += bcnt;
- t->wpkts++;
- } else {
- t->rpkts++;
- writebit = 0;
- }
-
- ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
+ ata_rw_frameinit(f);
/* mark all tracking fields and load out */
buf->nframesout += 1;
buf->sector += bcnt >> 9;
- skb->dev = t->ifp->nd;
- skb = skb_clone(skb, GFP_ATOMIC);
+ skb = skb_clone(f->skb, GFP_ATOMIC);
if (skb) {
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
__skb_queue_head_init(&queue);
__skb_queue_tail(&queue, skb);
aoenet_xmit(&queue);
@@ -442,11 +488,14 @@ resend(struct aoedev *d, struct frame *f)
h = (struct aoe_hdr *) skb_mac_header(skb);
ah = (struct aoe_atahdr *) (h+1);
- snprintf(buf, sizeof buf,
- "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
- "retransmit", d->aoemajor, d->aoeminor, f->tag, jiffies, n,
- h->src, h->dst, t->nout);
- aoechr_error(buf);
+ if (!(f->flags & FFL_PROBE)) {
+ snprintf(buf, sizeof(buf),
+ "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
+ "retransmit", d->aoemajor, d->aoeminor,
+ f->tag, jiffies, n,
+ h->src, h->dst, t->nout);
+ aoechr_error(buf);
+ }
f->tag = n;
fhash(f);
@@ -458,12 +507,46 @@ resend(struct aoedev *d, struct frame *f)
skb = skb_clone(skb, GFP_ATOMIC);
if (skb == NULL)
return;
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
__skb_queue_head_init(&queue);
__skb_queue_tail(&queue, skb);
aoenet_xmit(&queue);
}
static int
+tsince_hr(struct frame *f)
+{
+ struct timeval now;
+ int n;
+
+ do_gettimeofday(&now);
+ n = now.tv_usec - f->sent.tv_usec;
+ n += (now.tv_sec - f->sent.tv_sec) * USEC_PER_SEC;
+
+ if (n < 0)
+ n = -n;
+
+ /* For relatively long periods, use jiffies to avoid
+ * discrepancies caused by updates to the system time.
+ *
+ * On system with HZ of 1000, 32-bits is over 49 days
+ * worth of jiffies, or over 71 minutes worth of usecs.
+ *
+ * Jiffies overflow is handled by subtraction of unsigned ints:
+ * (gdb) print (unsigned) 2 - (unsigned) 0xfffffffe
+ * $3 = 4
+ * (gdb)
+ */
+ if (n > USEC_PER_SEC / 4) {
+ n = ((u32) jiffies) - f->sent_jiffs;
+ n *= USEC_PER_SEC / HZ;
+ }
+
+ return n;
+}
+
+static int
tsince(u32 tag)
{
int n;
@@ -472,7 +555,7 @@ tsince(u32 tag)
n -= tag & 0xffff;
if (n < 0)
n += 1<<16;
- return n;
+ return jiffies_to_usecs(n + 1);
}
static struct aoeif *
@@ -503,70 +586,189 @@ ejectif(struct aoetgt *t, struct aoeif *ifp)
dev_put(nd);
}
-static int
-sthtith(struct aoedev *d)
+static struct frame *
+reassign_frame(struct frame *f)
{
- struct frame *f, *nf;
- struct list_head *nx, *pos, *head;
+ struct frame *nf;
struct sk_buff *skb;
- struct aoetgt *ht = d->htgt;
- int i;
- for (i = 0; i < NFACTIVE; i++) {
- head = &d->factive[i];
- list_for_each_safe(pos, nx, head) {
- f = list_entry(pos, struct frame, head);
- if (f->t != ht)
- continue;
+ nf = newframe(f->t->d);
+ if (!nf)
+ return NULL;
+ if (nf->t == f->t) {
+ aoe_freetframe(nf);
+ return NULL;
+ }
- nf = newframe(d);
- if (!nf)
- return 0;
+ skb = nf->skb;
+ nf->skb = f->skb;
+ nf->buf = f->buf;
+ nf->bcnt = f->bcnt;
+ nf->lba = f->lba;
+ nf->bv = f->bv;
+ nf->bv_off = f->bv_off;
+ nf->waited = 0;
+ nf->waited_total = f->waited_total;
+ nf->sent = f->sent;
+ nf->sent_jiffs = f->sent_jiffs;
+ f->skb = skb;
+
+ return nf;
+}
- /* remove frame from active list */
- list_del(pos);
+static void
+probe(struct aoetgt *t)
+{
+ struct aoedev *d;
+ struct frame *f;
+ struct sk_buff *skb;
+ struct sk_buff_head queue;
+ size_t n, m;
+ int frag;
- /* reassign all pertinent bits to new outbound frame */
- skb = nf->skb;
- nf->skb = f->skb;
- nf->buf = f->buf;
- nf->bcnt = f->bcnt;
- nf->lba = f->lba;
- nf->bv = f->bv;
- nf->bv_off = f->bv_off;
- nf->waited = 0;
- f->skb = skb;
+ d = t->d;
+ f = newtframe(d, t);
+ if (!f) {
+ pr_err("%s %pm for e%ld.%d: %s\n",
+ "aoe: cannot probe remote address",
+ t->addr,
+ (long) d->aoemajor, d->aoeminor,
+ "no frame available");
+ return;
+ }
+ f->flags |= FFL_PROBE;
+ ifrotate(t);
+ f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+ ata_rw_frameinit(f);
+ skb = f->skb;
+ for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+ if (n < PAGE_SIZE)
+ m = n;
+ else
+ m = PAGE_SIZE;
+ skb_fill_page_desc(skb, frag, empty_page, 0, m);
+ }
+ skb->len += f->bcnt;
+ skb->data_len = f->bcnt;
+ skb->truesize += f->bcnt;
+
+ skb = skb_clone(f->skb, GFP_ATOMIC);
+ if (skb) {
+ do_gettimeofday(&f->sent);
+ f->sent_jiffs = (u32) jiffies;
+ __skb_queue_head_init(&queue);
+ __skb_queue_tail(&queue, skb);
+ aoenet_xmit(&queue);
+ }
+}
+
+static long
+rto(struct aoedev *d)
+{
+ long t;
+
+ t = 2 * d->rttavg >> RTTSCALE;
+ t += 8 * d->rttdev >> RTTDSCALE;
+ if (t == 0)
+ t = 1;
+
+ return t;
+}
+
+static void
+rexmit_deferred(struct aoedev *d)
+{
+ struct aoetgt *t;
+ struct frame *f;
+ struct frame *nf;
+ struct list_head *pos, *nx, *head;
+ int since;
+ int untainted;
+
+ count_targets(d, &untainted);
+
+ head = &d->rexmitq;
+ list_for_each_safe(pos, nx, head) {
+ f = list_entry(pos, struct frame, head);
+ t = f->t;
+ if (t->taint) {
+ if (!(f->flags & FFL_PROBE)) {
+ nf = reassign_frame(f);
+ if (nf) {
+ if (t->nout_probes == 0
+ && untainted > 0) {
+ probe(t);
+ t->nout_probes++;
+ }
+ list_replace(&f->head, &nf->head);
+ pos = &nf->head;
+ aoe_freetframe(f);
+ f = nf;
+ t = f->t;
+ }
+ } else if (untainted < 1) {
+ /* don't probe w/o other untainted aoetgts */
+ goto stop_probe;
+ } else if (tsince_hr(f) < t->taint * rto(d)) {
+ /* reprobe slowly when taint is high */
+ continue;
+ }
+ } else if (f->flags & FFL_PROBE) {
+stop_probe: /* don't probe untainted aoetgts */
+ list_del(pos);
aoe_freetframe(f);
- ht->nout--;
- nf->t->nout++;
- resend(d, nf);
+ /* leaving d->kicked, because this is routine */
+ f->t->d->flags |= DEVFL_KICKME;
+ continue;
}
+ if (t->nout >= t->maxout)
+ continue;
+ list_del(pos);
+ t->nout++;
+ if (f->flags & FFL_PROBE)
+ t->nout_probes++;
+ since = tsince_hr(f);
+ f->waited += since;
+ f->waited_total += since;
+ resend(d, f);
}
- /* We've cleaned up the outstanding so take away his
- * interfaces so he won't be used. We should remove him from
- * the target array here, but cleaning up a target is
- * involved. PUNT!
- */
- memset(ht->ifs, 0, sizeof ht->ifs);
- d->htgt = NULL;
- return 1;
}
-static inline unsigned char
-ata_scnt(unsigned char *packet) {
- struct aoe_hdr *h;
- struct aoe_atahdr *ah;
+/* An aoetgt accumulates demerits quickly, and successful
+ * probing redeems the aoetgt slowly.
+ */
+static void
+scorn(struct aoetgt *t)
+{
+ int n;
- h = (struct aoe_hdr *) packet;
- ah = (struct aoe_atahdr *) (h+1);
- return ah->scnt;
+ n = t->taint++;
+ t->taint += t->taint * 2;
+ if (n > t->taint)
+ t->taint = n;
+ if (t->taint > MAX_TAINT)
+ t->taint = MAX_TAINT;
+}
+
+static int
+count_targets(struct aoedev *d, int *untainted)
+{
+ int i, good;
+
+ for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
+ if (d->targets[i]->taint == 0)
+ good++;
+
+ if (untainted)
+ *untainted = good;
+ return i;
}
static void
rexmit_timer(ulong vp)
{
struct aoedev *d;
- struct aoetgt *t, **tt, **te;
+ struct aoetgt *t;
struct aoeif *ifp;
struct frame *f;
struct list_head *head, *pos, *nx;
@@ -574,15 +776,18 @@ rexmit_timer(ulong vp)
register long timeout;
ulong flags, n;
int i;
+ int utgts; /* number of aoetgt descriptors (not slots) */
+ int since;
d = (struct aoedev *) vp;
- /* timeout is always ~150% of the moving average */
- timeout = d->rttavg;
- timeout += timeout >> 1;
-
spin_lock_irqsave(&d->lock, flags);
+ /* timeout based on observed timings and variations */
+ timeout = rto(d);
+
+ utgts = count_targets(d, NULL);
+
if (d->flags & DEVFL_TKILL) {
spin_unlock_irqrestore(&d->lock, flags);
return;
@@ -593,67 +798,61 @@ rexmit_timer(ulong vp)
head = &d->factive[i];
list_for_each_safe(pos, nx, head) {
f = list_entry(pos, struct frame, head);
- if (tsince(f->tag) < timeout)
+ if (tsince_hr(f) < timeout)
break; /* end of expired frames */
/* move to flist for later processing */
list_move_tail(pos, &flist);
}
}
- /* window check */
- tt = d->targets;
- te = tt + d->ntargets;
- for (; tt < te && (t = *tt); tt++) {
- if (t->nout == t->maxout
- && t->maxout < t->nframes
- && (jiffies - t->lastwadj)/HZ > 10) {
- t->maxout++;
- t->lastwadj = jiffies;
- }
- }
-
- if (!list_empty(&flist)) { /* retransmissions necessary */
- n = d->rttavg <<= 1;
- if (n > MAXTIMER)
- d->rttavg = MAXTIMER;
- }
/* process expired frames */
while (!list_empty(&flist)) {
pos = flist.next;
f = list_entry(pos, struct frame, head);
- n = f->waited += timeout;
- n /= HZ;
- if (n > aoe_deadsecs) {
+ since = tsince_hr(f);
+ n = f->waited_total + since;
+ n /= USEC_PER_SEC;
+ if (aoe_deadsecs
+ && n > aoe_deadsecs
+ && !(f->flags & FFL_PROBE)) {
/* Waited too long. Device failure.
* Hang all frames on first hash bucket for downdev
* to clean up.
*/
list_splice(&flist, &d->factive[0]);
aoedev_downdev(d);
- break;
+ goto out;
}
- list_del(pos);
t = f->t;
- if (n > aoe_deadsecs/2)
- d->htgt = t; /* see if another target can help */
-
- if (t->nout == t->maxout) {
- if (t->maxout > 1)
- t->maxout--;
- t->lastwadj = jiffies;
+ n = f->waited + since;
+ n /= USEC_PER_SEC;
+ if (aoe_deadsecs && utgts > 0
+ && (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
+ scorn(t); /* avoid this target */
+
+ if (t->maxout != 1) {
+ t->ssthresh = t->maxout / 2;
+ t->maxout = 1;
}
- ifp = getif(t, f->skb->dev);
- if (ifp && ++ifp->lost > (t->nframes << 1)
- && (ifp != t->ifs || t->ifs[1].nd)) {
- ejectif(t, ifp);
- ifp = NULL;
+ if (f->flags & FFL_PROBE) {
+ t->nout_probes--;
+ } else {
+ ifp = getif(t, f->skb->dev);
+ if (ifp && ++ifp->lost > (t->nframes << 1)
+ && (ifp != t->ifs || t->ifs[1].nd)) {
+ ejectif(t, ifp);
+ ifp = NULL;
+ }
}
- resend(d, f);
+ list_move_tail(pos, &d->rexmitq);
+ t->nout--;
}
+ rexmit_deferred(d);
- if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
+out:
+ if ((d->flags & DEVFL_KICKME) && d->blkq) {
d->flags &= ~DEVFL_KICKME;
d->blkq->request_fn(d->blkq);
}
@@ -774,8 +973,7 @@ nextbuf(struct aoedev *d)
void
aoecmd_work(struct aoedev *d)
{
- if (d->htgt && !sthtith(d))
- return;
+ rexmit_deferred(d);
while (aoecmd_ata_rw(d))
;
}
@@ -809,6 +1007,17 @@ aoecmd_sleepwork(struct work_struct *work)
}
static void
+ata_ident_fixstring(u16 *id, int ns)
+{
+ u16 s;
+
+ while (ns-- > 0) {
+ s = *id;
+ *id++ = s >> 8 | s << 8;
+ }
+}
+
+static void
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
u64 ssize;
@@ -843,6 +1052,11 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
}
+ ata_ident_fixstring((u16 *) &id[10<<1], 10); /* serial */
+ ata_ident_fixstring((u16 *) &id[23<<1], 4); /* firmware */
+ ata_ident_fixstring((u16 *) &id[27<<1], 20); /* model */
+ memcpy(d->ident, id, sizeof(d->ident));
+
if (d->ssize != ssize)
printk(KERN_INFO
"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
@@ -862,26 +1076,28 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
}
static void
-calc_rttavg(struct aoedev *d, int rtt)
+calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
{
register long n;
n = rtt;
- if (n < 0) {
- n = -rtt;
- if (n < MINTIMER)
- n = MINTIMER;
- else if (n > MAXTIMER)
- n = MAXTIMER;