From 1afcfd5948ff27cdbc6d91e9f3cdbdd7f3b1e566 Mon Sep 17 00:00:00 2001 From: Stefan Weinhuber Date: Tue, 27 Dec 2011 11:27:23 +0100 Subject: [S390] dasd: fix expiration handling for recovery requests The 'expires' value of a ccw requests defines how long the device driver should wait for a response from the evice after the request has been submitted to the channel subsystem. After the expiration time (e.g. 30 seconds) the waiting request will be cancelled and started again. This protects the DASD devices from beeing blocked by errors that cause the answering I/O interrupt to be lost. In case of error recovery requests, this 'expires' value used to be set to 0, so in case of a lost interrupt, such a recovery request would never expire and block the device. To prevent this kind of problem, all recovery requests need to have an expires value > 0 as well. If not specified otherwise, this should be the same expires value as for the original request. Signed-off-by: Stefan Weinhuber Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_3990_erp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/s390/block') diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index 87a0cf160fe5..0326571e7ffa 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1718,7 +1718,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense) erp->startdev = device; erp->memdev = device; erp->magic = default_erp->magic; - erp->expires = 0; + erp->expires = default_erp->expires; erp->retries = 256; erp->buildclk = get_clock(); erp->status = DASD_CQR_FILLED; @@ -2363,7 +2363,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr) erp->memdev = device; erp->block = cqr->block; erp->magic = cqr->magic; - erp->expires = 0; + erp->expires = cqr->expires; erp->retries = 256; erp->buildclk = get_clock(); erp->status = DASD_CQR_FILLED; -- cgit v1.2.3 From b206181d636d416fde48c7f493d7ac5d935b57e3 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 27 Dec 2011 11:27:27 +0100 Subject: [S390] dasd: add sanity check to detect path connection error Prevents possible data corruption by detecting cabling error. Therefor read and compare the UID for all available channel paths. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 409 ++++++++++++++++++++++++++++++----------- 1 file changed, 300 insertions(+), 109 deletions(-) (limited to 'drivers/s390/block') diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 6ab29680586a..763f1bd9605a 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -752,24 +752,13 @@ dasd_eckd_cdl_reclen(int recid) return sizes_trk0[recid]; return LABEL_SIZE; } - -/* - * Generate device unique id that specifies the physical device. - */ -static int dasd_eckd_generate_uid(struct dasd_device *device) +/* create unique id from private structure. */ +static void create_uid(struct dasd_eckd_private *private) { - struct dasd_eckd_private *private; - struct dasd_uid *uid; int count; - unsigned long flags; + struct dasd_uid *uid; - private = (struct dasd_eckd_private *) device->private; - if (!private) - return -ENODEV; - if (!private->ned || !private->gneq) - return -ENODEV; uid = &private->uid; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); memset(uid, 0, sizeof(struct dasd_uid)); memcpy(uid->vendor, private->ned->HDA_manufacturer, sizeof(uid->vendor) - 1); @@ -792,6 +781,23 @@ static int dasd_eckd_generate_uid(struct dasd_device *device) private->vdsneq->uit[count]); } } +} + +/* + * Generate device unique id that specifies the physical device. + */ +static int dasd_eckd_generate_uid(struct dasd_device *device) +{ + struct dasd_eckd_private *private; + unsigned long flags; + + private = (struct dasd_eckd_private *) device->private; + if (!private) + return -ENODEV; + if (!private->ned || !private->gneq) + return -ENODEV; + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + create_uid(private); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); return 0; } @@ -811,6 +817,21 @@ static int dasd_eckd_get_uid(struct dasd_device *device, struct dasd_uid *uid) return -EINVAL; } +/* + * compare device UID with data of a given dasd_eckd_private structure + * return 0 for match + */ +static int dasd_eckd_compare_path_uid(struct dasd_device *device, + struct dasd_eckd_private *private) +{ + struct dasd_uid device_uid; + + create_uid(private); + dasd_eckd_get_uid(device, &device_uid); + + return memcmp(&device_uid, &private->uid, sizeof(struct dasd_uid)); +} + static void dasd_eckd_fill_rcd_cqr(struct dasd_device *device, struct dasd_ccw_req *cqr, __u8 *rcd_buffer, @@ -1005,59 +1026,120 @@ static int dasd_eckd_read_conf(struct dasd_device *device) int conf_len, conf_data_saved; int rc; __u8 lpm, opm; - struct dasd_eckd_private *private; + struct dasd_eckd_private *private, path_private; struct dasd_path *path_data; + struct dasd_uid *uid; + char print_path_uid[60], print_device_uid[60]; private = (struct dasd_eckd_private *) device->private; path_data = &device->path_data; opm = ccw_device_get_path_mask(device->cdev); - lpm = 0x80; conf_data_saved = 0; /* get configuration data per operational path */ for (lpm = 0x80; lpm; lpm>>= 1) { - if (lpm & opm) { - rc = dasd_eckd_read_conf_lpm(device, &conf_data, - &conf_len, lpm); - if (rc && rc != -EOPNOTSUPP) { /* -EOPNOTSUPP is ok */ - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, - "Read configuration data returned " - "error %d", rc); - return rc; - } - if (conf_data == NULL) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", - "No configuration data " - "retrieved"); - /* no further analysis possible */ - path_data->opm |= lpm; - continue; /* no error */ + if (!(lpm & opm)) + continue; + rc = dasd_eckd_read_conf_lpm(device, &conf_data, + &conf_len, lpm); + if (rc && rc != -EOPNOTSUPP) { /* -EOPNOTSUPP is ok */ + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Read configuration data returned " + "error %d", rc); + return rc; + } + if (conf_data == NULL) { + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + "No configuration data " + "retrieved"); + /* no further analysis possible */ + path_data->opm |= lpm; + continue; /* no error */ + } + /* save first valid configuration data */ + if (!conf_data_saved) { + kfree(private->conf_data); + private->conf_data = conf_data; + private->conf_len = conf_len; + if (dasd_eckd_identify_conf_parts(private)) { + private->conf_data = NULL; + private->conf_len = 0; + kfree(conf_data); + continue; } - /* save first valid configuration data */ - if (!conf_data_saved) { - kfree(private->conf_data); - private->conf_data = conf_data; - private->conf_len = conf_len; - if (dasd_eckd_identify_conf_parts(private)) { - private->conf_data = NULL; - private->conf_len = 0; - kfree(conf_data); - continue; - } - conf_data_saved++; + /* + * build device UID that other path data + * can be compared to it + */ + dasd_eckd_generate_uid(device); + conf_data_saved++; + } else { + path_private.conf_data = conf_data; + path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; + if (dasd_eckd_identify_conf_parts( + &path_private)) { + path_private.conf_data = NULL; + path_private.conf_len = 0; + kfree(conf_data); + continue; } - switch (dasd_eckd_path_access(conf_data, conf_len)) { - case 0x02: - path_data->npm |= lpm; - break; - case 0x03: - path_data->ppm |= lpm; - break; + + if (dasd_eckd_compare_path_uid( + device, &path_private)) { + uid = &path_private.uid; + if (strlen(uid->vduit) > 0) + snprintf(print_path_uid, + sizeof(print_path_uid), + "%s.%s.%04x.%02x.%s", + uid->vendor, uid->serial, + uid->ssid, uid->real_unit_addr, + uid->vduit); + else + snprintf(print_path_uid, + sizeof(print_path_uid), + "%s.%s.%04x.%02x", + uid->vendor, uid->serial, + uid->ssid, + uid->real_unit_addr); + uid = &private->uid; + if (strlen(uid->vduit) > 0) + snprintf(print_device_uid, + sizeof(print_device_uid), + "%s.%s.%04x.%02x.%s", + uid->vendor, uid->serial, + uid->ssid, uid->real_unit_addr, + uid->vduit); + else + snprintf(print_device_uid, + sizeof(print_device_uid), + "%s.%s.%04x.%02x", + uid->vendor, uid->serial, + uid->ssid, + uid->real_unit_addr); + dev_err(&device->cdev->dev, + "Not all channel paths lead to " + "the same device, path %02X leads to " + "device %s instead of %s\n", lpm, + print_path_uid, print_device_uid); + return -EINVAL; } - path_data->opm |= lpm; - if (conf_data != private->conf_data) - kfree(conf_data); + + path_private.conf_data = NULL; + path_private.conf_len = 0; } + switch (dasd_eckd_path_access(conf_data, conf_len)) { + case 0x02: + path_data->npm |= lpm; + break; + case 0x03: + path_data->ppm |= lpm; + break; + } + path_data->opm |= lpm; + + if (conf_data != private->conf_data) + kfree(conf_data); } + return 0; } @@ -1090,12 +1172,61 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) return 0; } +static int rebuild_device_uid(struct dasd_device *device, + struct path_verification_work_data *data) +{ + struct dasd_eckd_private *private; + struct dasd_path *path_data; + __u8 lpm, opm; + int rc; + + rc = -ENODEV; + private = (struct dasd_eckd_private *) device->private; + path_data = &device->path_data; + opm = device->path_data.opm; + + for (lpm = 0x80; lpm; lpm >>= 1) { + if (!(lpm & opm)) + continue; + memset(&data->rcd_buffer, 0, sizeof(data->rcd_buffer)); + memset(&data->cqr, 0, sizeof(data->cqr)); + data->cqr.cpaddr = &data->ccw; + rc = dasd_eckd_read_conf_immediately(device, &data->cqr, + data->rcd_buffer, + lpm); + + if (rc) { + if (rc == -EOPNOTSUPP) /* -EOPNOTSUPP is ok */ + continue; + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, + "Read configuration data " + "returned error %d", rc); + break; + } + memcpy(private->conf_data, data->rcd_buffer, + DASD_ECKD_RCD_DATA_SIZE); + if (dasd_eckd_identify_conf_parts(private)) { + rc = -ENODEV; + } else /* first valid path is enough */ + break; + } + + if (!rc) + rc = dasd_eckd_generate_uid(device); + + return rc; +} + static void do_path_verification_work(struct work_struct *work) { struct path_verification_work_data *data; struct dasd_device *device; + struct dasd_eckd_private path_private; + struct dasd_uid *uid; + __u8 path_rcd_buf[DASD_ECKD_RCD_DATA_SIZE]; __u8 lpm, opm, npm, ppm, epm; unsigned long flags; + char print_uid[60]; int rc; data = container_of(work, struct path_verification_work_data, worker); @@ -1112,64 +1243,129 @@ static void do_path_verification_work(struct work_struct *work) ppm = 0; epm = 0; for (lpm = 0x80; lpm; lpm >>= 1) { - if (lpm & data->tbvpm) { - memset(data->rcd_buffer, 0, sizeof(data->rcd_buffer)); - memset(&data->cqr, 0, sizeof(data->cqr)); - data->cqr.cpaddr = &data->ccw; - rc = dasd_eckd_read_conf_immediately(device, &data->cqr, - data->rcd_buffer, - lpm); - if (!rc) { - switch (dasd_eckd_path_access(data->rcd_buffer, - DASD_ECKD_RCD_DATA_SIZE)) { - case 0x02: - npm |= lpm; - break; - case 0x03: - ppm |= lpm; - break; - } - opm |= lpm; - } else if (rc == -EOPNOTSUPP) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", - "path verification: No configuration " - "data retrieved"); - opm |= lpm; - } else if (rc == -EAGAIN) { - DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + if (!(lpm & data->tbvpm)) + continue; + memset(&data->rcd_buffer, 0, sizeof(data->rcd_buffer)); + memset(&data->cqr, 0, sizeof(data->cqr)); + data->cqr.cpaddr = &data->ccw; + rc = dasd_eckd_read_conf_immediately(device, &data->cqr, + data->rcd_buffer, + lpm); + if (!rc) { + switch (dasd_eckd_path_access(data->rcd_buffer, + DASD_ECKD_RCD_DATA_SIZE) + ) { + case 0x02: + npm |= lpm; + break; + case 0x03: + ppm |= lpm; + break; + } + opm |= lpm; + } else if (rc == -EOPNOTSUPP) { + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", + "path verification: No configuration " + "data retrieved"); + opm |= lpm; + } else if (rc == -EAGAIN) { + DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", "path verification: device is stopped," " try again later"); - epm |= lpm; - } else { - dev_warn(&device->cdev->dev, - "Reading device feature codes failed " - "(rc=%d) for new path %x\n", rc, lpm); - continue; - } - if (verify_fcx_max_data(device, lpm)) { + epm |= lpm; + } else { + dev_warn(&device->cdev->dev, + "Reading device feature codes failed " + "(rc=%d) for new path %x\n", rc, lpm); + continue; + } + if (verify_fcx_max_data(device, lpm)) { + opm &= ~lpm; + npm &= ~lpm; + ppm &= ~lpm; + continue; + } + + /* + * save conf_data for comparison after + * rebuild_device_uid may have changed + * the original data + */ + memcpy(&path_rcd_buf, data->rcd_buffer, + DASD_ECKD_RCD_DATA_SIZE); + path_private.conf_data = (void *) &path_rcd_buf; + path_private.conf_len = DASD_ECKD_RCD_DATA_SIZE; + if (dasd_eckd_identify_conf_parts(&path_private)) { + path_private.conf_data = NULL; + path_private.conf_len = 0; + continue; + } + + /* + * compare path UID with device UID only if at least + * one valid path is left + * in other case the device UID may have changed and + * the first working path UID will be used as device UID + */ + if (device->path_data.opm && + dasd_eckd_compare_path_uid(device, &path_private)) { + /* + * the comparison was not successful + * rebuild the device UID with at least one + * known path in case a z/VM hyperswap command + * has changed the device + * + * after this compare again + * + * if either the rebuild or the recompare fails + * the path can not be used + */ + if (rebuild_device_uid(device, data) || + dasd_eckd_compare_path_uid( + device, &path_private)) { + uid = &path_private.uid; + if (strlen(uid->vduit) > 0) + snprintf(print_uid, sizeof(print_uid), + "%s.%s.%04x.%02x.%s", + uid->vendor, uid->serial, + uid->ssid, uid->real_unit_addr, + uid->vduit); + else + snprintf(print_uid, sizeof(print_uid), + "%s.%s.%04x.%02x", + uid->vendor, uid->serial, + uid->ssid, + uid->real_unit_addr); + dev_err(&device->cdev->dev, + "The newly added channel path %02X " + "will not be used because it leads " + "to a different device %s\n", + lpm, print_uid); opm &= ~lpm; npm &= ~lpm; ppm &= ~lpm; + continue; } } + + /* + * There is a small chance that a path is lost again between + * above path verification and the following modification of + * the device opm mask. We could avoid that race here by using + * yet another path mask, but we rather deal with this unlikely + * situation in dasd_start_IO. + */ + spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + if (!device->path_data.opm && opm) { + device->path_data.opm = opm; + dasd_generic_path_operational(device); + } else + device->path_data.opm |= opm; + device->path_data.npm |= npm; + device->path_data.ppm |= ppm; + device->path_data.tbvpm |= epm; + spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); } - /* - * There is a small chance that a path is lost again between - * above path verification and the following modification of - * the device opm mask. We could avoid that race here by using - * yet another path mask, but we rather deal with this unlikely - * situation in dasd_start_IO. - */ - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (!device->path_data.opm && opm) { - device->path_data.opm = opm; - dasd_generic_path_operational(device); - } else - device->path_data.opm |= opm; - device->path_data.npm |= npm; - device->path_data.ppm |= ppm; - device->path_data.tbvpm |= epm; - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_put_device(device); if (data->isglobal) @@ -1441,11 +1637,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) device->default_expires = value; } - /* Generate device unique id */ - rc = dasd_eckd_generate_uid(device); - if (rc) - goto out_err1; - dasd_eckd_get_uid(device, &temp_uid); if (temp_uid.type == UA_BASE_DEVICE) { block = dasd_alloc_block(); -- cgit v1.2.3 From b38f27e8425a132ed2dc49ffb3741404e81363d8 Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 27 Dec 2011 11:27:28 +0100 Subject: [S390] dasd: fix fixpoint divide exception in define_extent If an IO request is build on an alias device without prefix enabled we try to calculate with zero data from the alias device. This triggers a BUG statement with fixpoint divide exception. This case is very unlikely and can only happen if the pathgroup is lost with an alias device already in use. Prevent the alias device from being used in this case. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 10 ++++++++++ drivers/s390/block/dasd_eckd.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/s390/block') diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index c388eda1e2b1..553b3c5abb0a 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -705,6 +705,16 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) if (lcu->pav == NO_PAV || lcu->flags & (NEED_UAC_UPDATE | UPDATE_PENDING)) return NULL; + if (unlikely(!(private->features.feature[8] & 0x01))) { + /* + * PAV enabled but prefix not, very unlikely + * seems to be a lost pathgroup + * use base device to do IO + */ + DBF_DEV_EVENT(DBF_ERR, base_device, "%s", + "Prefix not enabled with PAV enabled\n"); + return NULL; + } spin_lock_irqsave(&lcu->lock, flags); alias_device = group->next; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 763f1bd9605a..bbcd5e9206ee 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2397,7 +2397,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( sizeof(struct PFX_eckd_data)); } else { if (define_extent(ccw++, cqr->data, first_trk, - last_trk, cmd, startdev) == -EAGAIN) { + last_trk, cmd, basedev) == -EAGAIN) { /* Clock not in sync and XRC is enabled. * Try again later. */ -- cgit v1.2.3