diff options
70 files changed, 2196 insertions, 1591 deletions
@@ -286,8 +286,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode, if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ) inode_unlock(inode); - if ((retval > 0) && end_io) - end_io(iocb, pos, retval, bh.b_private); + if (end_io) { + int err; + + err = end_io(iocb, pos, retval, bh.b_private); + if (err) + retval = err; + } if (!(flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(inode); diff --git a/fs/direct-io.c b/fs/direct-io.c index 0a8d937c6775..476f1ecbd1f0 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, if (ret == 0) ret = transferred; - if (dio->end_io && dio->result) - dio->end_io(dio->iocb, offset, transferred, dio->private); + if (dio->end_io) { + int err; + + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 393689dfa1af..c04743519865 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1511,15 +1511,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } -static inline void ext4_set_io_unwritten_flag(struct inode *inode, - struct ext4_io_end *io_end) -{ - if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { - io_end->flag |= EXT4_IO_END_UNWRITTEN; - atomic_inc(&EXT4_I(inode)->i_unwritten); - } -} - /* * Inode dynamic state flags */ @@ -3293,6 +3284,27 @@ extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; extern int ext4_resize_begin(struct super_block *sb); extern void ext4_resize_end(struct super_block *sb); +static inline void ext4_set_io_unwritten_flag(struct inode *inode, + struct ext4_io_end *io_end) +{ + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_unwritten); + } +} + +static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) +{ + struct inode *inode = io_end->inode; + + if (io_end->flag & EXT4_IO_END_UNWRITTEN) { + io_end->flag &= ~EXT4_IO_END_UNWRITTEN; + /* Wake up anyone waiting on unwritten extent conversion */ + if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) + wake_up_all(ext4_ioend_wq(inode)); + } +} + #endif /* __KERNEL__ */ #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index b2e9576450eb..dab84a2530ff 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3289,22 +3289,32 @@ out: } #endif -static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, +static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { ext4_io_end_t *io_end = private; /* if not async direct IO just return */ if (!io_end) - return; + return 0; ext_debug("ext4_end_io_dio(): io_end 0x%p " "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", io_end, io_end->inode->i_ino, iocb, offset, size); + /* + * Error during AIO DIO. We cannot convert unwritten extents as the + * data was not written. Just clear the unwritten flag and drop io_end. + */ + if (size <= 0) { + ext4_clear_io_unwritten_flag(io_end); + size = 0; + } io_end->offset = offset; io_end->size = size; ext4_put_io_end(io_end); + + return 0; } /* diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 349d7aa04fe7..d77d15f4b674 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -136,16 +136,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) kmem_cache_free(io_end_cachep, io_end); } -static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) -{ - struct inode *inode = io_end->inode; - - io_end->flag &= ~EXT4_IO_END_UNWRITTEN; - /* Wake up anyone waiting on unwritten extent conversion */ - if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) - wake_up_all(ext4_ioend_wq(inode)); -} - /* * Check a range of space and convert unwritten extents to written. Note that * we are protected from truncate touching same part of extent tree by the diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index cda0361e95a4..043110e5212d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -620,7 +620,7 @@ bail: * particularly interested in the aio/dio case. We use the rw_lock DLM lock * to protect io on one node from truncation on another. */ -static void ocfs2_dio_end_io(struct kiocb *iocb, +static int ocfs2_dio_end_io(struct kiocb *iocb, loff_t offset, ssize_t bytes, void *private) @@ -628,6 +628,9 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, struct inode *inode = file_inode(iocb->ki_filp); int level; + if (bytes <= 0) + return 0; + /* this io's submitter should not have unlocked this before we could */ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); @@ -644,6 +647,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, level = ocfs2_iocb_rw_locked_level(iocb); ocfs2_rw_unlock(inode, level); } + + return 0; } static int ocfs2_releasepage(struct page *page, gfp_t wait) diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 3746367098fd..0ebc90496525 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type) return 0; } -static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id, +static int quota_quotaon(struct super_block *sb, int type, qid_t id, struct path *path) { if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) @@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id, return 0; } +/* + * Return quota for next active quota >= this id, if any exists, + * otherwise return -ESRCH via ->get_nextdqblk + */ +static int quota_getnextquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct kqid qid; + struct qc_dqblk fdq; + struct if_nextdqblk idq; + int ret; + + if (!sb->s_qcop->get_nextdqblk) + return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq); + if (ret) + return ret; + /* struct if_nextdqblk is a superset of struct if_dqblk */ + copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq); + idq.dqb_id = from_kqid(current_user_ns(), qid); + if (copy_to_user(addr, &idq, sizeof(idq))) + return -EFAULT; + return 0; +} + static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); @@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id, return ret; } +/* + * Return quota for next active quota >= this id, if any exists, + * otherwise return -ESRCH via ->get_nextdqblk. + */ +static int quota_getnextxquota(struct super_block *sb, int type, qid_t id, + void __user *addr) +{ + struct fs_disk_quota fdq; + struct qc_dqblk qdq; + struct kqid qid; + qid_t id_out; + int ret; + + if (!sb->s_qcop->get_nextdqblk) + return -ENOSYS; + qid = make_kqid(current_user_ns(), type, id); + if (!qid_valid(qid)) + return -EINVAL; + ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq); + if (ret) + return ret; + id_out = from_kqid(current_user_ns(), qid); + copy_to_xfs_dqblk(&fdq, &qdq, type, id_out); + if (copy_to_user(addr, &fdq, sizeof(fdq))) + return -EFAULT; + return ret; +} + static int quota_rmxquota(struct super_block *sb, void __user *addr) { __u32 flags; @@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, switch (cmd) { case Q_QUOTAON: - return quota_quotaon(sb, type, cmd, id, path); + return quota_quotaon(sb, type, id, path); case Q_QUOTAOFF: return quota_quotaoff(sb, type); case Q_GETFMT: @@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return quota_setinfo(sb, type, addr); case Q_GETQUOTA: return quota_getquota(sb, type, id, addr); + case Q_GETNEXTQUOTA: + return quota_getnextquota(sb, type, id, addr); case Q_SETQUOTA: return quota_setquota(sb, type, id, addr); case Q_SYNC: @@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, return quota_setxquota(sb, type, id, addr); case Q_XGETQUOTA: return quota_getxquota(sb, type, id, addr); + case Q_XGETNEXTQUOTA: + return quota_getnextxquota(sb, type, id, addr); case Q_XQUOTASYNC: if (sb->s_flags & MS_RDONLY) return -EROFS; @@ -708,10 +768,12 @@ static int quotactl_cmd_write(int cmd) switch (cmd) { case Q_GETFMT: case Q_GETINFO: + case Q_GETNEXTQUOTA: case Q_SYNC: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XGETQUOTA: + case Q_XGETNEXTQUOTA: case Q_XQUOTASYNC: return 0; } diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 444626ddbd1b..d9b42425291e 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -118,8 +118,6 @@ xfs_allocbt_free_block( xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_trans_agbtree_delta(cur->bc_tp, -1); - - xfs_trans_binval(cur->bc_tp, bp); return 0; } diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 919756e3ba53..90928bbe693c 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -24,22 +24,6 @@ * Small attribute lists are packed as tightly as possible so as * to fit into the literal area of the inode. */ - -/* - * Entries are packed toward the top as tight as possible. - */ -typedef struct xfs_attr_shortform { - struct xfs_attr_sf_hdr { /* constant-structure header block */ - __be16 totsize; /* total bytes in shortform list */ - __u8 count; /* count of active entries */ - } hdr; - struct xfs_attr_sf_entry { - __uint8_t namelen; /* actual length of name (no NULL) */ - __uint8_t valuelen; /* actual length of value (no NULL) */ - __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - __uint8_t nameval[1]; /* name & value bytes concatenated */ - } list[1]; /* variable sized array */ -} xfs_attr_shortform_t; typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ef00156f4f96..041b6948aecc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -477,10 +477,7 @@ xfs_bmap_check_leaf_extents( } block = XFS_BUF_TO_BLOCK(bp); } - if (bp_release) { - bp_release = 0; - xfs_trans_brelse(NULL, bp); - } + return; error0: @@ -912,7 +909,7 @@ xfs_bmap_local_to_extents( * We don't want to deal with the case of keeping inode data inline yet. * So sending the data fork of a regular inode is invalid. */ - ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK)); + ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK)); ifp = XFS_IFORK_PTR(ip, whichfork); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL); @@ -1079,7 +1076,7 @@ xfs_bmap_add_attrfork_local( if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip)) return 0; - if (S_ISDIR(ip->i_d.di_mode)) { + if (S_ISDIR(VFS_I(ip)->i_mode)) { memset(&dargs, 0, sizeof(dargs)); dargs.geo = ip->i_mount->m_dir_geo; dargs.dp = ip; @@ -1091,7 +1088,7 @@ xfs_bmap_add_attrfork_local( return xfs_dir2_sf_to_block(&dargs); } - if (S_ISLNK(ip->i_d.di_mode)) + if (S_ISLNK(VFS_I(ip)->i_mode)) return xfs_bmap_local_to_extents(tp, ip, firstblock, 1, flags, XFS_DATA_FORK, xfs_symlink_local_to_remote); @@ -4721,6 +4718,66 @@ error0: } /* + * When a delalloc extent is split (e.g., due to a hole punch), the original + * indlen reservation must be shared across the two new extents that are left + * behind. + * + * Given the original reservation and the worst case indlen for the two new + * extents (as calculated by xfs_bmap_worst_indlen()), split the original + * reservation fairly across the two new extents. If necessary, steal available + * blocks from a deleted extent to make up a reservation deficiency (e.g., if + * ores == 1). The number of stolen blocks is returned. The availability and + * subsequent accounting of stolen blocks is the responsibility of the caller. + */ +static xfs_filblks_t +xfs_bmap_split_indlen( + xfs_filblks_t ores, /* original res. */ + xfs_filblks_t *indlen1, /* ext1 worst indlen */ + xfs_filblks_t *indlen2, /* ext2 worst indlen */ + xfs_filblks_t avail) /* stealable blocks */ +{ + xfs_filblks_t len1 = *indlen1; + xfs_filblks_t len2 = *indlen2; + xfs_filblks_t nres = len1 + len2; /* new total res. */ + xfs_filblks_t stolen = 0; + + /* + * Steal as many blocks as we can to try and satisfy the worst case + * indlen for both new extents. + */ + while (nres > ores && avail) { + nres--; + avail--; + stolen++; + } + + /* + * The only blocks available are those reserved for the original + * extent and what we can steal from the extent being removed. + * If this still isn't enough to satisfy the combined + * requirements for the two new extents, skim blocks off of each + * of the new reservations until they match what is available. + */ + while (nres > ores) { + if (len1) { + len1--; + nres--; + } + if (nres == ores) + break; + if (len2) { + len2--; + nres--; + } + } + + *indlen1 = len1; + *indlen2 = len2; + + return stolen; +} + +/* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). */ @@ -4984,28 +5041,29 @@ xfs_bmap_del_extent( XFS_IFORK_NEXT_SET(ip, whichfork, XFS_IFORK_NEXTENTS(ip, whichfork) + 1); } else { + xfs_filblks_t stolen; ASSERT(whichfork == XFS_DATA_FORK); - temp = xfs_bmap_worst_indlen(ip, temp); + + /* + * Distribute the original indlen reservation across the + * two new extents. Steal blocks from the deleted extent + * if necessary. Stealing blocks simply fudges the + * fdblocks accounting in xfs_bunmapi(). + */ + temp = xfs_bmap_worst_indlen(ip, got.br_blockcount); + temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount); + stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2, + del->br_blockcount); + da_new = temp + temp2 - stolen; + del->br_blockcount -= stolen; + + /* + * Set the reservation for each extent. Warn if either + * is zero as this can lead to delalloc problems. + */ + WARN_ON_ONCE(!temp || !temp2); xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } } trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); xfs_iext_insert(ip, *idx + 1, 1, &new, state); @@ -5210,7 +5268,7 @@ xfs_bunmapi( * This is better than zeroing it. */ ASSERT(del.br_state == XFS_EXT_NORM); - ASSERT(xfs_trans_get_block_res(tp) > 0); + ASSERT(tp->t_blk_res > 0); /* * If this spans a realtime extent boundary, * chop it back to the start of the one we end at. @@ -5241,7 +5299,7 @@ xfs_bunmapi( del.br_startblock += mod; } else if ((del.br_startoff == start && (del.br_state == XFS_EXT_UNWRITTEN || - xfs_trans_get_block_res(tp) == 0)) || + tp->t_blk_res == 0)) || !xfs_sb_version_hasextflgbit(&mp->m_sb)) { /* * Can't make it unwritten. There isn't @@ -5296,9 +5354,37 @@ xfs_bunmapi( goto nodelete; } } + + /* + * If it's the case where the directory code is running + * with no block reservation, and the deleted block is in + * the middle of its extent, and the resulting insert + * of an extent would cause transformation to btree format, + * then reject it. The calling code will then swap + * blocks around instead. + * We have to do this now, rather than waiting for the + * conversion to btree format, since the transaction + * will be dirty. + */ + if (!wasdel && tp->t_blk_res == 0 && + XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && + XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ + XFS_IFORK_MAXEXT(ip, whichfork) && + del.br_startoff > got.br_startoff && + del.br_startoff + del.br_blockcount < + got.br_startoff + got.br_blockcount) { + error = -ENOSPC; + goto error0; + } + + /* + * Unreserve quota and update realtime free space, if + * appropriate. If delayed allocation, update the inode delalloc + * counter now and wait to update the sb counters as + * xfs_bmap_del_extent() might need to borrow some blocks. + */ if (wasdel) { ASSERT(startblockval(del.br_startblock) > 0); - /* Update realtime/data freespace, unreserve quota */ if (isrt) { xfs_filblks_t rtexts; @@ -5309,8 +5395,6 @@ xfs_bunmapi( ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_RTBLKS); } else { - xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, - false); (void)xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del.br_blockcount), 0, XFS_QMOPT_RES_REGBLKS); @@ -5321,32 +5405,16 @@ xfs_bunmapi( XFS_BTCUR_BPRV_WASDEL; } else if (cur) cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL; - /* - * If it's the case where the directory code is running - * with no block reservation, and the deleted block is in - * the middle of its extent, and the resulting insert - * of an extent would cause transformation to btree format, - * then reject it. The calling code will then swap - * blocks around instead. - * We have to do this now, rather than waiting for the - * conversion to btree format, since the transaction - * will be dirty. - */ - if (!wasdel && xfs_trans_get_block_res(tp) == 0 && - XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS && - XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */ - XFS_IFORK_MAXEXT(ip, whichfork) && - del.br_startoff > got.br_startoff && |