From 42bec214d8bd432be6d32a1acb0a9079ecd4d142 Mon Sep 17 00:00:00 2001 From: Sachin Prabhu Date: Thu, 3 Aug 2017 13:09:03 +0530 Subject: cifs: Fix df output for users with quota limits The df for a SMB2 share triggers a GetInfo call for FS_FULL_SIZE_INFORMATION. The values returned are used to populate struct statfs. The problem is that none of the information returned by the call contains the total blocks available on the filesystem. Instead we use the blocks available to the user ie. quota limitation when filling out statfs.f_blocks. The information returned does contain Actual free units on the filesystem and is used to populate statfs.f_bfree. For users with quota enabled, it can lead to situations where the total free space reported is more than the total blocks on the system ending up with df reports like the following # df -h /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.5G -2.3G 2.5G - /mnt/a To fix this problem, we instead populate both statfs.f_bfree with the same value as statfs.f_bavail ie. CallerAvailableAllocationUnits. This is similar to what is done already in the code for cifs and df now reports the quota information for the user used to mount the share. # df --si /mnt/a Filesystem Size Used Avail Use% Mounted on //192.168.22.10/a 2.7G 101M 2.6G 4% /mnt/a Signed-off-by: Sachin Prabhu Signed-off-by: Pierguido Lambri Signed-off-by: Steve French Cc: --- fs/cifs/smb2pdu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 5fb2fc2d0080..97edb4d376cd 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3219,8 +3219,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf, kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) * le32_to_cpu(pfs_inf->SectorsPerAllocationUnit); kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits); - kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits); - kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); + kst->f_bfree = kst->f_bavail = + le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits); return; } -- cgit v1.2.3 From d3edede29f74d335f81d95a4588f5f136a9f7dcf Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Wed, 23 Aug 2017 14:48:14 +1000 Subject: cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Add checking for the path component length and verify it is <= the maximum that the server advertizes via FileFsAttributeInformation. With this patch cifs.ko will now return ENAMETOOLONG instead of ENOENT when users to access an overlong path. To test this, try to cd into a (non-existing) directory on a CIFS share that has a too long name: cd /mnt/aaaaaaaaaaaaaaa... and it now should show a good error message from the shell: bash: cd: /mnt/aaaaaaaaaaaaaaaa...aaaaaa: File name too long rh bz 1153996 Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Cc: --- fs/cifs/dir.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 56366e984076..569d3fb736be 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -194,15 +194,20 @@ cifs_bp_rename_retry: } /* + * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. * The VFS will not allow "/", but "\" is allowed by posix. */ static int -check_name(struct dentry *direntry) +check_name(struct dentry *direntry, struct cifs_tcon *tcon) { struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); int i; + if (unlikely(direntry->d_name.len > + tcon->fsAttrInfo.MaxPathNameComponentLength)) + return -ENAMETOOLONG; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { for (i = 0; i < direntry->d_name.len; i++) { if (direntry->d_name.name[i] == '\\') { @@ -500,10 +505,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, return finish_no_open(file, res); } - rc = check_name(direntry); - if (rc) - return rc; - xid = get_xid(); cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n", @@ -516,6 +517,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, } tcon = tlink_tcon(tlink); + + rc = check_name(direntry, tcon); + if (rc) + goto out_free_xid; + server = tcon->ses->server; if (server->ops->new_lease_key) @@ -776,7 +782,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - rc = check_name(direntry); + rc = check_name(direntry, pTcon); if (rc) goto lookup_out; -- cgit v1.2.3 From fc788f64f1f3eb31e87d4f53bcf1ab76590d5838 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:19 -0400 Subject: nfsd: Limit end of page list when decoding NFSv4 WRITE When processing an NFSv4 WRITE operation, argp->end should never point past the end of the data in the final page of the page list. Otherwise, nfsd4_decode_compound can walk into uninitialized memory. More critical, nfsd4_decode_write is failing to increment argp->pagelen when it increments argp->pagelist. This can cause later xdr decoders to assume more data is available than really is, which can cause server crashes on malformed requests. Signed-off-by: Chuck Lever Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..5f940d2a136b 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -144,7 +144,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp) argp->p = page_address(argp->pagelist[0]); argp->pagelist++; if (argp->pagelen < PAGE_SIZE) { - argp->end = argp->p + (argp->pagelen>>2); + argp->end = argp->p + XDR_QUADLEN(argp->pagelen); argp->pagelen = 0; } else { argp->end = argp->p + (PAGE_SIZE>>2); @@ -1279,9 +1279,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= pages * PAGE_SIZE; len -= pages * PAGE_SIZE; - argp->p = (__be32 *)page_address(argp->pagelist[0]); - argp->pagelist++; - argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE); + next_decode_page(argp); } argp->p += XDR_QUADLEN(len); -- cgit v1.2.3 From fffa281b48a91ad6dac1a18c5907ece58fa3879b Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 25 Aug 2017 15:55:36 -0700 Subject: dax: fix deadlock due to misaligned PMD faults In DAX there are two separate places where the 2MiB range of a PMD is defined. The first is in the page tables, where a PMD mapping inserted for a given address spans from (vmf->address & PMD_MASK) to ((vmf->address & PMD_MASK) + PMD_SIZE - 1). That is, from the 2MiB boundary below the address to the 2MiB boundary above the address. So, for example, a fault at address 3MiB (0x30 0000) falls within the PMD that ranges from 2MiB (0x20 0000) to 4MiB (0x40 0000). The second PMD range is in the mapping->page_tree, where a given file offset is covered by a radix tree entry that spans from one 2MiB aligned file offset to another 2MiB aligned file offset. So, for example, the file offset for 3MiB (pgoff 768) falls within the PMD range for the order 9 radix tree entry that ranges from 2MiB (pgoff 512) to 4MiB (pgoff 1024). This system works so long as the addresses and file offsets for a given mapping both have the same offsets relative to the start of each PMD. Consider the case where the starting address for a given file isn't 2MiB aligned - say our faulting address is 3 MiB (0x30 0000), but that corresponds to the beginning of our file (pgoff 0). Now all the PMDs in the mapping are misaligned so that the 2MiB range defined in the page tables never matches up with the 2MiB range defined in the radix tree. The current code notices this case for DAX faults to storage with the following test in dax_pmd_insert_mapping(): if (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR) goto unlock_fallback; This test makes sure that the pfn we get from the driver is 2MiB aligned, and relies on the assumption that the 2MiB alignment of the pfn we get back from the driver matches the 2MiB alignment of the faulting address. However, faults to holes were not checked and we could hit the problem described above. This was reported in response to the NVML nvml/src/test/pmempool_sync TEST5: $ cd nvml/src/test/pmempool_sync $ make TEST5 You can grab NVML here: https://github.com/pmem/nvml/ The dmesg warning you see when you hit this error is: WARNING: CPU: 13 PID: 2900 at fs/dax.c:641 dax_insert_mapping_entry+0x2df/0x310 Where we notice in dax_insert_mapping_entry() that the radix tree entry we are about to replace doesn't match the locked entry that we had previously inserted into the tree. This happens because the initial insertion was done in grab_mapping_entry() using a pgoff calculated from the faulting address (vmf->address), and the replacement in dax_pmd_load_hole() => dax_insert_mapping_entry() is done using vmf->pgoff. In our failure case those two page offsets (one calculated from vmf->address, one using vmf->pgoff) point to different order 9 radix tree entries. This failure case can result in a deadlock because the radix tree unlock also happens on the pgoff calculated from vmf->address. This means that the locked radix tree entry that we swapped in to the tree in dax_insert_mapping_entry() using vmf->pgoff is never unlocked, so all future faults to that 2MiB range will block forever. Fix this by validating that the faulting address's PMD offset matches the PMD offset from the start of the file. This check is done at the very beginning of the fault and covers faults that would have mapped to storage as well as faults to holes. I left the COLOUR check in dax_pmd_insert_mapping() in place in case we ever hit the insanity condition where the alignment of the pfn we get from the driver doesn't match the alignment of the userspace address. Link: http://lkml.kernel.org/r/20170822222436.18926-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Reported-by: "Slusarz, Marcin" Reviewed-by: Jan Kara Cc: Alexander Viro Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 306c2b603fb8..865d42c63e23 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1383,6 +1383,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, trace_dax_pmd_fault(inode, vmf, max_pgoff, 0); + /* + * Make sure that the faulting address's PMD offset (color) matches + * the PMD offset from the start of the file. This is necessary so + * that a PMD range in the page table overlaps exactly with a PMD + * range in the radix tree. + */ + if ((vmf->pgoff & PG_PMD_COLOUR) != + ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR)) + goto fallback; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) goto fallback; -- cgit v1.2.3 From 79de3cbe9a974e03a02b71da80da9ee0eb15a2d0 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Wed, 23 Aug 2017 22:37:00 +0200 Subject: fs/select: Fix memory corruption in compat_get_fd_set() Commit 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to compat_{get,put}_bitmap()") changed the calculation on how many bytes need to be zeroed when userspace handed over a NULL pointer for a fdset array in the select syscall. The calculation was changed in compat_get_fd_set() wrongly from memset(fdset, 0, ((nr + 1) & ~1)*sizeof(compat_ulong_t)); to memset(fdset, 0, ALIGN(nr, BITS_PER_LONG)); The ALIGN(nr, BITS_PER_LONG) calculates the number of _bits_ which need to be zeroed in the target fdset array (rounded up to the next full bits for an unsigned long). But the memset() call expects the number of _bytes_ to be zeroed. This leads to clearing more memory than wanted (on the stack area or even at kmalloc()ed memory areas) and to random kernel crashes as we have seen them on the parisc platform. The correct change should have been memset(fdset, 0, (ALIGN(nr, BITS_PER_LONG) / BITS_PER_LONG) * BYTES_PER_LONG); which is the same as can be archieved with a call to zero_fd_set(nr, fdset). Fixes: 464d62421cb8 ("select: switch compat_{get,put}_fd_set() to compat_{get,put}_bitmap()" Acked-by:: Al Viro Signed-off-by: Helge Deller Signed-off-by: Linus Torvalds --- fs/select.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/select.c b/fs/select.c index 9d5f15ed87fe..c6362e38ae92 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1164,11 +1164,7 @@ int compat_get_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, if (ufdset) { return compat_get_bitmap(fdset, ufdset, nr); } else { - /* Tricky, must clear full unsigned long in the - * kernel fdset at the end, ALIGN makes sure that - * actually happens. - */ - memset(fdset, 0, ALIGN(nr, BITS_PER_LONG)); + zero_fd_set(nr, fdset); return 0; } } -- cgit v1.2.3 From 9e37b1784f2be9397a903307574ee565bbadfd75 Mon Sep 17 00:00:00 2001 From: Pavel Shilovsky Date: Thu, 24 Aug 2017 15:16:40 -0700 Subject: CIFS: Fix maximum SMB2 header size Currently the maximum size of SMB2/3 header is set incorrectly which leads to hanging of directory listing operations on encrypted SMB3 connections. Fix this by setting the maximum size to 170 bytes that is calculated as RFC1002 length field size (4) + transform header size (52) + SMB2 header size (64) + create response size (56). Cc: Signed-off-by: Pavel Shilovsky Signed-off-by: Steve French Acked-by: Sachin Prabhu --- fs/cifs/smb2pdu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h index 18700fd25a0b..2826882c81d1 100644 --- a/fs/cifs/smb2pdu.h +++ b/fs/cifs/smb2pdu.h @@ -84,8 +84,8 @@ #define NUMBER_OF_SMB2_COMMANDS 0x0013 -/* BB FIXME - analyze following length BB */ -#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */ +/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */ +#define MAX_SMB2_HDR_SIZE 0x00b0 #define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) #define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd) -- cgit v1.2.3 From 6e3c1529c39e92ed64ca41d53abadabbaa1d5393 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 27 Aug 2017 16:56:08 -0500 Subject: CIFS: remove endian related sparse warning Recent patch had an endian warning ie cifs: return ENAMETOOLONG for overlong names in cifs_open()/cifs_lookup() Signed-off-by: Steve French CC: Ronnie Sahlberg CC: Stable Acked-by: Pavel Shilovsky --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 569d3fb736be..e702d48bd023 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -205,7 +205,7 @@ check_name(struct dentry *direntry, struct cifs_tcon *tcon) int i; if (unlikely(direntry->d_name.len > - tcon->fsAttrInfo.MaxPathNameComponentLength)) + le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength))) return -ENAMETOOLONG; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { -- cgit v1.2.3 From ddef7ed2b5cbafae692d1d580bb5a07808926a9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jul 2017 18:58:37 +0200 Subject: annotate RWF_... flags [AV: added missing annotations in syscalls.h/compat.h] Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/nfsd/vfs.c | 2 +- fs/read_write.c | 50 +++++++++++++++++++++++++------------------------- 2 files changed, 26 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 38d0383dc7f9..bc69d40c4e8b 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -969,7 +969,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, int use_wgather; loff_t pos = offset; unsigned int pflags = current->flags; - int flags = 0; + rwf_t flags = 0; if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* diff --git a/fs/read_write.c b/fs/read_write.c index 0cc7033aa413..61b58c7b6531 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -33,7 +33,7 @@ const struct file_operations generic_ro_fops = { EXPORT_SYMBOL(generic_ro_fops); -static inline int unsigned_offsets(struct file *file) +static inline bool unsigned_offsets(struct file *file) { return file->f_mode & FMODE_UNSIGNED_OFFSET; } @@ -633,7 +633,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) EXPORT_SYMBOL(iov_shorten); static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, int type, int flags) + loff_t *ppos, int type, rwf_t flags) { struct kiocb kiocb; ssize_t ret; @@ -655,7 +655,7 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, /* Do it by hand, with file-ops */ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, - loff_t *ppos, int type, int flags) + loff_t *ppos, int type, rwf_t flags) { ssize_t ret = 0; @@ -871,7 +871,7 @@ out: #endif static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, - loff_t *pos, int flags) + loff_t *pos, rwf_t flags) { size_t tot_len; ssize_t ret = 0; @@ -899,7 +899,7 @@ out: } ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags) + rwf_t flags) { if (!file->f_op->read_iter) return -EINVAL; @@ -908,7 +908,7 @@ ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, EXPORT_SYMBOL(vfs_iter_read); static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, - loff_t *pos, int flags) + loff_t *pos, rwf_t flags) { size_t tot_len; ssize_t ret = 0; @@ -935,7 +935,7 @@ static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, } ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, - int flags) + rwf_t flags) { if (!file->f_op->write_iter) return -EINVAL; @@ -944,7 +944,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, EXPORT_SYMBOL(vfs_iter_write); ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -962,7 +962,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_readv); ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -981,7 +981,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, int flags) + unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -1001,7 +1001,7 @@ static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, } static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, int flags) + unsigned long vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; @@ -1027,7 +1027,7 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) } static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; @@ -1050,7 +1050,7 @@ static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, } static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret = -EBADF; @@ -1094,7 +1094,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, - int, flags) + rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); @@ -1114,7 +1114,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, - int, flags) + rwf_t, flags) { loff_t pos = pos_from_hilo(pos_h, pos_l); @@ -1127,7 +1127,7 @@ SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, #ifdef CONFIG_COMPAT static size_t compat_readv(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -1147,7 +1147,7 @@ static size_t compat_readv(struct file *file, static size_t do_compat_readv(compat_ulong_t fd, const struct compat_iovec __user *vec, - compat_ulong_t vlen, int flags) + compat_ulong_t vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1173,7 +1173,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, static long do_compat_preadv64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret; @@ -1211,7 +1211,7 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos, int, flags) + unsigned long, vlen, loff_t, pos, rwf_t, flags) { return do_compat_preadv64(fd, vec, vlen, pos, flags); } @@ -1220,7 +1220,7 @@ COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high, - int, flags) + rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; @@ -1232,7 +1232,7 @@ COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, static size_t compat_writev(struct file *file, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t *pos, int flags) + unsigned long vlen, loff_t *pos, rwf_t flags) { struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; @@ -1254,7 +1254,7 @@ static size_t compat_writev(struct file *file, static size_t do_compat_writev(compat_ulong_t fd, const struct compat_iovec __user* vec, - compat_ulong_t vlen, int flags) + compat_ulong_t vlen, rwf_t flags) { struct fd f = fdget_pos(fd); ssize_t ret; @@ -1279,7 +1279,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, static long do_compat_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, - unsigned long vlen, loff_t pos, int flags) + unsigned long vlen, loff_t pos, rwf_t flags) { struct fd f; ssize_t ret; @@ -1317,7 +1317,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos, int, flags) + unsigned long, vlen, loff_t, pos, rwf_t, flags) { return do_compat_pwritev64(fd, vec, vlen, pos, flags); } @@ -1325,7 +1325,7 @@ COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; -- cgit v1.2.3 From dd2bc473482eedc60c29cf00ad12568ce40ce511 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Fri, 4 Aug 2017 11:22:31 +0800 Subject: ceph: fix readpage from fscache ceph_readpage() unlocks page prematurely prematurely in the case that page is reading from fscache. Caller of readpage expects that page is uptodate when it get unlocked. So page shoule get locked by completion callback of fscache_read_or_alloc_pages() Cc: stable@vger.kernel.org # 4.1+, needs backporting for < 4.7 Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/addr.c | 24 +++++++++++++++--------- fs/ceph/cache.c | 12 +++--------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 50836280a6f8..1bc709fe330a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -189,7 +189,7 @@ static int ceph_releasepage(struct page *page, gfp_t g) /* * read a single page, without unlocking it. */ -static int readpage_nounlock(struct file *filp, struct page *page) +static int ceph_do_readpage(struct file *filp, struct page *page) { struct inode *inode = file_inode(filp); struct ceph_inode_info *ci = ceph_inode(inode); @@ -219,7 +219,7 @@ static int readpage_nounlock(struct file *filp, struct page *page) err = ceph_readpage_from_fscache(inode, page); if (err == 0) - goto out; + return -EINPROGRESS; dout("readpage inode %p file %p page %p index %lu\n", inode, filp, page, page->index); @@ -249,8 +249,11 @@ out: static int ceph_readpage(struct file *filp, struct page *page) { - int r = readpage_nounlock(filp, page); - unlock_page(page); + int r = ceph_do_readpage(filp, page); + if (r != -EINPROGRESS) + unlock_page(page); + else + r = 0; return r; } @@ -1237,7 +1240,7 @@ retry_locked: goto retry_locked; r = writepage_nounlock(page, NULL); if (r < 0) - goto fail_nosnap; + goto fail_unlock; goto retry_locked; } @@ -1265,11 +1268,14 @@ retry_locked: } /* we need to read it. */ - r = readpage_nounlock(file, page); - if (r < 0) - goto fail_nosnap; + r = ceph_do_readpage(file, page); + if (r < 0) { + if (r == -EINPROGRESS) + return -EAGAIN; + goto fail_unlock; + } goto retry_locked; -fail_nosnap: +fail_unlock: unlock_page(page); return r; } diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index fd1172823f86..337f88673ed9 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -297,13 +297,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp) } } -static void ceph_vfs_readpage_complete(struct page *page, void *data, int error) -{ - if (!error) - SetPageUptodate(page); -} - -static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error) +static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error) { if (!error) SetPageUptodate(page); @@ -331,7 +325,7 @@ int ceph_readpage_from_fscache(struct inode *inode, struct page *page) return -ENOBUFS; ret = fscache_read_or_alloc_page(ci->fscache, page, - ceph_vfs_readpage_complete, NULL, + ceph_readpage_from_fscache_complete, NULL, GFP_KERNEL); switch (ret) { @@ -360,7 +354,7 @@ int ceph_readpages_from_fscache(struct inode *inode, return -ENOBUFS; ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages, - ceph_vfs_readpage_complete_unlock, + ceph_readpage_from_fscache_complete, NULL, mapping_gfp_mask(mapping)); switch (ret) { -- cgit v1.2.3 From a4d1a885251382250ec315482bdd8ca52dd61e6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Thu, 31 Aug 2017 17:17:26 -0400 Subject: dax: update to new mmu_notifier semantic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace all mmu_notifier_invalidate_page() calls by *_invalidate_range() and make sure it is bracketed by calls to *_invalidate_range_start()/end(). Note that because we can not presume the pmd value or pte value we have to assume the worst and unconditionaly report an invalidation as happening. Signed-off-by: Jérôme Glisse Cc: Dan Williams Cc: Ross Zwisler Cc: Bernhard Held Cc: Adam Borowski Cc: Andrea Arcangeli Cc: Radim Krčmář Cc: Wanpeng Li Cc: Paolo Bonzini Cc: Takashi Iwai Cc: Nadav Amit Cc: Mike Galbraith Cc: Kirill A. Shutemov Cc: axie Cc: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dax.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/dax.c b/fs/dax.c index 865d42c63e23..ab925dc6647a 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -646,11 +646,10 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pte_t pte, *ptep = NULL; pmd_t *pmdp = NULL; spinlock_t *ptl; - bool changed; i_mmap_lock_read(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, index, index) { - unsigned long address; + unsigned long address, start, end; cond_resched(); @@ -658,8 +657,13 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, continue; address = pgoff_address(index, vma); - changed = false; - if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl)) + + /* + * Note because we provide start/end to follow_pte_pmd it will + * call mmu_notifier_invalidate_range_start() on our behalf + * before taking any lock. + */ + if (follow_pte_pmd(vma->vm_mm, address, &start, &end, &ptep, &pmdp, &ptl)) continue; if (pmdp) { @@ -676,7 +680,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping, pmd = pmd_wrprotect(pmd); pmd = pmd_mkclean(pmd); set_pmd_at(vma->vm_mm, address, pmdp, pmd); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pmd: spin_unlock(ptl); #endif @@ -691,13 +695,12 @@ unlock_pmd: pte = pte_wrprotect(pte); pte = pte_mkclean(pte); set_pte_at(vma->vm_mm, address, ptep, pte); - changed = true; + mmu_notifier_invalidate_range(vma->vm_mm, start, end); unlock_pte: pte_unmap_unlock(ptep, ptl); } - if (changed) - mmu_notifier_invalidate_page(vma->vm_mm, address); + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); } i_mmap_unlock_read(mapping); } -- cgit v1.2.3 From c227390c91a355300f47f9bef0aefbdfaaca1500 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Thu, 31 Aug 2017 16:46:59 -0500 Subject: jfs should use MAX_LFS_FILESIZE when calculating s_maxbytes jfs had previously avoided the use of MAX_LFS_FILESIZE because it hadn't accounted for the whole 32-bit index range on 32-bit systems. That has been fixed by commit 0cc3b0ec23ce ("Clarify (and fix) MAX_LFS_FILESIZE macros"), so we can simplify the code now. Suggested by Andreas Dilger. Signed-off-by: Dave Kleikamp Reviewed-by: Andreas Dilger Cc: jfs-discussion@lists.sourceforge.net Signed-off-by: Linus Torvalds --- fs/jfs/super.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 78b41e1d5c67..60726ae7cf26 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -619,16 +619,10 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) if (!sb->s_root) goto out_no_root; - /* logical blocks are represented by 40 bits in pxd_t, etc. */ - sb->s_maxbytes = ((u64) sb->s_blocksize) << 40; -#if BITS_PER_LONG == 32 - /* - * Page cache is indexed by long. - * I would use MAX_LFS_FILESIZE, but it's only half as big + /* logical blocks are represented by 40 bits in pxd_t, etc. + * and page cache is indexed by long */ - sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1, - (u64)sb->s_maxbytes); -#endif + sb->s_maxbytes = min(((loff_t)sb->s_blocksize) << 40, MAX_LFS_FILESIZE); sb->s_time_gran = 1; return 0; -- cgit v1.2.3 From 7e682f766f289887c5cbf7c0a1e4970103f01ac4 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 31 Aug 2017 21:34:24 -0500 Subject: Fix warning messages when mounting to older servers When mounting to older servers, such as Windows XP (or even Windows 7), the limited error messages that can be passed back to user space can get confusing since the default dialect has changed from SMB1 (CIFS) to more secure SMB3 dialect. Log additional information when the user chooses to use the default dialects and when the server does not support the dialect requested. Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Acked-by: Pavel Shilovsky --- fs/cifs/connect.c | 22 +++++++++++++++++++++- fs/cifs/smb2pdu.c | 7 ++++++- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 59647eb72c5f..83a8f52cd879 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1223,6 +1223,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, char *tmp_end, *value; char delim; bool got_ip = false; + bool got_version = false; unsigned short port = 0; struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr; @@ -1874,24 +1875,35 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, pr_warn("CIFS: server netbiosname longer than 15 truncated.\n"); break; case Opt_ver: + /* version of mount userspace tools, not dialect */ string = match_strdup(args); if (string == NULL) goto out_nomem; + /* If interface changes in mount.cifs bump to new ver */ if (strncasecmp(string, "1", 1) == 0) { + if (strlen(string) > 1) { + pr_warn("Bad mount helper ver=%s. Did " + "you want SMB1 (CIFS) dialect " + "and mean to type vers=1.0 " + "instead?\n", string); + goto cifs_parse_mount_err; + } /* This is the default */ break; } /* For all other value, error */ - pr_warn("CIFS: Invalid version specified\n"); + pr_warn("CIFS: Invalid mount helper version specified\n"); goto cifs_parse_mount_err; case Opt_vers: + /* protocol version (dialect) */ string = match_strdup(args); if (string == NULL) goto out_nomem; if (cifs_parse_smb_version(string, vol) != 0) goto cifs_parse_mount_err; + got_version = true; break; case Opt_sec: string = match_strdup(args); @@ -1973,6 +1985,14 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, else if (override_gid == 1) pr_notice("CIFS: ignoring forcegid mount option specified with no gid= option.\n"); + if (got_version == false) + pr_warn("No dialect specified on mount. Default has changed to " + "a more secure dialect, SMB3 (vers=3.0), from CIFS " + "(SMB1). To use the less secure SMB1 dialect to access " + "old servers which do not support SMB3 specify vers=1.0" + " on mount. For somewhat newer servers such as Windows " + "7 try vers=2.1.\n"); + kfree(mountdata_copy); return 0; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 97edb4d376cd..7aa67206f6da 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -514,7 +514,12 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) * No tcon so can't do * cifs_stats_inc(&tcon->stats.smb2_stats.smb2_com_fail[SMB2...]); */ - if (rc != 0) + if (rc == -EOPNOTSUPP) { + cifs_dbg(VFS, "Dialect not supported by server. Consider " + "specifying vers=1.0 or vers=2.1 on mount for accessing" + " older servers\n"); + goto neg_exit; + } else if (rc != 0) goto neg_exit; cifs_dbg(FYI, "mode 0x%x\n", rsp->SecurityMode); -- cgit v1.2.3 From 138e4ad67afd5c6c318b056b4d17c17f2c0ca5c0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Sep 2017 18:55:33 +0200 Subject: epoll: fix race between ep_poll_callback(POLLFREE) and ep_free()/ep_remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The race was introduced by me in commit 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead"). I did not realize that nothing can protect eventpoll after ep_poll_callback() sets ->whead = NULL, only whead->lock can save us from the race with ep_free() or ep_remove(). Move ->whead = NULL to the end of ep_poll_callback() and add the necessary barriers. TODO: cleanup the ewake/EPOLLEXCLUSIVE logic, it was confusing even before this patch. Hopefully this explains use-after-free reported by syzcaller: BUG: KASAN: use-after-free in debug_spin_lock_before ... _raw_spin_lock_irqsave+0x4a/0x60 kernel/locking/spinlock.c:159 ep_poll_callback+0x29f/0xff0 fs/eventpoll.c:1148 this is spin_lock(eventpoll->lock), ... Freed by task 17774: ... kfree+0xe8/0x2c0 mm/slub.c:3883 ep_free+0x22c/0x2a0 fs/eventpoll.c:865 Fixes: 971316f0503a ("epoll: ep_unregister_pollwait() can use the freed pwq->whead") Reported-by: 范龙飞 Cc: stable@vger.kernel.org Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e767e4389cb1..adbe328b957c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -600,8 +600,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq) wait_queue_head_t *whead; rcu_read_lock(); - /* If it is cleared by POLLFREE, it should be rcu-safe */ - whead = rcu_dereference(pwq->whead); + /* + * If it is cleared by POLLFREE, it should be rcu-safe. + * If we read NULL we need a barrier paired with + * smp_store_release() in ep_poll_callback(), otherwise + * we rely on whead->lock. + */ + whead = smp_load_acquire(&pwq->whead); if (whead) remove_wait_queue(whead, &pwq->wait); rcu_read_unlock(); @@ -1134,17 +1139,6 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v struct eventpoll *ep = epi->ep; int ewake = 0; - if ((unsigned long)key & POLLFREE) { - ep_pwq_from_wait(wait)->whead = NULL; - /* - * whead = NULL above can race with ep_remove_wait_queue() - * which can do another remove_wait_queue() after us, so we - * can't use __remove_wait_queue(). whead->lock is held by - * the caller. - */ - list_del_init(&wait->entry); - } - spin_lock_irqsave(&ep->lock, flags); ep_set_busy_poll_napi_id(epi); @@ -1228,10 +1222,26 @@ out_unlock: if (pwake) ep_poll_safewake(&ep->poll_wait); - if (epi->event.events & EPOLLEXCLUSIVE) - return ewake; + if (!(epi->event.events & EPOLLEXCLUSIVE)) + ewake = 1; + + if ((unsigned long)key & POLLFREE) { + /* + * If we race with ep_remove_wait_queue() it can miss + * ->whead = NULL and do another remove_wait_queue() after + * us, so we can't use __remove_wait_queue(). + */ + list_del_init(&wait->entry); + /* + * ->whead != NULL protects us from the race with ep_free() + * or ep_remove(), ep_remove_wait_queue() takes whead->lock + * held by the caller. Once we nullify it, nothing protects + * ep/epi or even wait. + */ + smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL); + } - return 1; + return ewake; } /* -- cgit v1.2.3