From 07bfa4415ab607e459b69bd86aa7e7602ce10b4f Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Mon, 23 Sep 2019 15:32:53 -0700 Subject: fat: work around race with userspace's read via blockdev while mounting If userspace reads the buffer via blockdev while mounting, sb_getblk()+modify can race with buffer read via blockdev. For example, FS userspace bh = sb_getblk() modify bh->b_data read ll_rw_block(bh) fill bh->b_data by on-disk data /* lost modified data by FS */ set_buffer_uptodate(bh) set_buffer_uptodate(bh) Userspace should not use the blockdev while mounting though, the udev seems to be already doing this. Although I think the udev should try to avoid this, workaround the race by small overhead. Link: http://lkml.kernel.org/r/87pnk7l3sw.fsf_-_@mail.parknet.co.jp Signed-off-by: OGAWA Hirofumi Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/dir.c | 13 +++++++++++-- fs/fat/fatent.c | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 1bda2ab6745b..814ad2c2ba80 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1100,8 +1100,11 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used, err = -ENOMEM; goto error; } + /* Avoid race with userspace read via bdev */ + lock_buffer(bhs[n]); memset(bhs[n]->b_data, 0, sb->s_blocksize); set_buffer_uptodate(bhs[n]); + unlock_buffer(bhs[n]); mark_buffer_dirty_inode(bhs[n], dir); n++; @@ -1158,6 +1161,8 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts) fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de = (struct msdos_dir_entry *)bhs[0]->b_data; + /* Avoid race with userspace read via bdev */ + lock_buffer(bhs[0]); /* filling the new directory slots ("." and ".." entries) */ memcpy(de[0].name, MSDOS_DOT, MSDOS_NAME); memcpy(de[1].name, MSDOS_DOTDOT, MSDOS_NAME); @@ -1180,6 +1185,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec64 *ts) de[0].size = de[1].size = 0; memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de)); set_buffer_uptodate(bhs[0]); + unlock_buffer(bhs[0]); mark_buffer_dirty_inode(bhs[0], dir); err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE); @@ -1237,11 +1243,14 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots, /* fill the directory entry */ copy = min(size, sb->s_blocksize); + /* Avoid race with userspace read via bdev */ + lock_buffer(bhs[n]); memcpy(bhs[n]->b_data, slots, copy); - slots += copy; - size -= copy; set_buffer_uptodate(bhs[n]); + unlock_buffer(bhs[n]); mark_buffer_dirty_inode(bhs[n], dir); + slots += copy; + size -= copy; if (!size) break; n++; diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 265983635f2b..3647c65a0f48 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -388,8 +388,11 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs, err = -ENOMEM; goto error; } + /* Avoid race with userspace read via bdev */ + lock_buffer(c_bh); memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize); set_buffer_uptodate(c_bh); + unlock_buffer(c_bh); mark_buffer_dirty_inode(c_bh, sbi->fat_inode); if (sb->s_flags & SB_SYNCHRONOUS) err = sync_dirty_buffer(c_bh); -- cgit v1.2.3 From 6e73fd25e2c7510b7dfadbaf701f31d4bff9c75b Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Mon, 23 Sep 2019 15:32:56 -0700 Subject: Revert "mm/z3fold.c: fix race between migration and destruction" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the original commit applied, z3fold_zpool_destroy() may get blocked on wait_event() for indefinite time. Revert this commit for the time being to get rid of this problem since the issue the original commit addresses is less severe. Link: http://lkml.kernel.org/r/20190910123142.7a9c8d2de4d0acbc0977c602@gmail.com Fixes: d776aaa9895eb6eb77 ("mm/z3fold.c: fix race between migration and destruction") Reported-by: Agustín Dall'Alba Signed-off-by: Vitaly Wool Cc: Vlastimil Babka Cc: Vitaly Wool Cc: Shakeel Butt Cc: Jonathan Adams Cc: Henry Burns Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 90 ------------------------------------------------------------- 1 file changed, 90 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index 75b7962439ff..ed19d98c9dcd 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -146,8 +145,6 @@ struct z3fold_header { * @release_wq: workqueue for safe page release * @work: work_struct for safe page release * @inode: inode for z3fold pseudo filesystem - * @destroying: bool to stop migration once we start destruction - * @isolated: int to count the number of pages currently in isolation * * This structure is allocated at pool creation time and maintains metadata * pertaining to a particular z3fold pool. @@ -166,11 +163,8 @@ struct z3fold_pool { const struct zpool_ops *zpool_ops; struct workqueue_struct *compact_wq; struct workqueue_struct *release_wq; - struct wait_queue_head isolate_wait; struct work_struct work; struct inode *inode; - bool destroying; - int isolated; }; /* @@ -775,7 +769,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp, goto out_c; spin_lock_init(&pool->lock); spin_lock_init(&pool->stale_lock); - init_waitqueue_head(&pool->isolate_wait); pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2); if (!pool->unbuddied) goto out_pool; @@ -815,15 +808,6 @@ out: return NULL; } -static bool pool_isolated_are_drained(struct z3fold_pool *pool) -{ - bool ret; - - spin_lock(&pool->lock); - ret = pool->isolated == 0; - spin_unlock(&pool->lock); - return ret; -} /** * z3fold_destroy_pool() - destroys an existing z3fold pool * @pool: the z3fold pool to be destroyed @@ -833,22 +817,6 @@ static bool pool_isolated_are_drained(struct z3fold_pool *pool) static void z3fold_destroy_pool(struct z3fold_pool *pool) { kmem_cache_destroy(pool->c_handle); - /* - * We set pool-> destroying under lock to ensure that - * z3fold_page_isolate() sees any changes to destroying. This way we - * avoid the need for any memory barriers. - */ - - spin_lock(&pool->lock); - pool->destroying = true; - spin_unlock(&pool->lock); - - /* - * We need to ensure that no pages are being migrated while we destroy - * these workqueues, as migration can queue work on either of the - * workqueues. - */ - wait_event(pool->isolate_wait, !pool_isolated_are_drained(pool)); /* * We need to destroy pool->compact_wq before pool->release_wq, @@ -1339,28 +1307,6 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool) return atomic64_read(&pool->pages_nr); } -/* - * z3fold_dec_isolated() expects to be called while pool->lock is held. - */ -static void z3fold_dec_isolated(struct z3fold_pool *pool) -{ - assert_spin_locked(&pool->lock); - VM_BUG_ON(pool->isolated <= 0); - pool->isolated--; - - /* - * If we have no more isolated pages, we have to see if - * z3fold_destroy_pool() is waiting for a signal. - */ - if (pool->isolated == 0 && waitqueue_active(&pool->isolate_wait)) - wake_up_all(&pool->isolate_wait); -} - -static void z3fold_inc_isolated(struct z3fold_pool *pool) -{ - pool->isolated++; -} - static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) { struct z3fold_header *zhdr; @@ -1387,34 +1333,6 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) spin_lock(&pool->lock); if (!list_empty(&page->lru)) list_del(&page->lru); - /* - * We need to check for destruction while holding pool->lock, as - * otherwise destruction could see 0 isolated pages, and - * proceed. - */ - if (unlikely(pool->destroying)) { - spin_unlock(&pool->lock); - /* - * If this page isn't stale, somebody else holds a - * reference to it. Let't drop our refcount so that they - * can call the release logic. - */ - if (unlikely(kref_put(&zhdr->refcount, - release_z3fold_page_locked))) { - /* - * If we get here we have kref problems, so we - * should freak out. - */ - WARN(1, "Z3fold is experiencing kref problems\n"); - z3fold_page_unlock(zhdr); - return false; - } - z3fold_page_unlock(zhdr); - return false; - } - - - z3fold_inc_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); return true; @@ -1483,10 +1401,6 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work); - spin_lock(&pool->lock); - z3fold_dec_isolated(pool); - spin_unlock(&pool->lock); - page_mapcount_reset(page); put_page(page); return 0; @@ -1506,14 +1420,10 @@ static void z3fold_page_putback(struct page *page) INIT_LIST_HEAD(&page->lru); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); - spin_lock(&pool->lock); - z3fold_dec_isolated(pool); - spin_unlock(&pool->lock); return; } spin_lock(&pool->lock); list_add(&page->lru, &pool->lru); - z3fold_dec_isolated(pool); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); } -- cgit v1.2.3 From 710ec38b0f633ab3e2581f07a73442d809e28ab0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Sep 2019 15:32:59 -0700 Subject: mm: add dummy can_do_mlock() helper On kernels without CONFIG_MMU, we get a link error for the siw driver: drivers/infiniband/sw/siw/siw_mem.o: In function `siw_umem_get': siw_mem.c:(.text+0x4c8): undefined reference to `can_do_mlock' This is probably not the only driver that needs the function and could otherwise build correctly without CONFIG_MMU, so add a dummy variant that always returns false. Link: http://lkml.kernel.org/r/20190909204201.931830-1-arnd@arndb.de Fixes: 2251334dcac9 ("rdma/siw: application buffer management") Signed-off-by: Arnd Bergmann Suggested-by: Jason Gunthorpe Acked-by: Michal Hocko Cc: Bernard Metzler Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7cf955feb823..6e79b3df1582 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1405,7 +1405,11 @@ extern void pagefault_out_of_memory(void); extern void show_free_areas(unsigned int flags, nodemask_t *nodemask); +#ifdef CONFIG_MMU extern bool can_do_mlock(void); +#else +static inline bool can_do_mlock(void) { return false; } +#endif extern int user_shm_lock(size_t, struct user_struct *); extern void user_shm_unlock(size_t, struct user_struct *); -- cgit v1.2.3 From 3f9d2b5766aea06042630ac60b7316fd0cebf06f Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Mon, 23 Sep 2019 15:33:02 -0700 Subject: z3fold: fix retry mechanism in page reclaim z3fold_page_reclaim()'s retry mechanism is broken: on a second iteration it will have zhdr from the first one so that zhdr is no longer in line with struct page. That leads to crashes when the system is stressed. Fix that by moving zhdr assignment up. While at it, protect against using already freed handles by using own local slots structure in z3fold_page_reclaim(). Link: http://lkml.kernel.org/r/20190908162919.830388dc7404d1e2c80f4095@gmail.com Signed-off-by: Vitaly Wool Reported-by: Markus Linnala Reported-by: Chris Murphy Reported-by: Agustin Dall'Alba Cc: "Maciej S. Szmigiero" Cc: Shakeel Butt Cc: Henry Burns Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index ed19d98c9dcd..d637d9ee005c 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -366,9 +366,10 @@ static inline int __idx(struct z3fold_header *zhdr, enum buddy bud) * Encodes the handle of a particular buddy within a z3fold page * Pool lock should be held as this function accesses first_num */ -static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) +static unsigned long __encode_handle(struct z3fold_header *zhdr, + struct z3fold_buddy_slots *slots, + enum buddy bud) { - struct z3fold_buddy_slots *slots; unsigned long h = (unsigned long)zhdr; int idx = 0; @@ -385,11 +386,15 @@ static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) if (bud == LAST) h |= (zhdr->last_chunks << BUDDY_SHIFT); - slots = zhdr->slots; slots->slot[idx] = h; return (unsigned long)&slots->slot[idx]; } +static unsigned long encode_handle(struct z3fold_header *zhdr, enum buddy bud) +{ + return __encode_handle(zhdr, zhdr->slots, bud); +} + /* Returns the z3fold page where a given handle is stored */ static inline struct z3fold_header *handle_to_z3fold_header(unsigned long h) { @@ -624,6 +629,7 @@ static void do_compact_page(struct z3fold_header *zhdr, bool locked) } if (unlikely(PageIsolated(page) || + test_bit(PAGE_CLAIMED, &page->private) || test_bit(PAGE_STALE, &page->private))) { z3fold_page_unlock(zhdr); return; @@ -1100,6 +1106,7 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) struct z3fold_header *zhdr = NULL; struct page *page = NULL; struct list_head *pos; + struct z3fold_buddy_slots slots; unsigned long first_handle = 0, middle_handle = 0, last_handle = 0; spin_lock(&pool->lock); @@ -1118,16 +1125,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) /* this bit could have been set by free, in which case * we pass over to the next page in the pool. */ - if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) { + page = NULL; continue; + } - if (unlikely(PageIsolated(page))) + if (unlikely(PageIsolated(page))) { + clear_bit(PAGE_CLAIMED, &page->private); + page = NULL; continue; + } + zhdr = page_address(page); if (test_bit(PAGE_HEADLESS, &page->private)) break; - zhdr = page_address(page); if (!z3fold_page_trylock(zhdr)) { + clear_bit(PAGE_CLAIMED, &page->private); zhdr = NULL; continue; /* can't evict at this point */ } @@ -1145,26 +1158,30 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) if (!test_bit(PAGE_HEADLESS, &page->private)) { /* - * We need encode the handles before unlocking, since - * we can race with free that will set - * (first|last)_chunks to 0 + * We need encode the handles before unlocking, and + * use our local slots structure because z3fold_free + * can zero out zhdr->slots and we can't do much + * about that */ first_handle = 0; last_handle = 0; middle_handle = 0; if (zhdr->first_chunks) - first_handle = encode_handle(zhdr, FIRST); + first_handle = __encode_handle(zhdr, &slots, + FIRST); if (zhdr->middle_chunks) - middle_handle = encode_handle(zhdr, MIDDLE); + middle_handle = __encode_handle(zhdr, &slots, + MIDDLE); if (zhdr->last_chunks) - last_handle = encode_handle(zhdr, LAST); + last_handle = __encode_handle(zhdr, &slots, + LAST); /* * it's safe to unlock here because we hold a * reference to this page */ z3fold_page_unlock(zhdr); } else { - first_handle = encode_handle(zhdr, HEADLESS); + first_handle = __encode_handle(zhdr, &slots, HEADLESS); last_handle = middle_handle = 0; } @@ -1194,9 +1211,9 @@ next: spin_lock(&pool->lock); list_add(&page->lru, &pool->lru); spin_unlock(&pool->lock); + clear_bit(PAGE_CLAIMED, &page->private); } else { z3fold_page_lock(zhdr); - clear_bit(PAGE_CLAIMED, &page->private); if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) { atomic64_dec(&pool->pages_nr); @@ -1211,6 +1228,7 @@ next: list_add(&page->lru, &pool->lru); spin_unlock(&pool->lock); z3fold_page_unlock(zhdr); + clear_bit(PAGE_CLAIMED, &page->private); } /* We started off locked to we need to lock the pool back */ @@ -1315,7 +1333,8 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode) VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(PageIsolated(page), page); - if (test_bit(PAGE_HEADLESS, &page->private)) + if (test_bit(PAGE_HEADLESS, &page->private) || + test_bit(PAGE_CLAIMED, &page->private)) return false; zhdr = page_address(page); -- cgit v1.2.3 From 6279eb3dd7946c69346a3b98473ed13d3a44adb5 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Mon, 23 Sep 2019 15:33:05 -0700 Subject: kbuild: clean compressed initramfs image Since 9e3596b0c653 ("kbuild: initramfs cleanup, set target from Kconfig") "make clean" leaves behind compressed initramfs images. Example: $ make defconfig $ sed -i 's|CONFIG_INITRAMFS_SOURCE=""|CONFIG_INITRAMFS_SOURCE="/tmp/ir.cpio"|' .config $ make olddefconfig $ make -s $ make -s clean $ git clean -ndxf | grep initramfs Would remove usr/initramfs_data.cpio.gz clean rules do not have CONFIG_* context so they do not know which compression format was used. Thus they don't know which files to delete. Tell clean to delete all possible compression formats. Once patched usr/initramfs_data.cpio.gz and friends are deleted by "make clean". Link: http://lkml.kernel.org/r/20190722063251.55541-1-gthelen@google.com Fixes: 9e3596b0c653 ("kbuild: initramfs cleanup, set target from Kconfig") Signed-off-by: Greg Thelen Cc: Nicholas Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- usr/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/usr/Makefile b/usr/Makefile index 6a89eb019275..e6f7cb2f81db 100644 --- a/usr/Makefile +++ b/usr/Makefile @@ -11,6 +11,9 @@ datafile_y = initramfs_data.cpio$(suffix_y) datafile_d_y = .$(datafile_y).d AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)" +# clean rules do not have CONFIG_INITRAMFS_COMPRESSION. So clean up after all +# possible compression formats. +clean-files += initramfs_data.cpio* # Generate builtin.o based on initramfs_data.o obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o -- cgit v1.2.3 From bbd0f32721e28cc7097fa50afa96178896f9e33b Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 23 Sep 2019 15:33:08 -0700 Subject: ocfs2: use jbd2_inode dirty range scoping 6ba0e7dc64a5 ("jbd2: introduce jbd2_inode dirty range scoping") allow us scoping each of the inode dirty ranges associated with a given transaction, and ext4 already does this way. Now let's also use the newly introduced jbd2_inode dirty range scoping to prevent us from waiting forever when trying to complete a journal transaction in ocfs2. Link: http://lkml.kernel.org/r/1562977611-8412-1-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Ross Zwisler Reviewed-by: Changwei Ge Cc: "Theodore Ts'o" Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 5 ++++- fs/ocfs2/aops.c | 13 ++++++++++--- fs/ocfs2/file.c | 10 +++++++--- fs/ocfs2/journal.h | 11 +++++++---- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 0c335b51043d..f5d2bd15e0ca 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -6792,6 +6792,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, struct page *page, int zero, u64 *phys) { int ret, partial = 0; + loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from; + loff_t length = to - from; ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0); if (ret) @@ -6811,7 +6813,8 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, if (ret < 0) mlog_errno(ret); else if (ocfs2_should_order_data(inode)) { - ret = ocfs2_jbd2_file_inode(handle, inode); + ret = ocfs2_jbd2_inode_add_write(handle, inode, + start_byte, length); if (ret < 0) mlog_errno(ret); } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index a4c905d6b575..8de1c9d644f6 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -942,7 +942,8 @@ static void ocfs2_write_failure(struct inode *inode, if (tmppage && page_has_buffers(tmppage)) { if (ocfs2_should_order_data(inode)) - ocfs2_jbd2_file_inode(wc->w_handle, inode); + ocfs2_jbd2_inode_add_write(wc->w_handle, inode, + user_pos, user_len); block_commit_write(tmppage, from, to); } @@ -2023,8 +2024,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (page_has_buffers(tmppage)) { - if (handle && ocfs2_should_order_data(inode)) - ocfs2_jbd2_file_inode(handle, inode); + if (handle && ocfs2_should_order_data(inode)) { + loff_t start_byte = + ((loff_t)tmppage->index << PAGE_SHIFT) + + from; + loff_t length = to - from; + ocfs2_jbd2_inode_add_write(handle, inode, + start_byte, length); + } block_commit_write(tmppage, from, to); } } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4435df3e5adb..efe9988a5be4 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -706,7 +706,9 @@ leave: * Thus, we need to explicitly order the zeroed pages. */ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, - struct buffer_head *di_bh) + struct buffer_head *di_bh, + loff_t start_byte, + loff_t length) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); handle_t *handle = NULL; @@ -722,7 +724,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, goto out; } - ret = ocfs2_jbd2_file_inode(handle, inode); + ret = ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length); if (ret < 0) { mlog_errno(ret); goto out; @@ -761,7 +763,9 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT)); BUG_ON(abs_from & (inode->i_blkbits - 1)); - handle = ocfs2_zero_start_ordered_transaction(inode, di_bh); + handle = ocfs2_zero_start_ordered_transaction(inode, di_bh, + abs_from, + abs_to - abs_from); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index c0fe6ed08ab1..f37473c20a7b 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -232,8 +232,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) * ocfs2_journal_access_*() unless you intend to * manage the checksum by hand. * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. - * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before - * the current handle commits. + * ocfs2_jbd2_inode_add_write - Mark an inode with range so that its data goes + * out before the current handle commits. */ /* You must always start_trans with a number of buffs > 0, but it's @@ -603,9 +603,12 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, return credits; } -static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) +static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode, + loff_t start_byte, loff_t length) { - return jbd2_journal_inode_add_write(handle, &OCFS2_I(inode)->ip_jinode); + return jbd2_journal_inode_ranged_write(handle, + &OCFS2_I(inode)->ip_jinode, + start_byte, length); } static inline int ocfs2_begin_ordered_truncate(struct inode *inode, -- cgit v1.2.3 From 963abb9aebcdacf5c709a36da9ac0727a33671eb Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Mon, 23 Sep 2019 15:33:11 -0700 Subject: jbd2: remove jbd2_journal_inode_add_[write|wait] Since ext4/ocfs2 are using jbd2_inode dirty range scoping APIs now, jbd2_journal_inode_add_[write|wait] are not used any more, remove them. Link: http://lkml.kernel.org/r/1562977611-8412-2-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: Joseph Qi Reviewed-by: Ross Zwisler Acked-by: Changwei Ge Cc: Gang He Cc: Joel Becker Cc: Joseph Qi Cc: Jun Piao Cc: Junxiao Bi Cc: Mark Fasheh Cc: "Theodore Ts'o" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd2/journal.c | 2 -- fs/jbd2/transaction.c | 12 ------------ include/linux/jbd2.h | 2 -- 3 files changed, 16 deletions(-) diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 953990eb70a9..1c58859aa592 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -89,8 +89,6 @@ EXPORT_SYMBOL(jbd2_journal_blocks_per_page); EXPORT_SYMBOL(jbd2_journal_invalidatepage); EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); EXPORT_SYMBOL(jbd2_journal_force_commit); -EXPORT_SYMBOL(jbd2_journal_inode_add_write); -EXPORT_SYMBOL(jbd2_journal_inode_add_wait); EXPORT_SYMBOL(jbd2_journal_inode_ranged_write); EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait); EXPORT_SYMBOL(jbd2_journal_init_jbd_inode); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index afc06daee5bb..bee8498d7792 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2622,18 +2622,6 @@ done: return 0; } -int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *jinode) -{ - return jbd2_journal_file_inode(handle, jinode, - JI_WRITE_DATA | JI_WAIT_DATA, 0, LLONG_MAX); -} - -int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *jinode) -{ - return jbd2_journal_file_inode(handle, jinode, JI_WAIT_DATA, 0, - LLONG_MAX); -} - int jbd2_journal_inode_ranged_write(handle_t *handle, struct jbd2_inode *jinode, loff_t start_byte, loff_t length) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index df03825ad1a1..603fbc4e2f70 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1410,8 +1410,6 @@ extern int jbd2_journal_clear_err (journal_t *); extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); extern int jbd2_journal_force_commit_nested(journal_t *); -extern int jbd2_journal_inode_add_write(handle_t *handle, struct jbd2_inode *inode); -extern int jbd2_journal_inode_add_wait(handle_t *handle, struct jbd2_inode *inode); extern int jbd2_journal_inode_ranged_write(handle_t *handle, struct jbd2_inode *inode, loff_t start_byte, loff_t length); -- cgit v1.2.3 From 5e7a3ed9f1a60f17c165e1b73df6d6aebb211266 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 23 Sep 2019 15:33:15 -0700 Subject: ocfs2: further debugfs cleanups There is no need to check return value of debugfs_create functions, but the last sweep through ocfs missed a number of places where this was happening. There is also no need to save the individual dentries for the debugfs files, as everything is can just be removed at once when the directory is removed. By getting rid of the file dentries for the debugfs entries, a bit of local memory can be saved as well. [colin.king@canonical.com: ensure ret is set to zero before returning] Link: http://lkml.kernel.org/r/20190807121929.28918-1-colin.king@canonical.com Link: http://lkml.kernel.org/r/20190731132119.GA12603@kroah.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Colin Ian King Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Jia Guo Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/blockcheck.c | 26 +++++------ fs/ocfs2/cluster/heartbeat.c | 103 ++++++++++++------------------------------- fs/ocfs2/dlm/dlmcommon.h | 1 - fs/ocfs2/dlm/dlmdebug.c | 55 +++++------------------ fs/ocfs2/dlm/dlmdebug.h | 16 +------ fs/ocfs2/dlm/dlmdomain.c | 7 +-- fs/ocfs2/dlmglue.c | 20 +++------ fs/ocfs2/ocfs2.h | 3 -- fs/ocfs2/super.c | 10 ++--- 9 files changed, 61 insertions(+), 180 deletions(-) diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index 429e6a8359a5..eaf042feaf5e 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -231,14 +231,6 @@ static int blockcheck_u64_get(void *data, u64 *val) } DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n"); -static struct dentry *blockcheck_debugfs_create(const char *name, - struct dentry *parent, - u64 *value) -{ - return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value, - &blockcheck_fops); -} - static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) { if (stats) { @@ -250,16 +242,20 @@ static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats) static void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, struct dentry *parent) { - stats->b_debug_dir = debugfs_create_dir("blockcheck", parent); + struct dentry *dir; + + dir = debugfs_create_dir("blockcheck", parent); + stats->b_debug_dir = dir; + + debugfs_create_file("blocks_checked", S_IFREG | S_IRUSR, dir, + &stats->b_check_count, &blockcheck_fops); - blockcheck_debugfs_create("blocks_checked", stats->b_debug_dir, - &stats->b_check_count); + debugfs_create_file("checksums_failed", S_IFREG | S_IRUSR, dir, + &stats->b_failure_count, &blockcheck_fops); - blockcheck_debugfs_create("checksums_failed", stats->b_debug_dir, - &stats->b_failure_count); + debugfs_create_file("ecc_recoveries", S_IFREG | S_IRUSR, dir, + &stats->b_recover_count, &blockcheck_fops); - blockcheck_debugfs_create("ecc_recoveries", stats->b_debug_dir, - &stats->b_recover_count); } #else static inline void ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f1b613327ac8..a368350d4c27 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -225,10 +225,6 @@ struct o2hb_region { unsigned int hr_region_num; struct dentry *hr_debug_dir; - struct dentry *hr_debug_livenodes; - struct dentry *hr_debug_regnum; - struct dentry *hr_debug_elapsed_time; - struct dentry *hr_debug_pinned; struct o2hb_debug_buf *hr_db_livenodes; struct o2hb_debug_buf *hr_db_regnum; struct o2hb_debug_buf *hr_db_elapsed_time; @@ -1394,21 +1390,20 @@ void o2hb_exit(void) kfree(o2hb_db_failedregions); } -static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, - struct o2hb_debug_buf **db, int db_len, - int type, int size, int len, void *data) +static void o2hb_debug_create(const char *name, struct dentry *dir, + struct o2hb_debug_buf **db, int db_len, int type, + int size, int len, void *data) { *db = kmalloc(db_len, GFP_KERNEL); if (!*db) - return NULL; + return; (*db)->db_type = type; (*db)->db_size = size; (*db)->db_len = len; (*db)->db_data = data; - return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, - &o2hb_debug_fops); + debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, &o2hb_debug_fops); } static void o2hb_debug_init(void) @@ -1525,11 +1520,7 @@ static void o2hb_region_release(struct config_item *item) kfree(reg->hr_slots); - debugfs_remove(reg->hr_debug_livenodes); - debugfs_remove(reg->hr_debug_regnum); - debugfs_remove(reg->hr_debug_elapsed_time); - debugfs_remove(reg->hr_debug_pinned); - debugfs_remove(reg->hr_debug_dir); + debugfs_remove_recursive(reg->hr_debug_dir); kfree(reg->hr_db_livenodes); kfree(reg->hr_db_regnum); kfree(reg->hr_db_elapsed_time); @@ -1988,69 +1979,33 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group : NULL; } -static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) +static void o2hb_debug_region_init(struct o2hb_region *reg, + struct dentry *parent) { - int ret = -ENOMEM; + struct dentry *dir; - reg->hr_debug_dir = - debugfs_create_dir(config_item_name(®->hr_item), dir); - if (!reg->hr_debug_dir) { - mlog_errno(ret); - goto bail; - } + dir = debugfs_create_dir(config_item_name(®->hr_item), parent); + reg->hr_debug_dir = dir; - reg->hr_debug_livenodes = - o2hb_debug_create(O2HB_DEBUG_LIVENODES, - reg->hr_debug_dir, - &(reg->hr_db_livenodes), - sizeof(*(reg->hr_db_livenodes)), - O2HB_DB_TYPE_REGION_LIVENODES, - sizeof(reg->hr_live_node_bitmap), - O2NM_MAX_NODES, reg); - if (!reg->hr_debug_livenodes) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_LIVENODES, dir, &(reg->hr_db_livenodes), + sizeof(*(reg->hr_db_livenodes)), + O2HB_DB_TYPE_REGION_LIVENODES, + sizeof(reg->hr_live_node_bitmap), O2NM_MAX_NODES, + reg); - reg->hr_debug_regnum = - o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, - reg->hr_debug_dir, - &(reg->hr_db_regnum), - sizeof(*(reg->hr_db_regnum)), - O2HB_DB_TYPE_REGION_NUMBER, - 0, O2NM_MAX_NODES, reg); - if (!reg->hr_debug_regnum) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, dir, &(reg->hr_db_regnum), + sizeof(*(reg->hr_db_regnum)), + O2HB_DB_TYPE_REGION_NUMBER, 0, O2NM_MAX_NODES, reg); - reg->hr_debug_elapsed_time = - o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, - reg->hr_debug_dir, - &(reg->hr_db_elapsed_time), - sizeof(*(reg->hr_db_elapsed_time)), - O2HB_DB_TYPE_REGION_ELAPSED_TIME, - 0, 0, reg); - if (!reg->hr_debug_elapsed_time) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, dir, + &(reg->hr_db_elapsed_time), + sizeof(*(reg->hr_db_elapsed_time)), + O2HB_DB_TYPE_REGION_ELAPSED_TIME, 0, 0, reg); - reg->hr_debug_pinned = - o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, - reg->hr_debug_dir, - &(reg->hr_db_pinned), - sizeof(*(reg->hr_db_pinned)), - O2HB_DB_TYPE_REGION_PINNED, - 0, 0, reg); - if (!reg->hr_debug_pinned) { - mlog_errno(ret); - goto bail; - } + o2hb_debug_create(O2HB_DEBUG_REGION_PINNED, dir, &(reg->hr_db_pinned), + sizeof(*(reg->hr_db_pinned)), + O2HB_DB_TYPE_REGION_PINNED, 0, 0, reg); - ret = 0; -bail: - return ret; } static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, @@ -2106,11 +2061,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g if (ret) goto unregister_handler; - ret = o2hb_debug_region_init(reg, o2hb_debug_dir); - if (ret) { - config_item_put(®->hr_item); - goto unregister_handler; - } + o2hb_debug_region_init(reg, o2hb_debug_dir); return ®->hr_item; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 69a429b625cc..aaf24548b02a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -142,7 +142,6 @@ struct dlm_ctxt atomic_t res_tot_count; atomic_t res_cur_count; - struct dlm_debug_ctxt *dlm_debug_ctxt; struct dentry *dlm_debugfs_subroot; /* NOTE: Next three are protected by dlm_domain_lock */ diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index a4b58ba99927..4d0b452012b2 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -853,67 +853,34 @@ static const struct file_operations debug_state_fops = { /* files in subroot */ void dlm_debug_init(struct dlm_ctxt *dlm) { - struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; - /* for dumping dlm_ctxt */ - dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_state_fops); + debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR, + dlm->dlm_debugfs_subroot, dlm, &debug_state_fops); /* for dumping lockres */ - dc->debug_lockres_dentry = - debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_lockres_fops); + debugfs_create_file(DLM_DEBUGFS_LOCKING_STATE, S_IFREG|S_IRUSR, + dlm->dlm_debugfs_subroot, dlm, &debug_lockres_fops); /* for dumping mles */ - dc->debug_mle_dentry = debugfs_create_file(DLM_DEBUGFS_MLE_STATE, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_mle_fops); + debugfs_create_file(DLM_DEBUGFS_MLE_STATE, S_IFREG|S_IRUSR, + dlm->dlm_debugfs_subroot, dlm, &debug_mle_fops); /* for dumping lockres on the purge list */ - dc->debug_purgelist_dentry = - debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, - S_IFREG|S_IRUSR, - dlm->dlm_debugfs_subroot, - dlm, &debug_purgelist_fops); -} - -void dlm_debug_shutdown(struct dlm_ctxt *dlm) -{ - struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; - - if (dc) { - debugfs_remove(dc->debug_purgelist_dentry); - debugfs_remove(dc->debug_mle_dentry); - debugfs_remove(dc->debug_lockres_dentry); - debugfs_remove(dc->debug_state_dentry); - kfree(dc); - dc = NULL; - } + debugfs_create_file(DLM_DEBUGFS_PURGE_LIST, S_IFREG|S_IRUSR, + dlm->dlm_debugfs_subroot, dlm, + &debug_purgelist_fops); } /* subroot - domain dir */ -int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) +void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { - dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), - GFP_KERNEL); - if (!dlm->dlm_debug_ctxt) { - mlog_errno(-ENOMEM); - return -ENOMEM; - } - dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, dlm_debugfs_root); - return 0; } void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { - debugfs_remove(dlm->dlm_debugfs_subroot); + debugfs_remove_recursive(dlm->dlm_debugfs_subroot); } /* debugfs root */ diff --git a/fs/ocfs2/dlm/dlmdebug.h b/fs/ocfs2/dlm/dlmdebug.h index 7d0c7c9013ce..f8fd8680a4b6 100644 --- a/fs/ocfs2/dlm/dlmdebug.h +++ b/fs/ocfs2/dlm/dlmdebug.h @@ -14,13 +14,6 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle); #ifdef CONFIG_DEBUG_FS -struct dlm_debug_ctxt { - struct dentry *debug_state_dentry; - struct dentry *debug_lockres_dentry; - struct dentry *debug_mle_dentry; - struct dentry *debug_purgelist_dentry; -}; - struct debug_lockres { int dl_len; char *dl_buf; @@ -29,9 +22,8 @@ struct debug_lockres { }; void dlm_debug_init(struct dlm_ctxt *dlm); -void dlm_debug_shutdown(struct dlm_ctxt *dlm); -int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); +void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm); void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm); void dlm_create_debugfs_root(void); @@ -42,12 +34,8 @@ void dlm_destroy_debugfs_root(void); static inline void dlm_debug_init(struct dlm_ctxt *dlm) { } -static inline void dlm_debug_shutdown(struct dlm_ctxt *dlm) -{ -} -static inline int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) +static inline void dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { - return 0; } static inline void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 7338b5d4647c..ee6f459f9770 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -387,7 +387,6 @@ static void dlm_destroy_dlm_worker(struct dlm_ctxt *dlm) static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm) { dlm_unregister_domain_handlers(dlm); - dlm_debug_shutdown(dlm); dlm_complete_thread(dlm); dlm_complete_recovery_thread(dlm); dlm_destroy_dlm_worker(dlm); @@ -1938,7 +1937,6 @@ bail: if (status) { dlm_unregister_domain_handlers(dlm); - dlm_debug_shutdown(dlm); dlm_complete_thread(dlm); dlm_complete_recovery_thread(dlm); dlm_destroy_dlm_worker(dlm); @@ -1992,9 +1990,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, dlm->key = key; dlm->node_num = o2nm_this_node(); - ret = dlm_create_debugfs_subroot(dlm); - if (ret < 0) - goto leave; + dlm_create_debugfs_subroot(dlm); spin_lock_init(&dlm->spinlock); spin_lock_init(&dlm->master_lock); @@ -2056,6 +2052,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, mlog(0, "context init: refcount %u\n", kref_read(&dlm->dlm_refs)); + ret = 0; leave: if (ret < 0 && dlm) { if (dlm->master_hash) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 14207234fa3d..ad594fef2ab0 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3012,8 +3012,6 @@ struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void) kref_init(&dlm_debug->d_refcnt); INIT_LIST_HEAD(&dlm_debug->d_lockres_tracking); - dlm_debug->d_locking_state = NULL; - dlm_debug->d_locking_filter = NULL; dlm_debug->d_filter_secs = 0; out: return dlm_debug; @@ -3282,27 +3280,19 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb) { struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; - dlm_debug->d_locking_state = debugfs_create_file("locking_state", - S_IFREG|S_IRUSR, - osb->osb_debug_root, - osb, - &ocfs2_dlm_debug_fops); + debugfs_create_file("locking_state", S_IFREG|S_IRUSR, + osb->osb_debug_root, osb, &ocfs2_dlm_debug_fops); - dlm_debug->d_locking_filter = debugfs_create_u32("locking_filter", - 0600, - osb->osb_debug_root, - &dlm_debug->d_filter_secs); + debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root, + &dlm_debug->d_filter_secs); } static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb) { struct ocfs2_dlm_debug *dlm_debug = osb->osb_dlm_debug; - if (dlm_debug) { - debugfs_remove(dlm_debug->d_locking_state); - debugfs_remove(dlm_debug->d_locking_filter); + if (dlm_debug) ocfs2_put_dlm_debug(dlm_debug); - } } int ocfs2_dlm_init(struct ocfs2_super *osb) diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index fddbbd60f434..9150cfa4df7d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -223,8 +223,6 @@ struct ocfs2_orphan_scan { struct ocfs2_dlm_debug { struct kref d_refcnt; - struct dentry *d_locking_state; - struct dentry *d_locking_filter; u32 d_filter_secs; struct list_head d_lockres_tracking; }; @@ -401,7 +399,6 @@ struct ocfs2_super struct ocfs2_dlm_debug *osb_dlm_debug; struct dentry *osb_debug_root; - struct dentry *osb_ctxt; wait_queue_head_t recovery_event; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8b2f39506648..c81e86c62380 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1080,10 +1080,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->osb_debug_root = debugfs_create_dir(osb->uuid_str, ocfs2_debugfs_root); - osb->osb_ctxt = debugfs_create_file("fs_state", S_IFREG|S_IRUSR, - osb->osb_debug_root, - osb, - &ocfs2_osb_debug_fops); + debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, + osb, &ocfs2_osb_debug_fops); if (ocfs2_meta_ecc(osb)) ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, @@ -1861,8 +1859,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) kset_unregister(osb->osb_dev_kset); - debugfs_remove(osb->osb_ctxt); - /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); @@ -1918,7 +1914,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) ocfs2_dlm_shutdown(osb, hangup_needed); ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats); - debugfs_remove(osb->osb_debug_root); + debugfs_remove_recursive(osb->osb_debug_root); if (hangup_needed) ocfs2_cluster_hangup(osb->uuid_str, strlen(osb->uuid_str)); -- cgit v1.2.3 From 3dd21cdbefa97cf3ec5559f9ab214af4d8ae2096 Mon Sep 17 00:00:00 2001 From: Guozhonghua Date: Mon, 23 Sep 2019 15:33:18 -0700 Subject: ocfs2: remove unused ocfs2_calc_tree_trunc_credits() ocfs2_calc_tree_trunc_credits() is not called anywhere. Link: http://lkml.kernel.org/r/71604351584F6A4EBAE558C676F37CA4014FC2050F@H3CMLB12-EX.srv.huawei-3com.com Signed-off-by: guozhonghua Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/journal.h | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index f37473c20a7b..6d8dd3308aaf 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -575,34 +575,6 @@ static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb) return ocfs2_extent_recs_per_gd(sb); } -static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, - unsigned int clusters_to_del, - struct ocfs2_dinode *fe, - struct ocfs2_extent_list *last_el) -{ - /* for dinode + all headers in this pass + update to next leaf */ - u16 next_free = le16_to_cpu(last_el->l_next_free_rec); - u16 tree_depth = le16_to_cpu(fe->id2.i_list.l_tree_depth); - int credits = 1 + tree_depth + 1; - int i; - - i = next_free - 1; - BUG_ON(i < 0); - - /* We may be deleting metadata blocks, so metadata alloc dinode + - one desc. block for each possible delete. */ - if (tree_depth && next_free == 1 && - ocfs2_rec_clusters(last_el, &last_el->l_recs[i]) == clusters_to_del) - credits += 1 + tree_depth; - - /* update to the truncate log. */ - credits += OCFS2_TRUNCATE_LOG_UPDATE; - - credits += ocfs2_quota_trans_credits(sb); - - return credits; -} - static inline int ocfs2_jbd2_inode_add_write(handle_t *handle, struct inode *inode, loff_t start_byte, loff_t length) { -- cgit v1.2.3 From bf5a52647963cd08b2e999f77b16739d6300ebce Mon Sep 17 00:00:00 2001 From: Guozhonghua Date: Mon, 23 Sep 2019 15:33:21 -0700 Subject: ocfs2: remove unused ocfs2_orphan_scan_exit() declaration ocfs2_orphan_scan_exit() is declared but not implemented. Also perform a minor cleanup in ocfs2_link_credits() Link: http://lkml.kernel.org/r/71604351584F6A4EBAE558C676F37CA4014FC208AC@H3CMLB12-EX.srv.huawei-3com.com Signed-off-by: guozhonghua Reviewed-by: Andrew Morton Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/journal.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 6d8dd3308aaf..3103ba7f97a2 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -144,7 +144,6 @@ static inline void ocfs2_ci_set_new(struct ocfs2_super *osb, void ocfs2_orphan_scan_init(struct ocfs2_super *osb); void ocfs2_orphan_scan_start(struct ocfs2_super *osb); void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); -void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); void ocfs2_complete_recovery(struct work_struct *work); void ocfs2_wait_for_recovery(struct ocfs2_super *osb); @@ -441,7 +440,7 @@ static inline int ocfs2_mknod_credits(struct super_block *sb, int is_dir, * previous dirblock update in the free list */ static inline int ocfs2_link_credits(struct super_block *sb) { - return 2*OCFS2_INODE_UPDATE_CREDITS + 4 + + return 2 * OCFS2_INODE_UPDATE_CREDITS + 4 + ocfs2_quota_trans_credits(sb); } -- cgit v1.2.3 From 225dcadf8ee869d4429373ef04fdce49ac3fc266 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 23 Sep 2019 15:33:24 -0700 Subject: fs/ocfs2/namei.c: remove set but not used variables Fixes gcc '-Wunused-but-set-variable' warning: fs/ocfs2/namei.c: In function ocfs2_create_inode_in_orphan: fs/ocfs2/namei.c:2503:23: warning: variable di set but not used [-Wunused-but-set-variable] Link: http://lkml.kernel.org/r/1566522588-63786-2-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: zhengbin Signed-off-by: Joseph Qi Reported-by: Hulk Robot Reviewed-by: Joseph Qi Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 6f8e1c4fdb9c..8ea51cf27b97 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2486,7 +2486,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, struct inode *inode = NULL; struct inode *orphan_dir = NULL; struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); - struct ocfs2_dinode *di = NULL; handle_t *handle = NULL; char orphan_name[OCFS2_ORPHAN_NAMELEN + 1]; struct buffer_head *parent_di_bh = NULL; @@ -2552,7 +2551,6 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, goto leave; } - di = (struct ocfs2_dinode *)new_di_bh->b_data; status = ocfs2_orphan_add(osb, handle, inode, new_di_bh, orphan_name, &orphan_insert, orphan_dir, false); if (status < 0) { -- cgit v1.2.3 From 236dcc2ae4945e00201dbab9b8f76905849b7515 Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 23 Sep 2019 15:33:27 -0700 Subject: fs/ocfs2/file.c: remove set but not used variables Fixes gcc '-Wunused-but-set-variable' warning: fs/ocfs2/file.c: In function ocfs2_prepare_inode_for_write: fs/ocfs2/file.c:2143:9: warning: variable end set but not used [-Wunused-but-set-variable] Link: http://lkml.kernel.org/r/1566522588-63786-3-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: zhengbin Signed-off-by: Joseph Qi Reported-by: Hulk Robot Reviewed-by: Joseph Qi Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index efe9988a5be4..2e982db3e1ae 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2130,7 +2130,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, struct dentry *dentry = file->f_path.dentry; struct inode *inode = d_inode(dentry); struct buffer_head *di_bh = NULL; - loff_t end; /* * We start with a read level meta lock and only jump to an ex @@ -2194,8 +2193,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file, } } - end = pos + count; - ret = ocfs2_check_range_for_refcount(inode, pos, count); if (ret == 1) { ocfs2_inode_unlock(inode, meta_level); -- cgit v1.2.3 From 77461ba1d17673da47eeb5cccb928e10cf97a01e Mon Sep 17 00:00:00 2001 From: zhengbin Date: Mon, 23 Sep 2019 15:33:30 -0700 Subject: fs/ocfs2/dir.c: remove set but not used variables Fixes gcc '-Wunused-but-set-variable' warning: fs/ocfs2/dir.c: In function ocfs2_dx_dir_transfer_leaf: fs/ocfs2/dir.c:3653:42: warning: variable new_list set but not used [-Wunused-but-set-variable] Link: http://lkml.kernel.org/r/1566522588-63786-4-git-send-email-joseph.qi@linux.alibaba.com Signed-off-by: zhengbin Signed-off-by: Joseph Qi Reported-by: Hulk Robot Reviewed-by: Joseph Qi Reviewed-by: Changwei Ge Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dir.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 784426dee56c..bdef72c0f099 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3636,7 +3636,7 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, int i, j, num_used; u32 major_hash; struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; - struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; + struct ocfs2_dx_entry_list *orig_list, *tmp_list; struct ocfs2_dx_entry *dx_entry; tmp_list = &tmp_dx_leaf->dl_list; @@ -3645,7 +3645,6 @@ static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash, orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data; orig_list = &orig_dx_leaf->dl_list; new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data; - new_list = &new_dx_leaf->dl_list; num_used = le16_to_cpu(orig_list->de_num_used); -- cgit v1.2.3 From a89bd89fae638965ca5a79a3467d79f926260882 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 23 Sep 2019 15:33:34 -0700 Subject: ocfs2: delete unnecessary checks before brelse() brelse() tests whether its argument is NULL and then returns immediately. Thus the tests around the shown calls are not needed. This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/55cde320-394b-f985-56ce-1a2abea782aa@web.de Signed-off-by: Markus Elfring Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Cc: Gang He Cc: Jun Piao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlmglue.c | 7 ++----- fs/ocfs2/extent_map.c | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ad594fef2ab0..6e774c5ea13b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2508,9 +2508,7 @@ bail: ocfs2_inode_unlock(inode, ex); } - if (local_bh) - brelse(local_bh); - + brelse(local_bh); return status; } @@ -2593,8 +2591,7 @@ int ocfs2_inode_lock_atime(struct inode *inode, *level = 1; if (ocfs2_should_update_atime(inode, vfsmnt)) ocfs2_update_inode_atime(inode, bh); - if (bh) - brelse(bh); + brelse(bh); } else *level = 0; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index e66a249fe07c..e3e2d1b2af51 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -590,8 +590,7 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, *extent_flags = rec->e_flags; } out: - if (eb_bh) - brelse(eb_bh); + brelse(eb_bh); return ret; } -- cgit v1.2.3 From 0a3775e4f883912944481cf2ef36eb6383a9cc74 Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Mon, 23 Sep 2019 15:33:37 -0700 Subject: ocfs2: wait for recovering done after direct unlock request There is a scenario causing ocfs2 umount hang when multiple hosts are rebooting at the same time. NODE1 NODE2 NODE3 send unlock requset to NODE2 dies become recovery master recover NODE2 find NODE2 dead mark resource RECOVERING directly remove lock from grant list calculate usage but RECOVERING marked **miss the window of purging clear RECOVERING To reproduce this issue, crash a host and then umount ocfs2 from another node. To solve this, just let unlock progress wait for recovery done. Link: http://lkml.kernel.org/r/1550124866-20367-1-git-send-email-gechangwei@live.cn Signed-off-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmunlock.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index e78657742bd8..3883633e82eb 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -90,7 +90,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, enum dlm_status status; int actions = 0; int in_use; - u8 owner; + u8 owner; + int recovery_wait = 0; mlog(0, "master_node = %d, valblk = %d\n", master_node, flags & LKM_VALBLK); @@ -193,9 +194,12 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, } if (flags & LKM_CANCEL) lock->cancel_pending = 0; - else - lock->unlock_pending = 0; - + else { + if (!lock->unlock_pending) + recovery_wait = 1; + else + lock->unlock_pending = 0; + } } /* get an extra ref on lock. if we are just switching @@ -229,6 +233,17 @@ leave: spin_unlock(&res->spinlock); wake_up(&res->wq); + if (recovery_wait) { + spin_lock(&res->spinlock); + /* Unlock request will directly succeed after owner dies, + * and the lock is already removed from grant list. We have to + * wait for RECOVERING done or we miss the chance to purge it + * since the removement is much faster than RECOVERING proc. + */ + __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_RECOVERING); + spin_unlock(&res->spinlock); + } + /* let the caller's final dlm_lock_put handle the actual kfree */ if (actions & DLM_UNLOCK_FREE_LOCK) { /* this should always be coupled with list removal */ -- cgit v1.2.3 From d7283b39dbf3de4fe54870f07d7975effa3188da Mon Sep 17 00:00:00 2001 From: Changwei Ge Date: Mon, 23 Sep 2019 15:33:40 -0700 Subject: ocfs2: checkpoint appending truncate log transaction before flushing Appending truncate log(TA) and and flushing truncate log(TF) are two separated transactions. They can be both committed but not checkpointed. If crash occurs then, both transaction will be replayed with several already released to global bitmap clusters. Then truncate log will be replayed resulting in cluster double free. To reproduce this issue, just crash the host while punching hole to files. Signed-off-by: Changwei Ge Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f5d2bd15e0ca..f9baefc76cf9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5993,6 +5993,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) struct buffer_head *data_alloc_bh = NULL; struct ocfs2_dinode *di; struct ocfs2_truncate_log *tl; + struct ocfs2_journal *journal = osb->journal; BUG_ON(inode_trylock(tl_inode)); @@ -6013,6 +6014,20 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) goto out; } + /* Appending truncate log(TA) and and flushing truncate log(TF) are + * two separated transactions. They can be both committed but not + * checkpointed. If crash occurs then, both two transaction will be + * replayed with several already released to global bitmap clusters. + * Then truncate log will be replayed resulting in cluster double free. + */ + jbd2_journal_lock_updates(journal->j_journal); + status = jbd2_journal_flush(journal->j_journal); + jbd2_journal_unlock_updates(journal->j_journal); + if (status < 0) { + mlog_errno(status); + goto out; + } + data_alloc_inode = ocfs2_get_system_file_inode(osb, GLOBAL_BITMAP_SYSTEM_INODE, OCFS2_INVALID_SLOT); -- cgit v1.2.3 From 1c3ce5417b338ba3c697bac6485581ba117d8e52 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 23 Sep 2019 15:33:43 -0700 Subject: ocfs2: fix spelling mistake "ambigous" -> "ambiguous" There is a spelling mistake in a mlog_bug_on_msg message. Fix it. Link: http://lkml.kernel.org/r/831bdff4-064e-038b-f45d-c4d265cbff1e@linux.alibaba.com Signed-off-by: Colin Ian King Acked-by: Joseph Qi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7ad9d6590818..7c9dfd50c1c1 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -534,7 +534,7 @@ static int ocfs2_read_locked_inode(struct inode *inode, */ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), - "Inode %llu: system file state is ambigous\n", + "Inode %llu: system file state is ambiguous\n", (unsigned long long)args->fi_blkno); if (S_ISCHR(le16_to_cpu(fe->i_mode)) || -- cgit v1.2.3 From 04f768a39d55967246c002aa66b407b3bfdd8269 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 23 Sep 2019 15:33:46 -0700 Subject: mm, slab: extend slab/shrink to shrink all memcg caches Currently, a value of '1" is written to /sys/kernel/slab//shrink file to shrink the slab by flushing out all the per-cpu slabs and free slabs in partial lists. This can be useful to squeeze out a bit more memory under extreme condition as well as making the active object counts in /proc/slabinfo more accurate. This usually applies only to the root caches, as the SLUB_MEMCG_SYSFS_ON option is usually not enabled and "slub_memcg_sysfs=1" not set. Even if memcg sysfs is turned on, it is too cumbersome and impractical to manage all those per-memcg sysfs files in a real production system. So there is no practical way to shrink memcg caches. Fix this by enabling a proper write to the shrink sysfs file of the root cache to scan all the available memcg cache