From 22cddde104d715600a4c218bf9224923208afe90 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 5 Nov 2012 11:07:23 -0800 Subject: ceph: Fix i_size update race ceph_aio_write() has an optimization that marks cap EPH_CAP_FILE_WR dirty before data is copied to page cache and inode size is updated. If ceph_check_caps() flushes the dirty cap before the inode size is updated, MDS can miss the new inode size. The fix is move ceph_{get,put}_cap_refs() into ceph_write_{begin,end}() and call __ceph_mark_dirty_caps() after inode size is updated. Signed-off-by: Yan, Zheng Signed-off-by: Sage Weil --- fs/ceph/addr.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) (limited to 'fs/ceph/addr.c') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 22b6e4583faa..21a07187df05 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1078,23 +1078,51 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_file_info *fi = file->private_data; struct page *page; pgoff_t index = pos >> PAGE_CACHE_SHIFT; - int r; + int r, want, got = 0; + + if (fi->fmode & CEPH_FILE_MODE_LAZY) + want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO; + else + want = CEPH_CAP_FILE_BUFFER; + + dout("write_begin %p %llx.%llx %llu~%u getting caps. i_size %llu\n", + inode, ceph_vinop(inode), pos, len, inode->i_size); + r = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos+len); + if (r < 0) + return r; + dout("write_begin %p %llx.%llx %llu~%u got cap refs on %s\n", + inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); + if (!(got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO))) { + ceph_put_cap_refs(ci, got); + return -EAGAIN; + } do { /* get a page */ page = grab_cache_page_write_begin(mapping, index, 0); - if (!page) - return -ENOMEM; - *pagep = page; + if (!page) { + r = -ENOMEM; + break; + } dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); r = ceph_update_writeable_page(file, pos, len, page); + if (r) + page_cache_release(page); } while (r == -EAGAIN); + if (r) { + ceph_put_cap_refs(ci, got); + } else { + *pagep = page; + *(int *)fsdata = got; + } return r; } @@ -1108,10 +1136,12 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata) { struct inode *inode = file->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_mds_client *mdsc = fsc->mdsc; unsigned from = pos & (PAGE_CACHE_SIZE - 1); int check_cap = 0; + int got = (unsigned long)fsdata; dout("write_end file %p inode %p page %p %d~%d (%d)\n", file, inode, page, (int)pos, (int)copied, (int)len); @@ -1134,6 +1164,19 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, up_read(&mdsc->snap_rwsem); page_cache_release(page); + if (copied > 0) { + int dirty; + spin_lock(&ci->i_ceph_lock); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + spin_unlock(&ci->i_ceph_lock); + if (dirty) + __mark_inode_dirty(inode, dirty); + } + + dout("write_end %p %llx.%llx %llu~%u dropping cap refs on %s\n", + inode, ceph_vinop(inode), pos, len, ceph_cap_string(got)); + ceph_put_cap_refs(ci, got); + if (check_cap) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); -- cgit v1.2.3 From 8884d53dd63b1d9315b343564fcbe1ede004a99e Mon Sep 17 00:00:00 2001 From: David Zafman Date: Mon, 3 Dec 2012 19:14:05 -0800 Subject: libceph: Unlock unprocessed pages in start_read() error path Function start_read() can get an error before processing all pages. It must not only release the remaining pages, but unlock them too. This fixes http://tracker.newdream.net/issues/3370 Signed-off-by: David Zafman Reviewed-by: Alex Elder --- fs/ceph/addr.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/ceph/addr.c') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 21a07187df05..8e8a818cba07 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -267,6 +267,14 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) kfree(req->r_pages); } +static void ceph_unlock_page_vector(struct page **pages, int num_pages) +{ + int i; + + for (i = 0; i < num_pages; i++) + unlock_page(pages[i]); +} + /* * start an async read(ahead) operation. return nr_pages we submitted * a read for on success, or negative error code. @@ -347,6 +355,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) return nr_pages; out_pages: + ceph_unlock_page_vector(pages, nr_pages); ceph_release_page_vector(pages, nr_pages); out: ceph_osdc_put_request(req); -- cgit v1.2.3