summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mm/filemap.c132
1 files changed, 85 insertions, 47 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 385759c4ce4b..8c3d3e233d37 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1002,6 +1002,7 @@ struct wait_page_queue {
static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
{
+ int ret;
struct wait_page_key *key = arg;
struct wait_page_queue *wait_page
= container_of(wait, struct wait_page_queue, wait);
@@ -1014,17 +1015,40 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
return 0;
/*
- * Stop walking if it's locked.
- * Is this safe if put_and_wait_on_page_locked() is in use?
- * Yes: the waker must hold a reference to this page, and if PG_locked
- * has now already been set by another task, that task must also hold
- * a reference to the *same usage* of this page; so there is no need
- * to walk on to wake even the put_and_wait_on_page_locked() callers.
+ * If it's an exclusive wait, we get the bit for it, and
+ * stop walking if we can't.
+ *
+ * If it's a non-exclusive wait, then the fact that this
+ * wake function was called means that the bit already
+ * was cleared, and we don't care if somebody then
+ * re-took it.
*/
- if (test_bit(key->bit_nr, &key->page->flags))
- return -1;
+ ret = 0;
+ if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+ if (test_and_set_bit(key->bit_nr, &key->page->flags))
+ return -1;
+ ret = 1;
+ }
+ wait->flags |= WQ_FLAG_WOKEN;
- return autoremove_wake_function(wait, mode, sync, key);
+ wake_up_state(wait->private, mode);
+
+ /*
+ * Ok, we have successfully done what we're waiting for,
+ * and we can unconditionally remove the wait entry.
+ *
+ * Note that this has to be the absolute last thing we do,
+ * since after list_del_init(&wait->entry) the wait entry
+ * might be de-allocated and the process might even have
+ * exited.
+ *
+ * We _really_ should have a "list_del_init_careful()" to
+ * properly pair with the unlocked "list_empty_careful()"
+ * in finish_wait().
+ */
+ smp_mb();
+ list_del_init(&wait->entry);
+ return ret;
}
static void wake_up_page_bit(struct page *page, int bit_nr)
@@ -1103,16 +1127,31 @@ enum behavior {
*/
};
+/*
+ * Attempt to check (or get) the page bit, and mark the
+ * waiter woken if successful.
+ */
+static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+ struct wait_queue_entry *wait)
+{
+ if (wait->flags & WQ_FLAG_EXCLUSIVE) {
+ if (test_and_set_bit(bit_nr, &page->flags))
+ return false;
+ } else if (test_bit(bit_nr, &page->flags))
+ return false;
+
+ wait->flags |= WQ_FLAG_WOKEN;
+ return true;
+}
+
static inline int wait_on_page_bit_common(wait_queue_head_t *q,
struct page *page, int bit_nr, int state, enum behavior behavior)
{
struct wait_page_queue wait_page;
wait_queue_entry_t *wait = &wait_page.wait;
- bool bit_is_set;
bool thrashing = false;
bool delayacct = false;
unsigned long pflags;
- int ret = 0;
if (bit_nr == PG_locked &&
!PageUptodate(page) && PageWorkingset(page)) {
@@ -1130,48 +1169,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
wait_page.page = page;
wait_page.bit_nr = bit_nr;
- for (;;) {
- spin_lock_irq(&q->lock);
+ /*
+ * Do one last check whether we can get the
+ * page bit synchronously.
+ *
+ * Do the SetPageWaiters() marking before that
+ * to let any waker we _just_ missed know they
+ * need to wake us up (otherwise they'll never
+ * even go to the slow case that looks at the
+ * page queue), and add ourselves to the wait
+ * queue if we need to sleep.
+ *
+ * This part needs to be done under the queue
+ * lock to avoid races.
+ */
+ spin_lock_irq(&q->lock);
+ SetPageWaiters(page);
+ if (!trylock_page_bit_common(page, bit_nr, wait))
+ __add_wait_queue_entry_tail(q, wait);
+ spin_unlock_irq(&q->lock);
- if (likely(list_empty(&wait->entry))) {
- __add_wait_queue_entry_tail(q, wait);
- SetPageWaiters(page);
- }
+ /*
+ * From now on, all the logic will be based on
+ * the WQ_FLAG_WOKEN flag, and the page
+ * bit testing (and setting) will be - or has
+ * already been - done by the wake function.
+ *
+ * We can drop our reference to the page.
+ */
+ if (behavior == DROP)
+ put_page(page);
+ for (;;) {
set_current_state(state);
- spin_unlock_irq(&q->lock);
-
- bit_is_set = test_bit(bit_nr, &page->flags);
- if (behavior == DROP)
- put_page(page);
-
- if (likely(bit_is_set))
- io_schedule();
-
- if (behavior == EXCLUSIVE) {
- if (!test_and_set_bit_lock(bit_nr, &page->flags))
- break;
- } else if (behavior == SHARED) {
- if (!test_bit(bit_nr, &page->flags))
- break;
- }
-
- if (signal_pending_state(state, current)) {
- ret = -EINTR;
+ if (signal_pending_state(state, current))
break;
- }
- if (behavior == DROP) {
- /*
- * We can no longer safely access page->flags:
- * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
- * there is a risk of waiting forever on a page reused
- * for something that keeps it locked indefinitely.
- * But best check for -EINTR above before breaking.
- */
+ if (wait->flags & WQ_FLAG_WOKEN)
break;
- }
+
+ io_schedule();
}
finish_wait(q, wait);
@@ -1190,7 +1228,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
* bother with signals either.
*/
- return ret;
+ return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
}
void wait_on_page_bit(struct page *page, int bit_nr)