summaryrefslogtreecommitdiffstats
path: root/block/bfq-iosched.c
AgeCommit message (Expand)Author
2018-09-14block, bfq: do not plug I/O if all queues are weight-raisedPaolo Valente
2018-09-14block, bfq: inject other-queue I/O into seeky idle queues on NCQ flashPaolo Valente
2018-08-16block, bfq: reduce write overchargePaolo Valente
2018-08-16block, bfq: readd missing reset of parent-entity servicePaolo Valente
2018-07-09block, bfq: give a better name to bfq_bfqq_may_idlePaolo Valente
2018-07-09block, bfq: fix service being wrongly set to zero in case of preemptionPaolo Valente
2018-07-09block, bfq: do not expire a queue that will deserve dispatch pluggingPaolo Valente
2018-07-09block, bfq: add/remove entity weights correctlyPaolo Valente
2018-05-31block, bfq: prevent soft_rt_next_start from being stuck at infinityDavide Sapienza
2018-05-31block, bfq: increase weight-raising duration for interactive appsDavide Sapienza
2018-05-31block, bfq: remove slow-system classPaolo Valente
2018-05-31block, bfq: add description of weight-raising heuristicsPaolo Valente
2018-05-31block, bfq: remove the removal of 'next' rq in bfq_requests_mergedFilippo Muzzini
2018-05-31block, bfq: remove wrong check in bfq_requests_mergedPaolo Valente
2018-05-31block, bfq: remove wrong lock in bfq_requests_mergedFilippo Muzzini
2018-05-10bfq-iosched: update shallow depth to smallest one usedJens Axboe
2018-05-10bfq-iosched: remove unused variableJens Axboe
2018-05-10bfq: calculate shallow depths at init timeJens Axboe
2018-05-10bfq-iosched: don't worry about reserved tags in limit_depthJens Axboe
2018-05-10block, bfq: postpone rq preparation to insert or mergePaolo Valente
2018-05-09block: consolidate struct request timestamp fieldsOmar Sandoval
2018-04-17bfq-iosched: ensure to clear bic/bfqq pointers when preparing requestJens Axboe
2018-03-26block, bfq: lower-bound the estimated peak rate to 1Paolo Valente
2018-02-07block, bfq: add requeue-request hookPaolo Valente
2018-01-18block, bfq: limit sectors served with interactive weight raisingPaolo Valente
2018-01-18block, bfq: limit tags for writes and async I/OPaolo Valente
2018-01-10block, bfq: fix occurrences of request finish method's old nameChiara Bruschi
2018-01-09bfq-iosched: don't call bfqg_and_blkg_put for !CONFIG_BFQ_GROUP_IOSCHEDJens Axboe
2018-01-09block, bfq: release oom-queue ref to root group on exitPaolo Valente
2018-01-05block, bfq: remove batches of confusing ifdefsPaolo Valente
2018-01-05block, bfq: consider also past I/O in soft real-time detectionPaolo Valente
2018-01-05block, bfq: remove superfluous check in queue-merging setupAngelo Ruocco
2018-01-05block, bfq: let a queue be merged only shortly after starting I/OPaolo Valente
2018-01-05block, bfq: check low_latency flag in bfq_bfqq_save_state()Angelo Ruocco
2018-01-05block, bfq: add missing rq_pos_tree update on rq removalPaolo Valente
2018-01-05block, bfq: increase threshold to deem I/O as randomPaolo Valente
2017-11-14block, bfq: move debug blkio stats behind CONFIG_DEBUG_BLK_CGROUPLuca Miccio
2017-11-14block, bfq: update blkio stats outside the scheduler lockPaolo Valente
2017-11-14block, bfq: add missing invocations of bfqg_stats_update_io_add/removeLuca Miccio
2017-10-09block, bfq: fix unbalanced decrements of burst sizePaolo Valente
2017-10-09block,bfq: Disable writeback throttlingLuca Miccio
2017-10-03block, bfq: decrease burst size when queues in burst exitPaolo Valente
2017-10-03block, bfq: let early-merged queues be weight-raised on split tooPaolo Valente
2017-10-03block, bfq: check and switch back to interactive wr also on queue splitPaolo Valente
2017-10-03block, bfq: fix wrong init of saved start time for weight raisingPaolo Valente
2017-09-09Merge branch 'for-4.14/block-postmerge' of git://git.kernel.dk/linux-blockLinus Torvalds
2017-09-01bfq: Use icq_to_bic() consistentlyBart Van Assche
2017-09-01bfq: Suppress compiler warnings about comparisonsBart Van Assche
2017-09-01bfq: Check kstrtoul() return valueBart Van Assche
2017-09-01bfq: Annotate fall-through in a switch statementBart Van Assche

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet fragments management
 *
 * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
 *				Started as consolidation of ipv4/ip_fragment.c,
 *				ipv6/reassembly.c and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
	/* IPv4 / IPv6 per-packet control data, placed first in skb->cb.
	 * NOTE(review): presumably kept first so that existing users of the
	 * protocol control block still find it at offset 0 -- confirm.
	 */
	union {
		struct inet_skb_parm	h4;
		struct inet6_skb_parm	h6;
	};
	/* Next fragment of the same "run"; NULL at the tail of the run. */
	struct sk_buff		*next_frag;
	/* On the head of a "run": sum of the lengths of all fragments in
	 * that run. fragcb_clear() initialises it to the skb's own length.
	 */
	int			frag_run_len;
};

/* View skb->cb of @skb as a struct ipfrag_skb_cb. */
#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))

static void fragcb_clear(struct sk_buff *skb)
{
	RB_CLEAR_NODE(&skb->rbnode);
	FRAG_CB(skb)->next_frag = NULL;
	FRAG_CB(skb)->frag_run_len = skb->len;
}

/* Append skb to the last "run". */
static void fragrun_append_to_last(struct inet_frag_queue *q,
				   struct sk_buff *skb)
{
	fragcb_clear(skb);

	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
	FRAG_CB(q->fragments_tail)->next_frag = skb;
	q->fragments_tail = skb;
}

/* Create a new "run" with the skb. */
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
	fragcb_clear(skb);

	if (q->last_run_head)
		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
			     &q->last_run_head->rbnode.rb_right);
	else
		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
	rb_insert_color(&skb->rbnode, &q->rb_fragments);

	q->fragments_tail = skb;
	q->last_run_head = skb;
}

/* RFC 3168 5.3 requirements, indexed by the OR of the IPFRAG_ECN_* values
 * of all fragments.
 *
 * Entry value:
 *   0xff        - the frame should be dropped;
 *   INET_ECN_CE - value to be ORed in to the final iph->tos field;
 *   0           - every index not listed below: nothing to OR in.
 */
const u8 ip_frag_ecn_table[16] = {
	/* invalid combinations (NOT_ECT together with anything else):
	 * drop frame
	 */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0]			= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1]			= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 |
	 IPFRAG_ECN_ECT_1]					= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE]			= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE |
	 IPFRAG_ECN_ECT_0]					= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE |
	 IPFRAG_ECN_ECT_1]					= 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE |
	 IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]			= 0xff,

	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

/* Set up the state shared by all fqdirs of @f: create the slab cache for
 * its queues (objects of f->qsize bytes, named f->frags_cache_name), take
 * the initial reference and initialise the completion that
 * inet_frags_fini() waits on.
 *
 * Returns 0 on success, -ENOMEM if the cache could not be created.
 */
int inet_frags_init(struct inet_frags *f)
{
	struct kmem_cache *cachep;

	cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, NULL);
	f->frags_cachep = cachep;
	if (!cachep)
		return -ENOMEM;

	init_completion(&f->completion);
	refcount_set(&f->refcnt, 1);
	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

/* Tear down what inet_frags_init() created.
 *
 * Drops the initial reference on @f, then blocks until the completion is
 * signalled, i.e. until the last reference is gone (fqdir_init() takes one
 * per fqdir and fqdir_work_fn() drops it). Only then is the slab cache
 * destroyed.
 */
void inet_frags_fini(struct inet_frags *f)
{
	struct completion *done = &f->completion;

	if (refcount_dec_and_test(&f->refcnt))
		complete(done);

	wait_for_completion(done);

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

/* Per-queue callback of rhashtable_free_and_destroy(), run at netns_frags
 * dismantle.
 *
 * Works out how many references must be dropped on behalf of the queue:
 * one if its timer was still pending, plus one for the hash table, either
 * because the queue was not yet complete or because inet_frag_kill()
 * marked it INET_FRAG_HASH_DEAD and left that reference in place. The
 * queue is destroyed when those were the last references.
 */
static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;
	int refs_to_drop = 0;

	if (del_timer_sync(&fq->timer))
		refs_to_drop++;

	spin_lock_bh(&fq->lock);
	if (fq->flags & INET_FRAG_COMPLETE) {
		if (fq->flags & INET_FRAG_HASH_DEAD)
			refs_to_drop++;
	} else {
		fq->flags |= INET_FRAG_COMPLETE;
		refs_to_drop++;
	}
	spin_unlock_bh(&fq->lock);

	if (refcount_sub_and_test(refs_to_drop, &fq->refcnt))
		inet_frag_destroy(fq);
}

/* Deferred destruction of a fqdir, queued by fqdir_exit().
 *
 * Empties and destroys the hash table, waits for outstanding RCU
 * callbacks, drops the fqdir's reference on its inet_frags and finally
 * frees the fqdir itself.
 */
static void fqdir_work_fn(struct work_struct *work)
{
	struct fqdir *fqdir;
	struct inet_frags *frags;

	fqdir = container_of(work, struct fqdir, destroy_work);
	/* Fetched up front: fqdir is freed at the end of this function. */
	frags = fqdir->f;

	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);

	/* Every pending call_rcu(..., inet_frag_destroy_rcu) has to finish
	 * before fqdir goes away, because those callbacks dereference it.
	 * Would it not be nice to have kfree_rcu_barrier() ? :)
	 */
	rcu_barrier();

	if (refcount_dec_and_test(&frags->refcnt))
		complete(&frags->completion);

	kfree(fqdir);
}

/* Allocate and initialise a fqdir bound to @f and @net.
 *
 * On success a reference is taken on @f, the new fqdir is stored in
 * *@fqdirp and 0 is returned. On failure *@fqdirp is left untouched and
 * the result is -ENOMEM (allocation) or the negative error reported by
 * rhashtable_init().
 */
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
{
	struct fqdir *fqdir;
	int res;

	fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
	if (!fqdir)
		return -ENOMEM;

	fqdir->net = net;
	fqdir->f = f;

	res = rhashtable_init(&fqdir->rhashtable, &f->rhash_params);
	if (res < 0)
		goto err_free;

	refcount_inc(&f->refcnt);
	*fqdirp = fqdir;
	return 0;

err_free:
	kfree(fqdir);
	return res;
}
EXPORT_SYMBOL(fqdir_init);

/* Hand @fqdir over to system_wq for asynchronous destruction;
 * fqdir_work_fn() destroys its hash table and frees it.
 */
void fqdir_exit(struct fqdir *fqdir)
{
	struct work_struct *destroy = &fqdir->destroy_work;

	INIT_WORK(destroy, fqdir_work_fn);
	queue_work(system_wq, destroy);
}
EXPORT_SYMBOL(fqdir_exit);

/* Take @fq out of service.
 *
 * Cancels the queue timer, dropping the timer's reference if it was still
 * pending. If the queue was not already complete, it is marked
 * INET_FRAG_COMPLETE and either removed from the hash table (dropping the
 * table's reference) or, when the fqdir is already dead, flagged
 * INET_FRAG_HASH_DEAD so that inet_frags_free_cb() drops that reference
 * instead.
 */
void inet_frag_kill(struct inet_frag_queue *fq)
{
	struct fqdir *fqdir;

	if (del_timer(&fq->timer))
		refcount_dec(&fq->refcnt);

	/* Already killed: nothing left to undo. */
	if (fq->flags & INET_FRAG_COMPLETE)
		return;

	fqdir = fq->fqdir;
	fq->flags |= INET_FRAG_COMPLETE;

	rcu_read_lock();
	/* The RCU read lock provides a memory barrier guaranteeing that
	 * if fqdir->dead is false then the hash table destruction will
	 * not start until after we unlock.
	 * Paired with inet_frags_exit_net().
	 */
	if (fqdir->dead) {
		fq->flags |= INET_FRAG_HASH_DEAD;
	} else {
		rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
				       fqdir->f->rhash_params);
		refcount_dec(&fq->refcnt);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(inet_frag_kill);

/* RCU callback releasing a fragment queue: runs the optional
 * protocol-specific destructor, then returns the queue to its slab cache.
 */
static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q;
	struct inet_frags *f;

	q = container_of(head, struct inet_frag_queue, rcu);
	/* Looked up before the destructor runs and before q is freed. */
	f = q->fqdir->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

unsigned int inet_frag_rbtree_purge(struct rb_root *root)
{
	struct rb_node *p = rb_first(root);
	unsigned