// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet fragments management
 *
 * 		Authors:	Pavel Emelyanov <xemul@openvz.org>
 *				Started as consolidation of ipv4/ip_fragment.c,
 *				ipv6/reassembly, and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
	union {
		struct inet_skb_parm	h4;
		struct inet6_skb_parm	h6;
	};
	struct sk_buff		*next_frag;
	int			frag_run_len;
};

#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))

static void fragcb_clear(struct sk_buff *skb)
{
	RB_CLEAR_NODE(&skb->rbnode);
	FRAG_CB(skb)->next_frag = NULL;
	FRAG_CB(skb)->frag_run_len = skb->len;
}

/* Append skb to the last "run". */
static void fragrun_append_to_last(struct inet_frag_queue *q,
				   struct sk_buff *skb)
{
	fragcb_clear(skb);

	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
	FRAG_CB(q->fragments_tail)->next_frag = skb;
	q->fragments_tail = skb;
}

/* Create a new "run" with the skb. */
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
	fragcb_clear(skb);

	if (q->last_run_head)
		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
			     &q->last_run_head->rbnode.rb_right);
	else
		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
	rb_insert_color(&skb->rbnode, &q->rb_fragments);

	q->fragments_tail = skb;
	q->last_run_head = skb;
}

/* Given the ORed ECN flags of all fragments, apply the RFC 3168
 * section 5.3 reassembly requirements.
 * Value: 0xff if the frame should be dropped;
 *        otherwise 0 or INET_ECN_CE, to be ORed into the final iph->tos field.
 */
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

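/* Register a fragment-queue family: create the slab cache its queues are
 * allocated from and set up the refcount/completion pair used to
 * synchronize teardown with in-flight fqdir destroy work.
 */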
int inet_frags_init(struct inet_frags *f)
{
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	refcount_set(&f->refcnt, 1);
	init_completion(&f->completion);
	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

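/* Unregister a fragment-queue family: wait until every fqdir that took a
 * reference in fqdir_init() has finished its destroy work, then release
 * the slab cache.
 */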
void inet_frags_fini(struct inet_frags *f)
{
	if (refcount_dec_and_test(&f->refcnt))
		complete(&f->completion);

	wait_for_completion(&f->completion);

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

/* Called from rhashtable_free_and_destroy() while an fqdir is being dismantled */
static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;
	int count;

	count = del_timer_sync(&fq->timer) ? 1 : 0;

	spin_lock_bh(&fq->lock);
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq->flags |= INET_FRAG_COMPLETE;
		count++;
	} else if (fq->flags & INET_FRAG_HASH_DEAD) {
		count++;
	}
	spin_unlock_bh(&fq->lock);

	if (refcount_sub_and_test(count, &fq->refcnt))
		inet_frag_destroy(fq);
}

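/* Deferred fqdir dismantle: evict every queue left in the rhashtable via
 * inet_frags_free_cb(), wait for outstanding RCU callbacks that still
 * dereference the fqdir, then drop the reference on the inet_frags
 * descriptor and free the fqdir itself.
 */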
static void fqdir_work_fn(struct work_struct *work)
{
	struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);
	struct inet_frags *f = fqdir->f;

	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);

	/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
	 * callbacks have completed, since they need to dereference fqdir.
	 * Would it not be nice to have kfree_rcu_barrier() ? :)
	 */
	rcu_barrier();

	if (refcount_dec_and_test(&f->refcnt))
		complete(&f->completion);

	kfree(fqdir);
}

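/* Allocate and initialize the fragment directory (fqdir) for a network
 * namespace, including its rhashtable, and take a reference on the
 * inet_frags descriptor that is dropped once fqdir_work_fn() has run.
 */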
int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
{
	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
	int res;

	if (!fqdir)
		return -ENOMEM;
	fqdir->f = f;
	fqdir->net = net;
	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
	if (res < 0) {
		kfree(fqdir);
		return res;
	}
	refcount_inc(&f->refcnt);
	*fqdirp = fqdir;
	return 0;
}
EXPORT_SYMBOL(fqdir_init);

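/* Begin tearing down an fqdir; the actual rhashtable dismantle is
 * deferred to fqdir_work_fn() on the system workqueue.
 */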
void fqdir_exit(struct fqdir *fqdir)
{
	INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
	queue_work(system_wq, &fqdir->destroy_work);
}
EXPORT_SYMBOL(fqdir_exit);

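/* Mark a fragment queue dead: stop its timer, flag it complete and,
 * unless the hash table itself is being torn down, unlink it from the
 * fqdir rhashtable, dropping the references held by the timer and the
 * table.
 */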
void inet_frag_kill(struct inet_frag_queue *fq)
{
	if (del_timer(&fq->timer))
		refcount_dec(&fq->refcnt);

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct fqdir *fqdir = fq->fqdir;

		fq->flags |= INET_FRAG_COMPLETE;
		rcu_read_lock();
		/* The RCU read lock provides a memory barrier
		 * guaranteeing that if fqdir->dead is false then
		 * the hash table destruction will not start until
		 * after we unlock.  Paired with the write of
		 * fqdir->dead on the fqdir dismantle path.
		 */
		if (!fqdir->dead) {
			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
					       fqdir->f->rhash_params);
			refcount_dec(&fq->refcnt);
		} else {
			fq->flags |= INET_FRAG_HASH_DEAD;
		}
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(inet_frag_kill);

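/* RCU callback that finally frees a queue: run the per-family destructor,
 * then return the object to the frags slab cache.
 */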
static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
						 rcu);
	struct inet_frags *f = q->fqdir->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

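/* Free every skb queued in the rb-tree, following each "run" through
 * next_frag, and return the total truesize released.
 */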
unsigned int inet_frag_rbtree_purge(struct rb_root *root)
{
	struct rb_node *p = rb_first(root);
	unsigned