/*
* z3fold.c
*
* Author: Vitaly Wool <vitaly.wool@konsulko.com>
* Copyright (C) 2016, Sony Mobile Communications Inc.
*
* This implementation is based on zbud written by Seth Jennings.
*
* z3fold is an special purpose allocator for storing compressed pages. It
* can store up to three compressed pages per page which improves the
* compression ratio of zbud while retaining its main concepts (e. g. always
* storing an integral number of objects per page) and simplicity.
* It still has simple and deterministic reclaim properties that make it
* preferable to a higher density approach (with no requirement on integral
* number of object per page) when reclaim is used.
*
* As in zbud, pages are divided into "chunks". The size of the chunks is
* fixed at compile time and is determined by NCHUNKS_ORDER below.
*
* z3fold doesn't export any API and is meant to be used via zpool API.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/atomic.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/zpool.h>
/*****************
* Structures
*****************/
struct z3fold_pool;
struct z3fold_ops {
int (*evict)(struct z3fold_pool *pool, unsigned long handle);
};
enum buddy {
HEADLESS = 0,
FIRST,
MIDDLE,
LAST,
BUDDIES_MAX
};
/*
* struct z3fold_header - z3fold page metadata occupying first chunks of each
* z3fold page, except for HEADLESS pages
* @buddy: links the z3fold page into the relevant list in the
* pool
* @page_lock: per-page lock
* @refcount: reference count for the z3fold page
* @work: work_struct for page layout optimization
* @pool: pointer to the pool which this page belongs to
* @cpu: CPU which this page "belongs" to
* @first_chunks: the size of the first buddy in chunks, 0 if free
* @middle_chunks: the size of the middle buddy in chunks, 0 if free
* @last_chunks: the size of the last buddy in chunks, 0 if free
* @first_num: the starting number (for the first handle)
*/
struct z3fold_header {
struct list_head buddy;
spinlock_t page_lock;
struct kref refcount;
struct work_struct work;
struct z3fold_pool *pool;
short cpu;
unsigned short first_chunks;
unsigned short middle_chunks;
unsigned short last_chunks;
unsigned short start_middle;
unsigned short first_num:2;
};
/*
* NCHUNKS_ORDER determines the internal allocation granularity, effectively
* adjusting internal fragmentation. It also determines the number of
* freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
* allocation granularity will be in chunks of size PAGE_SIZE/64. Some chunks
* in the beginning of an allocated page are occupied by z3fold header, so
* NCHUNKS will be calculated to 63 (or 62 in case CONFIG_DEBUG_SPINLOCK=y),
* which shows the max number of free chunks in z3fold page, also there will
* be 63, or 62, respectively, freelists per pool.
*/
#define NCHUNKS_ORDER 6
#define CHUNK_SHIFT (PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE (1 << CHUNK_SHIFT)
#define ZHDR_SIZE_ALIGNED round_up(sizeof(struct z3fold_header), CHUNK_SIZE)
#define ZHDR_CHUNKS (ZHDR_SIZE_ALIGNED >> CHUNK_SHIFT)
#define TOTAL_CHUNKS (PAGE_SIZE >> CHUNK_SHIFT)
#define NCHUNKS ((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
#define BUDDY_MASK (0x3)
#define BUDDY_SHIFT 2
/**
* struct z3fold_pool - stores metadata for each z3fold pool
* @name: pool name
* @lock: protects pool unbuddied/lru lists
* @stale_lock: protects pool stale page list
* @unbuddied: per-cpu array of lists tracking z3fold pages that contain 2-
* buddies; the list each z3fold page is added to depends on
* the size of its free region.
* @lru: list tracking the z3fold pages in LRU order by most recently
* added buddy.
* @stale: list of pages marked for freeing
* @pages_nr: number of z3fold pages in the pool.
* @ops: pointer to a structure of user defined operations specified at
* pool creation time.
* @compact_wq: workqueue for page layout background optimization
* @release_wq: workqueue for safe page release
* @work: work_struct for safe page release
*
* This structure is allocated at pool creation time and maintains metadata
* pertaining to a particular z3fold pool.
*/
struct z3fold_pool {
const char *name;
spinlock_t lock;
spinlock_t stale_lock;
struct list_head *unbuddied;
struct list_head lru;
struct list_head stale;
atomic64_t pages_nr;
const struct z3fold_ops *ops;
struct zpool *zpool;
const struct zpool_ops *zpool_ops;
struct workqueue_struct *compact_wq;
struct workqueue_struct *release_wq;
struct work_struct work;
};
/*
* Internal z3fold page flags
*/
enum z3fold_page_flags {
PAGE_HEADLESS = 0,
MIDDLE_CHUNK_MAPPED,
NEEDS_COMPACTING,
PAGE_STALE,
PAGE_CLAIMED, /* by either reclaim or free */
};
/*****************
* Helpers
*****************/
/* Converts an allocation size in bytes to size in z3fold chunks */
static int size_to_chunks(size_t size)
{
return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
}
#define for_each_unbuddied_list(_iter, _begin) \
for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
static void compact_page_work(struct work_struct *w);
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page,
struct z3fold_pool *pool)
{
struct z3fold_header *zhdr = page_address(page);
INIT_LIST_HEAD(&page->lru);
clear_bit(PAGE_HEADLESS, &page->private);
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
clear_bit(NEEDS_COMPACTING