path: root/fs/ceph/metric.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_MDS_METRIC_H
#define _FS_CEPH_MDS_METRIC_H

#include <linux/types.h>
#include <linux/percpu_counter.h>
#include <linux/ktime.h>

extern bool disable_send_metrics;

enum ceph_metric_type {
	CLIENT_METRIC_TYPE_CAP_INFO,
	CLIENT_METRIC_TYPE_READ_LATENCY,
	CLIENT_METRIC_TYPE_WRITE_LATENCY,
	CLIENT_METRIC_TYPE_METADATA_LATENCY,
	CLIENT_METRIC_TYPE_DENTRY_LEASE,

	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
};

/*
 * This will always have the highest metric bit value
 * as the last element of the array.
 */
#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED {	\
	CLIENT_METRIC_TYPE_CAP_INFO,		\
	CLIENT_METRIC_TYPE_READ_LATENCY,	\
	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
						\
	CLIENT_METRIC_TYPE_MAX,			\
}
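
/*
 * Illustrative use of the spec macro (the array name below is
 * hypothetical, not part of this header):
 *
 *	static const enum ceph_metric_type metric_bits[] =
 *		CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
 *
 * Because CLIENT_METRIC_TYPE_MAX is always the last element, a sender
 * can size a "supported metrics" bitmap from the final entry.
 */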

/* metric caps header */
struct ceph_metric_cap {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(hit) + sizeof(mis) + sizeof(total) */
	__le64 hit;
	__le64 mis;
	__le64 total;
} __packed;

/* metric read latency header */
struct ceph_metric_read_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

/* metric write latency header */
struct ceph_metric_write_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

/* metric metadata latency header */
struct ceph_metric_metadata_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

struct ceph_metric_head {
	__le32 num;	/* the number of metrics that will be sent */
} __packed;
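
/*
 * Illustrative sketch (not part of the real header): how a sender
 * might fill the cap-info block before copying it into an outgoing
 * metrics message.  The function name and the hits/misses/total
 * parameters are hypothetical; the __packed structs above define the
 * actual wire layout, with multi-byte fields in little-endian order.
 */
static inline void ceph_metric_cap_fill_example(struct ceph_metric_cap *cap,
						u64 hits, u64 misses, u64 total)
{
	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
	cap->ver = 1;		/* assumed version, for illustration only */
	cap->compat = 1;
	/* data_len counts only the payload that follows the header fields */
	cap->data_len = cpu_to_le32(sizeof(cap->hit) + sizeof(cap->mis) +
				    sizeof(cap->total));
	cap->hit = cpu_to_le64(hits);
	cap->mis = cpu_to_le64(misses);
	cap->total = cpu_to_le64(total);
}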

/* The per-client global metrics */
struct ceph_client_metric {
	atomic64_t            total_dentries;
	struct percpu_counter d_lease_hit;
	struct percpu_counter d_lease_mis;

	atomic64_t            total_caps;
	struct percpu_counter i_caps_hit;
	struct percpu_counter i_caps_mis;

	spinlock_t read_latency_lock;
	u64 total_reads;
	ktime_t read_latency_sum;
	ktime_t read_latency_sq_sum;
	ktime_t read_latency_min;
	ktime_t read_latency_max;

	spinlock_t write_latency_lock;
	u64 total_writes;
	ktime_t write_latency_sum;
	ktime_t write_latency_sq_sum;
	ktime_t write_latency_min;
	ktime_t write_latency_max;

	spinlock_t metadata_latency_lock;
	u64 total_metadatas;
	ktime_t metadata_latency_sum;
	ktime_t metadata_latency_sq_sum;
	ktime_t metadata_latency_min;
	ktime_t metadata_latency_max;

	struct ceph_mds_session *session;
	struct delayed_work delayed_work;  /* periodic work to send the metrics */
};

static inline void metric_schedule_delayed(struct ceph_client_metric *m)
{
	if (disable_send_metrics)
		return;

	/* per second */
	schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ));
}
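
/*
 * The matching work handler lives in fs/ceph/metric.c; it is sketched
 * here (illustratively, not verbatim) to show the re-arming pattern:
 *
 *	static void metric_delayed_work(struct work_struct *work)
 *	{
 *		struct ceph_client_metric *m =
 *			container_of(work, struct ceph_client_metric,
 *				     delayed_work.work);
 *
 *		... encode and send the metric blocks to the MDS ...
 *		metric_schedule_delayed(m);  // re-arm for the next second
 *	}
 */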

extern int ceph_metric_init(struct ceph_client_metric *m);
extern void ceph_metric_destroy(struct ceph_client_metric *m);

static inline void ceph_update_cap_hit(struct ceph_client_metric *m)
{
	percpu_counter_inc(&m->i_caps_hit);
}

static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
{
	percpu_counter_inc(&m->i_caps_mis);
}

extern void ceph_update_read_latency(struct ceph_client_metric *m,
				     ktime_t r_start, ktime_t r_end,
				     int rc);
extern void ceph_update_write_latency(struct ceph_client_metric *m,
				      ktime_t r_start, ktime_t r_end,
				      int rc);
extern void ceph_update_metadata_latency(struct ceph_client_metric *m,
					 ktime_t r_start, ktime_t r_end,
					 int rc);
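
/*
 * Typical call pattern for the update helpers (illustrative; the real
 * call sites are in the fs/ceph read/write/metadata paths):
 *
 *	ktime_t start = ktime_get();
 *	ret = do_request(...);		// hypothetical operation
 *	ceph_update_read_latency(&metric, start, ktime_get(), ret);
 *
 * ceph_update_cap_hit()/ceph_update_cap_mis() are intended to be
 * called when a cap lookup hits or misses; they only bump cheap
 * per-cpu counters.
 */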
#endif /* _FS_CEPH_MDS_METRIC_H */
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS file operations.
 */

#include "protocol.h"
#include "pvfs2-kernel.h"
#include "pvfs2-bufmap.h"
#include <linux/fs.h>
#include <linux/pagemap.h>

#define wake_up_daemon_for_return(op)			\
do {							\
	spin_lock(&op->lock);                           \
	op->io_completed = 1;                           \
	spin_unlock(&op->lock);                         \
	wake_up_interruptible(&op->io_completion_waitq);\
} while (0)
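
/*
 * Illustrative counterpart (not shown in this file): the waiter side
 * blocks on op->io_completion_waitq and re-checks io_completed under
 * op->lock, along the lines of
 *
 *	spin_lock(&op->lock);
 *	done = op->io_completed;
 *	spin_unlock(&op->lock);
 *
 * wrapped in wait_event_interruptible().  Taking op->lock around the
 * flag update above keeps the flag write and the wakeup ordered with
 * respect to that check.
 */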

/*
 * Copy to client-core's address space from the buffers specified
 * by the iovec, up to total_size bytes.
 * NOTE: the iovec can contain addresses, which may be either
 *       kernel-space or user-space, or it can contain pointers
 *       to struct page's.
 */
static int precopy_buffers(struct pvfs2_bufmap *bufmap,
			   int buffer_index,
			   const struct iovec *vec,
			   unsigned long nr_segs,
			   size_t total_size)
{
	int ret = 0;
	struct iov_iter iter;

	/*
	 * Copy data from the application/kernel by pulling it out
	 * of the iovec.
	 */
	if (total_size) {
		iov_iter_init(&iter, WRITE, vec, nr_segs, total_size);
		ret = pvfs_bufmap_copy_from_iovec(bufmap,
						  &iter,
						  buffer_index,
						  total_size);
		if (ret < 0)
			gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
				   __func__,
				   (long)ret);
	}

	return ret;
}

/*
 * Copy from client-core's address space to the buffers specified
 * by the iovec, up to total_size bytes.
 * NOTE: the iovec can contain addresses, which may be either
 *       kernel-space or user-space, or it can contain pointers
 *       to struct page's.
 */
static int postcopy_buffers(struct pvfs2_bufmap *bufmap,
			    int buffer_index,
			    const struct iovec *vec,
			    unsigned long nr_segs,
			    size_t total_size)
{
	int ret = 0;
	struct iov_iter iter;

	/*
	 * Copy data to the application/kernel by pushing it out to
	 * the iovec. NOTE: target buffers can be addresses or
	 * struct page pointers.
	 */
	if (total_size) {
		iov_iter_init(&iter, READ, vec, nr_segs, total_size);
		ret = pvfs_bufmap_copy_to_iovec(bufmap,
						&iter,
						buffer_index);
		if (ret < 0)
			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
				__func__,
				(long)ret);
	}
	return ret;
}
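
/*
 * Direction note (illustrative): in this era's iov_iter_init() the
 * WRITE/READ argument names the I/O direction, not the copy direction.
 * For a file write the iovec is the data source; for a read it is the
 * destination:
 *
 *	iov_iter_init(&iter, WRITE, vec, nr_segs, len);	// write path
 *	copied = copy_from_iter(kbuf, len, &iter);	// pull from iovec
 *
 *	iov_iter_init(&iter, READ, vec, nr_segs, len);	// read path
 *	copied = copy_to_iter(kbuf, len, &iter);	// push to iovec
 */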

/*
 * Post and wait for the I/O upcall to finish
 */
static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode,
		loff_t *offset, struct iovec *vec, unsigned long nr_segs,
		size_t total_size, loff_t readahead_size)
{
	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
	struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
	struct pvfs2_bufmap *bufmap = NULL;
	struct pvfs2_kernel_op_s *new_op = NULL;
	int buffer_index = -1;
	ssize_t ret;

	new_op = op_alloc(PVFS2_VFS_OP_FILE_IO);
	if (!new_op) {
		ret = -ENOMEM;
		goto out;
	}
	/* synchronous I/O */
	new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO;
	new_op->upcall.req.io.readahead_size = readahead_size;
	new_op->upcall.req.io.io_type = type;
	new_op->upcall.req.io.refn = pvfs2_inode->refn;

populate_shared_memory:
	/* get a shared buffer index */
	ret = pvfs_bufmap_get(&bufmap, &buffer_index);
	if (ret < 0) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: pvfs_bufmap_get failure (%ld)\n",
			     __func__, (long)ret);
		goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): GET op %p -> buffer_index %d\n",
		     __func__,
		     handle,
		     new_op,
		     buffer_index);

	new_op->uses_shared_memory = 1;
	new_op->upcall.req.io.buf_index = buffer_index;
	new_op->upcall.req.io.count = total_size;
	new_op->upcall.req.io.offset = *offset;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): nr_segs %lu, offset: %llu total_size: %zd\n",
		     __func__,
		     handle,
		     nr_segs,
		     llu(*offset),
		     total_size);
	/*
	 * Stage 1: copy the buffers into client-core's address space
	 * precopy_buffers only pertains to writes.
	 */
	if (type == PVFS_IO_WRITE) {
		ret = precopy_buffers(bufmap,
				      buffer_index,
				      vec,
				      nr_segs,
				      total_size);
		if (ret < 0)
			goto out;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Calling post_io_request with tag (%llu)\n"</