path: root/fs/ceph/metric.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _FS_CEPH_MDS_METRIC_H
#define _FS_CEPH_MDS_METRIC_H

#include <linux/types.h>
#include <linux/percpu_counter.h>
#include <linux/ktime.h>

extern bool disable_send_metrics;

enum ceph_metric_type {
	CLIENT_METRIC_TYPE_CAP_INFO,
	CLIENT_METRIC_TYPE_READ_LATENCY,
	CLIENT_METRIC_TYPE_WRITE_LATENCY,
	CLIENT_METRIC_TYPE_METADATA_LATENCY,
	CLIENT_METRIC_TYPE_DENTRY_LEASE,

	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
};

/*
 * This will always have the highest metric bit value
 * as the last element of the array.
 */
#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED {	\
	CLIENT_METRIC_TYPE_CAP_INFO,		\
	CLIENT_METRIC_TYPE_READ_LATENCY,	\
	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
						\
	CLIENT_METRIC_TYPE_MAX,			\
}
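
/*
 * Illustrative use of the spec macro (the array name below is
 * hypothetical, not part of this header):
 *
 *	static const enum ceph_metric_type metric_bits[] =
 *		CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
 *
 * Because CLIENT_METRIC_TYPE_MAX is always the last element, a sender
 * can size a "supported metrics" bitmap from the final entry.
 */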

/* metric caps header */
struct ceph_metric_cap {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(hit) + sizeof(mis) + sizeof(total) */
	__le64 hit;
	__le64 mis;
	__le64 total;
} __packed;

/* metric read latency header */
struct ceph_metric_read_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

/* metric write latency header */
struct ceph_metric_write_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

/* metric metadata latency header */
struct ceph_metric_metadata_latency {
	__le32 type;     /* ceph metric type */

	__u8  ver;
	__u8  compat;

	__le32 data_len; /* payload length: sizeof(sec) + sizeof(nsec) */
	__le32 sec;
	__le32 nsec;
} __packed;

struct ceph_metric_head {
	__le32 num;	/* the number of metrics that will be sent */
} __packed;
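
/*
 * Illustrative sketch (not part of the real header): how a sender
 * might fill the cap-info block before copying it into an outgoing
 * metrics message.  The function name and the hits/misses/total
 * parameters are hypothetical; the __packed structs above define the
 * actual wire layout, with multi-byte fields in little-endian order.
 */
static inline void ceph_metric_cap_fill_example(struct ceph_metric_cap *cap,
						u64 hits, u64 misses, u64 total)
{
	cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
	cap->ver = 1;		/* assumed version, for illustration only */
	cap->compat = 1;
	/* data_len counts only the payload that follows the header fields */
	cap->data_len = cpu_to_le32(sizeof(cap->hit) + sizeof(cap->mis) +
				    sizeof(cap->total));
	cap->hit = cpu_to_le64(hits);
	cap->mis = cpu_to_le64(misses);
	cap->total = cpu_to_le64(total);
}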

/* The per-client global metrics */
struct ceph_client_metric {
	atomic64_t            total_dentries;
	struct percpu_counter d_lease_hit;
	struct percpu_counter d_lease_mis;

	atomic64_t            total_caps;
	struct percpu_counter i_caps_hit;
	struct percpu_counter i_caps_mis;

	spinlock_t read_latency_lock;
	u64 total_reads;
	ktime_t read_latency_sum;
	ktime_t read_latency_sq_sum;
	ktime_t read_latency_min;
	ktime_t read_latency_max;

	spinlock_t write_latency_lock;
	u64 total_writes;
	ktime_t write_latency_sum;
	ktime_t write_latency_sq_sum;
	ktime_t write_latency_min;
	ktime_t write_latency_max;

	spinlock_t metadata_latency_lock;
	u64 total_metadatas;
	ktime_t metadata_latency_sum;
	ktime_t metadata_latency_sq_sum;
	ktime_t metadata_latency_min;
	ktime_t metadata_latency_max;

	struct ceph_mds_session *session;
	struct delayed_work delayed_work;  /* periodic work to send the metrics */
};

static inline void metric_schedule_delayed(struct ceph_client_metric *m)
{
	if (disable_send_metrics)
		return;

	/* per second */
	schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ));
}
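
/*
 * The matching work handler lives in fs/ceph/metric.c; it is sketched
 * here (illustratively, not verbatim) to show the re-arming pattern:
 *
 *	static void metric_delayed_work(struct work_struct *work)
 *	{
 *		struct ceph_client_metric *m =
 *			container_of(work, struct ceph_client_metric,
 *				     delayed_work.work);
 *
 *		... encode and send the metric blocks to the MDS ...
 *		metric_schedule_delayed(m);  // re-arm for the next second
 *	}
 */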

extern int ceph_metric_init(struct ceph_client_metric *m);
extern void ceph_metric_destroy(struct ceph_client_metric *m);

static inline void ceph_update_cap_hit(struct ceph_client_metric *m)
{
	percpu_counter_inc(&m->i_caps_hit);
}

static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
{
	percpu_counter_inc(&m->i_caps_mis);
}

extern void ceph_update_read_latency(struct ceph_client_metric *m,
				     ktime_t r_start, ktime_t r_end,
				     int rc);
extern void ceph_update_write_latency(struct ceph_client_metric *m,
				      ktime_t r_start, ktime_t r_end,
				      int rc);
extern void ceph_update_metadata_latency(struct ceph_client_metric *m,
					 ktime_t r_start, ktime_t r_end,
					 int rc);
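
/*
 * Typical call pattern for the update helpers (illustrative; the real
 * call sites are in the fs/ceph read/write/metadata paths):
 *
 *	ktime_t start = ktime_get();
 *	ret = do_request(...);		// hypothetical operation
 *	ceph_update_read_latency(&metric, start, ktime_get(), ret);
 *
 * ceph_update_cap_hit()/ceph_update_cap_mis() are intended to be
 * called when a cap lookup hits or misses; they only bump cheap
 * per-cpu counters.
 */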
#endif /* _FS_CEPH_MDS_METRIC_H */
/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS file operations.
 */

#include "protocol.h"
#include "pvfs2-kernel.h"
#include "pvfs2-bufmap.h"
#include <linux/fs.h>
#include <linux/pagemap.h>

#define wake_up_daemon_for_return(op)			\
do {							\
	spin_lock(&op->lock);                           \
	op->io_completed = 1;                           \
	spin_unlock(&op->lock);                         \
	wake_up_interruptible(&op->io_completion_waitq);\
} while (0)
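
/*
 * Illustrative counterpart (not shown in this file): the waiter side
 * blocks on op->io_completion_waitq and re-checks io_completed under
 * op->lock, along the lines of
 *
 *	spin_lock(&op->lock);
 *	done = op->io_completed;
 *	spin_unlock(&op->lock);
 *
 * wrapped in wait_event_interruptible().  Taking op->lock around the
 * flag update above keeps the flag write and the wakeup ordered with
 * respect to that check.
 */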

/*
 * Copy to client-core's address space from the buffers specified
 * by the iovec, up to total_size bytes.
 * NOTE: the iovec can contain addresses, which may be either
 *       kernel-space or user-space, or it can contain pointers
 *       to struct page's.
 */
static int precopy_buffers(struct pvfs2_bufmap *bufmap,
			   int buffer_index,
			   const struct iovec *vec,
			   unsigned long nr_segs,
			   size_t total_size)
{
	int ret = 0;
	struct iov_iter iter;

	/*
	 * Copy data from the application/kernel by pulling it out
	 * of the iovec.
	 */
	if (total_size) {
		iov_iter_init(&iter, WRITE, vec, nr_segs, total_size);
		ret = pvfs_bufmap_copy_from_iovec(bufmap,
						  &iter,
						  buffer_index,
						  total_size);
		if (ret < 0)
			gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
				   __func__,
				   (long)ret);
	}

	return ret;
}

/*
 * Copy from client-core's address space to the buffers specified
 * by the iovec, up to total_size bytes.
 * NOTE: the iovec can contain addresses, which may be either
 *       kernel-space or user-space, or it can contain pointers
 *       to struct page's.
 */
static int postcopy_buffers(struct pvfs2_bufmap *bufmap,
			    int buffer_index,
			    const struct iovec *vec,
			    unsigned long nr_segs,
			    size_t total_size)
{
	int ret = 0;
	struct iov_iter iter;

	/*
	 * Copy data to the application/kernel by pushing it out to
	 * the iovec. NOTE: target buffers can be addresses or
	 * struct page pointers.
	 */
	if (total_size) {
		iov_iter_init(&iter, READ, vec, nr_segs, total_size);
		ret = pvfs_bufmap_copy_to_iovec(bufmap,
						&iter,
						buffer_index);
		if (ret < 0)
			gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
				__func__,
				(long)ret);
	}
	return ret;
}
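
/*
 * Direction note (illustrative): in this era's iov_iter_init() the
 * WRITE/READ argument names the I/O direction, not the copy direction.
 * For a file write the iovec is the data source; for a read it is the
 * destination:
 *
 *	iov_iter_init(&iter, WRITE, vec, nr_segs, len);	// write path
 *	copied = copy_from_iter(kbuf, len, &iter);	// pull from iovec
 *
 *	iov_iter_init(&iter, READ, vec, nr_segs, len);	// read path
 *	copied = copy_to_iter(kbuf, len, &iter);	// push to iovec
 */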

/*
 * Post and wait for the I/O upcall to finish
 */
static ssize_t wait_for_direct_io(enum PVFS_io_type type, struct inode *inode,
		loff_t *offset, struct iovec *vec, unsigned long nr_segs,
		size_t total_size, loff_t readahead_size)
{
	struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
	struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
	struct pvfs2_bufmap *bufmap = NULL;
	struct pvfs2_kernel_op_s *new_op = NULL;
	int buffer_index = -1;
	ssize_t ret;

	new_op = op_alloc(PVFS2_VFS_OP_FILE_IO);
	if (!new_op) {
		ret = -ENOMEM;
		goto out;
	}
	/* synchronous I/O */
	new_op->upcall.req.io.async_vfs_io = PVFS_VFS_SYNC_IO;
	new_op->upcall.req.io.readahead_size = readahead_size;
	new_op->upcall.req.io.io_type = type;
	new_op->upcall.req.io.refn = pvfs2_inode->refn;

populate_shared_memory:
	/* get a shared buffer index */
	ret = pvfs_bufmap_get(&bufmap, &buffer_index);
	if (ret < 0) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: pvfs_bufmap_get failure (%ld)\n",
			     __func__, (long)ret);
		goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): GET op %p -> buffer_index %d\n",
		     __func__,
		     handle,
		     new_op,
		     buffer_index);

	new_op->uses_shared_memory = 1;
	new_op->upcall.req.io.buf_index = buffer_index;
	new_op->upcall.req.io.count = total_size;
	new_op->upcall.req.io.offset = *offset;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): nr_segs %lu, offset: %llu total_size: %zd\n",
		     __func__,
		     handle,
		     nr_segs,
		     llu(*offset),
		     total_size);
	/*
	 * Stage 1: copy the buffers into client-core's address space
	 * precopy_buffers only pertains to writes.
	 */
	if (type == PVFS_IO_WRITE) {
		ret = precopy_buffers(bufmap,
				      buffer_index,
				      vec,
				      nr_segs,
				      total_size);
		if (ret < 0)
			goto out;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Calling post_io_request with tag (%llu)\n"</