// SPDX-License-Identifier: GPL-2.0-only
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* stack_user.c
*
* Code which interfaces ocfs2 with fs/dlm and a userspace stack.
*
* Copyright (C) 2007 Oracle. All rights reserved.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include "stackglue.h"
#include <linux/dlm_plock.h>
/*
* The control protocol starts with a handshake. Until the handshake
* is complete, the control device will fail all write(2)s.
*
* The handshake is simple. First, the client reads until EOF. Each line
* of output is a supported protocol tag. All protocol tags are a single
* character followed by a two hex digit version number. Currently the
* only thing supported is T01, for "Text-based version 0x01". Next, the
* client writes the version they would like to use, including the newline.
* Thus, the protocol tag is 'T01\n'. If the version tag written is
* unknown, -EINVAL is returned. Once the negotiation is complete, the
* client can start sending messages.
*
* The T01 protocol has three messages. First is the "SETN" message.
* It has the following syntax:
*
* SETN<space><8-char-hex-nodenum><newline>
*
* This is 14 characters.
*
* The "SETN" message must be the first message following the protocol.
* It tells ocfs2_control the local node number.
*
* Next comes the "SETV" message. It has the following syntax:
*
* SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
*
* This is 11 characters.
*
* The "SETV" message sets the filesystem locking protocol version as
* negotiated by the client. The client negotiates based on the maximum
* version advertised in /sys/fs/ocfs2/max_locking_protocol. The major
* number from the "SETV" message must match
* ocfs2_user_plugin.sp_max_proto.pv_major, and the minor number
* must be less than or equal to ocfs2_user_plugin.sp_max_proto.pv_minor.
*
* Once this information has been set, mounts will be allowed. From this
* point on, the "DOWN" message can be sent for node down notification.
* It has the following syntax:
*
* DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
*
* eg:
*
* DOWN 632A924FDD844190BDA93C0DF6B94899 00000001\n
*
* This is 47 characters.
*/
/*
 * Handshake protocol tag written by the client to select a protocol.
 * For now, we have just one protocol version.
 * OCFS2_CONTROL_PROTO_LEN is the tag length in characters, counting the
 * trailing newline but not the terminating NUL.
 */
#define OCFS2_CONTROL_PROTO "T01\n"
#define OCFS2_CONTROL_PROTO_LEN 4
/*
 * Handshake states, stored in ocfs2_control_private.op_state.
 * NOTE(review): the transitions between states are driven by code outside
 * this section - confirm the exact ordering there.
 */
#define OCFS2_CONTROL_HANDSHAKE_INVALID (0)
#define OCFS2_CONTROL_HANDSHAKE_READ (1)
#define OCFS2_CONTROL_HANDSHAKE_PROTOCOL (2)
#define OCFS2_CONTROL_HANDSHAKE_VALID (3)
/*
 * Messages.  Every message starts with a 4-character operation tag.
 * The *_TOTAL_LEN values are the full message length in characters,
 * including separating spaces and the trailing newline:
 *   SETN: 4 + 1 + 8 + 1             = 14
 *   SETV: 4 + 1 + 2 + 1 + 2 + 1     = 11
 *   DOWN: 4 + 1 + 32 + 1 + 8 + 1    = 47
 */
#define OCFS2_CONTROL_MESSAGE_OP_LEN 4
#define OCFS2_CONTROL_MESSAGE_SETNODE_OP "SETN"
#define OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN 14
#define OCFS2_CONTROL_MESSAGE_SETVERSION_OP "SETV"
#define OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN 11
#define OCFS2_CONTROL_MESSAGE_DOWN_OP "DOWN"
#define OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN 47
/* Field widths, in hex characters, of the uuid, version and node number */
#define OCFS2_TEXT_UUID_LEN 32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
/* NOTE(review): presumably the name of a DLM lock; its use is outside this section */
#define VERSION_LOCK "version_lock"
/*
 * Kind of live connection, stored in ocfs2_live_connection.oc_type.
 * NOTE(review): presumably distinguishes connections that rely on the
 * userspace control daemon from those that do not - confirm against the
 * code that sets oc_type.
 */
enum ocfs2_connection_type {
WITH_CONTROLD,
NO_CONTROLD
};
/*
 * State kept for one live cluster connection.
 *
 * ocfs2_live_connection is refcounted because the filesystem and
 * miscdevice sides can detach in different order.  Let's just be safe.
 *
 * NOTE(review): no dedicated reference-count member is visible in the
 * structure below - confirm that the statement above still matches the code.
 */
struct ocfs2_live_connection {
/* List linkage; presumably on ocfs2_live_connection_list - confirm */
struct list_head oc_list;
/* The cluster connection this entry belongs to */
struct ocfs2_cluster_connection *oc_conn;
/* WITH_CONTROLD or NO_CONTROLD */
enum ocfs2_connection_type oc_type;
/* Accessed atomically; presumably the local node number - confirm */
atomic_t oc_this_node;
/* NOTE(review): presumably a slot number for this node - confirm */
int oc_our_slot;
/* DLM lock status block; presumably for the VERSION_LOCK lock - confirm */
struct dlm_lksb oc_version_lksb;
/* DLM_LVB_LEN-byte buffer; presumably the lock value block - confirm */
char oc_lvb[DLM_LVB_LEN];
/* Completion object; what it signals is decided outside this section */
struct completion oc_sync_wait;
/* Wait queue; waiters and wakers are outside this section */
wait_queue_head_t oc_wait;
};
/*
 * Per-open state of the control device, reached through
 * file->private_data (see the handshake state accessors).
 */
struct ocfs2_control_private {
/* List linkage; presumably on ocfs2_control_private_list - confirm */
struct list_head op_list;
/* One of the OCFS2_CONTROL_HANDSHAKE_* values */
int op_state;
/* NOTE(review): presumably the node number supplied via SETN - confirm */
int op_this_node;
/* NOTE(review): presumably the version supplied via SETV - confirm */
struct ocfs2_protocol_version op_proto;
};
/*
 * SETN<space><8-char-hex-nodenum><newline>
 *
 * Character-for-character layout of the "SETN" message: 4 + 1 + 8 + 1
 * chars, matching OCFS2_CONTROL_MESSAGE_SETNODE_TOTAL_LEN (14).
 */
struct ocfs2_control_message_setn {
/* Operation tag, "SETN" (not NUL-terminated) */
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
/* Separator position */
char space;
/* Node number as 8 hex characters (not NUL-terminated) */
char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
/* Terminator position */
char newline;
};
/*
 * SETV<space><2-char-hex-major><space><2-char-hex-minor><newline>
 *
 * Character-for-character layout of the "SETV" message: 4 + 1 + 2 + 1 +
 * 2 + 1 chars, matching OCFS2_CONTROL_MESSAGE_SETVERSION_TOTAL_LEN (11).
 */
struct ocfs2_control_message_setv {
/* Operation tag, "SETV" (not NUL-terminated) */
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
/* First separator position */
char space1;
/* Major version as 2 hex characters (not NUL-terminated) */
char major[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
/* Second separator position */
char space2;
/* Minor version as 2 hex characters (not NUL-terminated) */
char minor[OCFS2_CONTROL_MESSAGE_VERNUM_LEN];
/* Terminator position */
char newline;
};
/*
 * DOWN<space><32-char-cap-hex-uuid><space><8-char-hex-nodenum><newline>
 *
 * Character-for-character layout of the "DOWN" message: 4 + 1 + 32 + 1 +
 * 8 + 1 chars, matching OCFS2_CONTROL_MESSAGE_DOWN_TOTAL_LEN (47).
 */
struct ocfs2_control_message_down {
/* Operation tag, "DOWN" (not NUL-terminated) */
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
/* First separator position */
char space1;
/* UUID as 32 capital hex characters (not NUL-terminated) */
char uuid[OCFS2_TEXT_UUID_LEN];
/* Second separator position */
char space2;
/* Node number as 8 hex characters (not NUL-terminated) */
char nodestr[OCFS2_CONTROL_MESSAGE_NODENUM_LEN];
/* Terminator position */
char newline;
};
/*
 * Overlay of every control message layout.  Each member begins with the
 * same OCFS2_CONTROL_MESSAGE_OP_LEN-character tag, so 'tag' aliases the
 * operation name of whichever message is stored.
 */
union ocfs2_control_message {
/* Operation tag shared by all message layouts */
char tag[OCFS2_CONTROL_MESSAGE_OP_LEN];
/* "SETN" layout */
struct ocfs2_control_message_setn u_setn;
/* "SETV" layout */
struct ocfs2_control_message_setv u_setv;
/* "DOWN" layout */
struct ocfs2_control_message_down u_down;
};
/* Forward declaration of this stack plugin; defined elsewhere in the file */
static struct ocfs2_stack_plugin ocfs2_user_plugin;
/* NOTE(review): presumably counts open handles on the control device - confirm */
static atomic_t ocfs2_control_opened;
/* Starts at -1; presumably meaning "local node number not yet set" - confirm */
static int ocfs2_control_this_node = -1;
/* NOTE(review): presumably the protocol version set via SETV - confirm */
static struct ocfs2_protocol_version running_proto;
/* Head of the list of struct ocfs2_live_connection (presumably via oc_list) */
static LIST_HEAD(ocfs2_live_connection_list);
/* Head of the list of struct ocfs2_control_private (presumably via op_list) */
static LIST_HEAD(ocfs2_control_private_list);
/* NOTE(review): which of the objects above this mutex guards is not visible here */
static DEFINE_MUTEX(ocfs2_control_lock);
/*
 * Record a new handshake state for an open control-device file.
 *
 * @file:  open file whose private_data points at its
 *         struct ocfs2_control_private
 * @state: value to store in op_state (an OCFS2_CONTROL_HANDSHAKE_* value)
 */
static inline void ocfs2_control_set_handshake_state(struct file *file,
						      int state)
{
	struct ocfs2_control_private *priv;

	priv = file->private_data;
	priv->op_state = state;
}
static inline int ocfs2_control_get_handshake_state(struct file *file)
{
struct ocfs2_control_private *p = file->private_data;
return p->op_state;
}
static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
{
size_t len