#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# top-like utility for displaying kvm statistics
#
# Copyright 2006-2008 Qumranet Technologies
# Copyright 2008-2011 Red Hat, Inc.
#
# Authors:
#  Avi Kivity
#
"""The kvm_stat module outputs statistics about running KVM VMs

Three different ways of output formatting are available:
- as a top-like text ui
- in a key -> value format
- in an all keys, all values format

The data is sampled from the KVM's debugfs entries and its perf events.
"""
from __future__ import print_function

import curses
import sys
import locale
import os
import time
import optparse
import ctypes
import fcntl
import resource
import struct
import re
import subprocess
from collections import defaultdict, namedtuple

VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}

SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}

# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}

# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}

# Default perf ioctl request numbers. Architectures that need different
# values must work on a copy and leave this table untouched.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}

ENCODING = locale.getpreferredencoding(False)
# Matches field names that contain no '(' at all.
TRACE_FILTER = re.compile(r'^[^\(]*$')


class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch object for the machine reported by os.uname().

        ppc, aarch64 and s390 are recognized by the machine string. For
        everything else /proc/cpuinfo is scanned for a 'flags' line that
        contains 'vmx' or 'svm'. Returns None when no such line is found.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        elif machine.startswith('aarch64'):
            return ArchA64()
        elif machine.startswith('s390'):
            return ArchS390()
        else:
            # X86_64
            # The context manager closes the file on every return path.
            with open('/proc/cpuinfo') as cpuinfo:
                for line in cpuinfo:
                    if not line.startswith('flags'):
                        continue

                    flags = line.split()
                    if 'vmx' in flags:
                        return ArchX86(VMX_EXIT_REASONS)
                    if 'svm' in flags:
                        return ArchX86(SVM_EXIT_REASONS)
            return None

    def tracepoint_is_child(self, field):
        """Returns the part of 'field' before its first '(', or None if
        'field' contains no '('.

        """
        if TRACE_FILTER.match(field):
            return None
        return field.split('(', 1)[0]


class ArchX86(Arch):
    """x86 data; the exit reason table is supplied by the caller."""
    def __init__(self, exit_reasons):
        self.sc_perf_evt_open = 298
        self.ioctl_numbers = IOCTL_NUMBERS
        self.exit_reason_field = 'exit_reason'
        self.exit_reasons = exit_reasons

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        return None


class ArchPPC(Arch):
    """PPC data; uses its own ioctl numbers and has no exit reason table."""
    def __init__(self):
        self.sc_perf_evt_open = 319
        # Work on a private copy: writing through the shared module-level
        # IOCTL_NUMBERS would silently change the numbers seen by every
        # other Arch object that references it.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
        self.exit_reason_field = 'exit_nr'
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        return None


class ArchA64(Arch):
    """aarch64 data; exit reasons come from AARCH64_EXIT_REASONS."""
    def __init__(self):
        self.sc_perf_evt_open = 241
        self.ioctl_numbers = IOCTL_NUMBERS
        self.exit_reason_field = 'esr_ec'
        self.exit_reasons = AARCH64_EXIT_REASONS

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        return None


class ArchS390(Arch):
    """s390 data; no exit reason field or table is defined."""
    def __init__(self):
        self.sc_perf_evt_open = 331
        self.ioctl_numbers = IOCTL_NUMBERS
        self.exit_reason_field = None
        self.exit_reasons = None

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise """
        if field.startswith('instruction_'):
            return 'exit_instruction'
        return None


ARCH = Arch.get_arch()


class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) resolves
        # to this very __init__ again when called on a subclass instance
        # and recurses without end.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP


PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3


class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        self.events = []

    def add_event(self, event):
        """Appends 'event' to the group; the first one added is the one
        whose file descriptor read() uses.

        """
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        Values are read by reading from the file descriptor of the
        event that is the group leader. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        length = 8 * (1 + len(self.events))
        # Eight pad bytes skip 'nr'; one 'Q' per event follows.
        read_format = 'xxxxxxxx' + 'Q' * len(self.events)
        return dict(zip([event.name for event in self.events],
                        struct.unpack(read_format,
                                      os.read(self.events[0].fd, length))))


class Event(object):
    """Represents a performance event and manages its life cycle.

    NOTE(review): only the beginning of this class is visible here;
    _setup_event(), which __init__ calls, is defined further on.

    """
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self.name = name
        self.fd = None
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # getattr: __init__ may have raised before self.fd was assigned.
        # 'is not None': descriptor 0 is a valid descriptor as well.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        # NOTE(review): the visible source is cut off right after the
        # group_fd argument. 'flags' is passed last, as listed in the
        # docstring above; confirm its ctypes wrapper against the
        # complete file.
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))
/*
 *	Sysfs attributes of bridge ports
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Stephen Hemminger		<shemminger@osdl.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/if_bridge.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>

#include "br_private.h"

/*
 * A bridge port attribute: the generic attribute plus the two typed
 * handlers used by this file.  Either handler may be NULL; the
 * BRPORT_ATTR() users below pass NULL for the direction they do not
 * support.
 */
struct brport_attribute {
	struct attribute attr;

	/* Formats the value of port @p into @buf. */
	ssize_t (*show)(struct net_bridge_port *p, char *buf);

	/* Applies the already parsed value @val to port @p. */
	int (*store)(struct net_bridge_port *p, unsigned long val);
};

/*
 * BRPORT_ATTR - define the object brport_attr_<_name>
 * @_name:  attribute name; stringified into .attr.name and pasted into
 *          the variable name
 * @_mode:  value stored in .attr.mode
 * @_show:  show handler, or NULL
 * @_store: store handler, or NULL
 *
 * The storage class is written by the user in front of the macro
 * ("static BRPORT_ATTR(...)").  The expansion already ends in ';', so
 * the ';' written after each use becomes an empty declaration.
 */
#define BRPORT_ATTR(_name, _mode, _show, _store)			\
struct brport_attribute brport_attr_##_name = {			\
	.attr	= {							\
		.name = __stringify(_name),				\
		.mode = _mode,						\
	},								\
	.show	= _show,						\
	.store	= _store,						\
};

/* Print p->path_cost into @buf as "%d\n"; returns what sprintf() returned. */
static ssize_t show_path_cost(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->path_cost);
	return len;
}

/* "path_cost": mode S_IRUGO | S_IWUSR; stores go to br_stp_set_path_cost(). */
static BRPORT_ATTR(path_cost, S_IRUGO | S_IWUSR,
		   show_path_cost, br_stp_set_path_cost);

/* Print p->priority into @buf as "%d\n"; returns what sprintf() returned. */
static ssize_t show_priority(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->priority);
	return len;
}

/* "priority": mode S_IRUGO | S_IWUSR; stores go to br_stp_set_port_priority(). */
static BRPORT_ATTR(priority, S_IRUGO | S_IWUSR,
		   show_priority, br_stp_set_port_priority);

/* Format p->designated_root into @buf through br_show_bridge_id(). */
static ssize_t show_designated_root(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = br_show_bridge_id(buf, &p->designated_root);
	return len;
}

/* "designated_root": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(designated_root, S_IRUGO, show_designated_root, NULL);

/* Format p->designated_bridge into @buf through br_show_bridge_id(). */
static ssize_t show_designated_bridge(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = br_show_bridge_id(buf, &p->designated_bridge);
	return len;
}

/* "designated_bridge": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(designated_bridge, S_IRUGO, show_designated_bridge, NULL);

/* Print p->designated_port into @buf as "%d\n". */
static ssize_t show_designated_port(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->designated_port);
	return len;
}

/* "designated_port": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(designated_port, S_IRUGO, show_designated_port, NULL);

/* Print p->designated_cost into @buf as "%d\n". */
static ssize_t show_designated_cost(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->designated_cost);
	return len;
}

/* "designated_cost": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(designated_cost, S_IRUGO, show_designated_cost, NULL);

/* Print p->port_id into @buf in hex with a "0x" prefix ("0x%x\n"). */
static ssize_t show_port_id(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "0x%x\n", p->port_id);
	return len;
}

/* "port_id": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(port_id, S_IRUGO, show_port_id, NULL);

/* Print p->port_no into @buf in hex with a "0x" prefix ("0x%x\n"). */
static ssize_t show_port_no(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "0x%x\n", p->port_no);
	return len;
}

/* "port_no": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(port_no, S_IRUGO, show_port_no, NULL);

/* Print p->topology_change_ack into @buf as "%d\n". */
static ssize_t show_change_ack(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->topology_change_ack);
	return len;
}

/* "change_ack": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(change_ack, S_IRUGO, show_change_ack, NULL);

/* Print p->config_pending into @buf as "%d\n". */
static ssize_t show_config_pending(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->config_pending);
	return len;
}

/* "config_pending": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(config_pending, S_IRUGO, show_config_pending, NULL);

/* Print p->state into @buf as "%d\n". */
static ssize_t show_port_state(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->state);
	return len;
}

/* Attribute "state" (not "port_state"): mode S_IRUGO, no store handler. */
static BRPORT_ATTR(state, S_IRUGO, show_port_state, NULL);

/* Print br_timer_value() of p->message_age_timer into @buf as "%ld\n". */
static ssize_t show_message_age_timer(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%ld\n", br_timer_value(&p->message_age_timer));
	return len;
}

/* "message_age_timer": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(message_age_timer, S_IRUGO, show_message_age_timer, NULL);

/* Print br_timer_value() of p->forward_delay_timer into @buf as "%ld\n". */
static ssize_t show_forward_delay_timer(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%ld\n", br_timer_value(&p->forward_delay_timer));
	return len;
}

/* "forward_delay_timer": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(forward_delay_timer, S_IRUGO, show_forward_delay_timer, NULL);

/* Print br_timer_value() of p->hold_timer into @buf as "%ld\n". */
static ssize_t show_hold_timer(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%ld\n", br_timer_value(&p->hold_timer));
	return len;
}

/* "hold_timer": mode S_IRUGO, no store handler. */
static BRPORT_ATTR(hold_timer, S_IRUGO, show_hold_timer, NULL);

/*
 * Store handler for "flush": calls br_fdb_delete_by_port() for this port.
 * The written value @v is not looked at.  Always returns 0.
 */
static int store_flush(struct net_bridge_port *p, unsigned long v)
{
	/* Last argument 0: don't delete local entry. */
	br_fdb_delete_by_port(p->br, p, 0);

	return 0;
}

/* "flush": mode S_IWUSR only, no show handler. */
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);

/* Print 1 if BR_HAIRPIN_MODE is set in p->flags, else 0, as "%d\n". */
static ssize_t show_hairpin_mode(struct net_bridge_port *p, char *buf)
{
	/* !! folds the masked flag word to exactly 0 or 1. */
	return sprintf(buf, "%d\n", !!(p->flags & BR_HAIRPIN_MODE));
}
/*
 * Store handler for "hairpin_mode": a non-zero @v sets BR_HAIRPIN_MODE in
 * p->flags, zero clears it.  Always returns 0.
 */
static int store_hairpin_mode(struct net_bridge_port *p, unsigned long v)
{
	if (!v) {
		p->flags &= ~BR_HAIRPIN_MODE;
		return 0;
	}

	p->flags |= BR_HAIRPIN_MODE;
	return 0;
}

/* "hairpin_mode": mode S_IRUGO | S_IWUSR, with both handlers. */
static BRPORT_ATTR(hairpin_mode, S_IRUGO | S_IWUSR,
		   show_hairpin_mode, store_hairpin_mode);

#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* Print p->multicast_router into @buf as "%d\n". */
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
{
	ssize_t len;

	len = sprintf(buf, "%d\n", p->multicast_router);
	return len;
}

/*
 * Store handler for "multicast_router": hands @v to
 * br_multicast_set_port_router() and returns its result unchanged.
 */
static int store_multicast_router(struct net_bridge_port *p, unsigned long v)
{
	int err;

	err = br_multicast_set_port_router(p, v);
	return err;
}

/* "multicast_router": mode S_IRUGO | S_IWUSR, with both handlers. */
static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR,
		   show_multicast_router, store_multicast_router);
#endif

/*
 * NULL-terminated table of all bridge port attributes defined above.
 * The multicast_router entry is compiled in only under
 * CONFIG_BRIDGE_IGMP_SNOOPING, the same condition that guards its
 * definition.
 * NOTE(review): the code that walks this table is not visible in this
 * part of the file.
 */
static struct brport_attribute *brport_attrs[] = {
	&brport_attr_path_cost,
	&brport_attr_priority,
	&brport_attr_port_id,
	&brport_attr_port_no,
	&brport_attr_designated_root,
	&brport_attr_designated_bridge,
	&brport_attr_designated_port,
	&brport_attr_designated_cost,
	&brport_attr_state,
	&brport_attr_change_ack,
	&brport_attr_config_pending,
	&brport_attr_message_age_timer,
	&brport_attr_forward_delay_timer,
	&brport_attr_hold_timer,
	&brport_attr_flush,
	&brport_attr_hairpin_mode,
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	&brport_attr_multicast_router,
#endif
	NULL
};

#define to_brport_attr(_at) container_of(_at, struct brport_attribute, attr)
#define to_brport(obj)	container_of(obj, struct net_bridge_port, kobj)

/*
 * Generic show dispatcher for bridge port attributes.
 *
 * Recovers the brport_attribute that embeds @attr and the net_bridge_port
 * that embeds @kobj, then calls the attribute's show handler with @buf.
 * NOTE(review): presumably installed as the sysfs show callback; the ops
 * table is not visible in this part of the file.
 *
 * Attributes can be declared without a show handler ("flush" passes NULL),
 * so a missing handler is rejected with -EINVAL instead of being called
 * through a NULL pointer.  This mirrors the check brport_store() already
 * makes on ->store.
 *
 * Returns the handler's result, or -EINVAL when there is no handler.
 */
static ssize_t brport_show(struct kobject *kobj,
			   struct attribute *attr, char *buf)
{
	struct brport_attribute *brport_attr = to_brport_attr(attr);
	struct net_bridge_port *p = to_brport(kobj);

	if (!brport_attr->show)
		return -EINVAL;

	return brport_attr->show(p, buf);
}

static ssize_t brport_store(struct kobject * kobj,
			    struct attribute * attr,
			    const char * buf, size_t count)
{
	struct brport_attribute * brport_attr = to_brport_attr(attr);
	struct net_bridge_port * p = to_brport(kobj);
	ssize_t ret = -EINVAL;
	char *endp;
	unsigned long val;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	val = simple_strtoul(buf, &endp, 0);
	if (endp != buf) {
		if (!rtnl_trylock())
			return restart_syscall();
		if (p->dev && p->br && brport_attr->store) {
			spin_lock_bh(&p->