From d529a18a61f3f497328f096ddf757af928d6105b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:56 -0500 Subject: drivers: base: cacheinfo: move cache_setup_of_node() In preparation for the next patch, and to aid in review of that patch, lets move cache_setup_of_node further down in the module without any changes. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 80 ++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'drivers') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index edf726267282..09ccef7ddc99 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) } #ifdef CONFIG_OF -static int cache_setup_of_node(unsigned int cpu) -{ - struct device_node *np; - struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - unsigned int index = 0; - - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) - return 0; - - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; - if (!np) { - pr_err("Failed to find cpu%d device node\n", cpu); - return -ENOENT; - } - - while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; - if (this_leaf->level != 1) - np = of_find_next_cache_node(np); - else - np = of_node_get(np);/* cpu node itself */ - if (!np) - break; - this_leaf->of_node = np; - index++; - } - - if (index != cache_leaves(cpu)) /* not all OF nodes populated */ - return -ENOENT; - - return 0; -} - static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct 
cacheinfo *sib_leaf) { @@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu) cache_associativity(this_leaf); } } + +static int cache_setup_of_node(unsigned int cpu) +{ + struct device_node *np; + struct cacheinfo *this_leaf; + struct device *cpu_dev = get_cpu_device(cpu); + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int index = 0; + + /* skip if of_node is already populated */ + if (this_cpu_ci->info_list->of_node) + return 0; + + if (!cpu_dev) { + pr_err("No cpu device for CPU %d\n", cpu); + return -ENODEV; + } + np = cpu_dev->of_node; + if (!np) { + pr_err("Failed to find cpu%d device node\n", cpu); + return -ENOENT; + } + + while (index < cache_leaves(cpu)) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level != 1) + np = of_find_next_cache_node(np); + else + np = of_node_get(np);/* cpu node itself */ + if (!np) + break; + this_leaf->of_node = np; + index++; + } + + if (index != cache_leaves(cpu)) /* not all OF nodes populated */ + return -ENOENT; + + return 0; +} #else static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } -- cgit v1.2.3 From 2ff075c7dfd4705de12d687daede2dd664386b1c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:57 -0500 Subject: drivers: base: cacheinfo: setup DT cache properties early The original intent in cacheinfo was that an architecture specific populate_cache_leaves() would probe the hardware and then cache_shared_cpu_map_setup() and cache_override_properties() would provide firmware help to extend/expand upon what was probed. Arm64 was really the only architecture that was working this way, and with the removal of most of the hardware probing logic it became clear that it was possible to simplify the logic a bit. This patch combines the walk of the DT nodes with the code updating the cache size/line_size and nr_sets. 
cache_override_properties() (which was DT specific) is then removed. The result is that cacheinfo.of_node is no longer used as a temporary place to hold DT references for future calls that update cache properties. That change helps to clarify its one remaining use (matching cacheinfo nodes that represent shared caches) which will be used by the ACPI/PPTT code in the following patches. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 65 +++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 09ccef7ddc99..a872523e8951 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type) return type; } -static void cache_size(struct cacheinfo *this_leaf) +static void cache_size(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *cache_size; @@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].size_prop; - cache_size = of_get_property(this_leaf->of_node, propname, NULL); + cache_size = of_get_property(np, propname, NULL); if (cache_size) this_leaf->size = of_read_number(cache_size, 1); } /* not cache_line_size() because that's a macro in include/linux/cache.h */ -static void cache_get_line_size(struct cacheinfo *this_leaf) +static void cache_get_line_size(struct cacheinfo *this_leaf, + struct device_node *np) { const __be32 *line_size; int i, lim, ct_idx; @@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) const char *propname; propname = 
cache_type_info[ct_idx].line_size_props[i]; - line_size = of_get_property(this_leaf->of_node, propname, NULL); + line_size = of_get_property(np, propname, NULL); if (line_size) break; } @@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) this_leaf->coherency_line_size = of_read_number(line_size, 1); } -static void cache_nr_sets(struct cacheinfo *this_leaf) +static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *nr_sets; @@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].nr_sets_prop; - nr_sets = of_get_property(this_leaf->of_node, propname, NULL); + nr_sets = of_get_property(np, propname, NULL); if (nr_sets) this_leaf->number_of_sets = of_read_number(nr_sets, 1); } @@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } -static bool cache_node_is_unified(struct cacheinfo *this_leaf) +static bool cache_node_is_unified(struct cacheinfo *this_leaf, + struct device_node *np) { - return of_property_read_bool(this_leaf->of_node, "cache-unified"); + return of_property_read_bool(np, "cache-unified"); } -static void cache_of_override_properties(unsigned int cpu) +static void cache_of_set_props(struct cacheinfo *this_leaf, + struct device_node *np) { - int index; - struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - - for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; - /* - * init_cache_level must setup the cache level correctly - * overriding the architecturally specified levels, so - * if type is NONE at this stage, it should be unified - */ - if (this_leaf->type == CACHE_TYPE_NOCACHE && - cache_node_is_unified(this_leaf)) - this_leaf->type = CACHE_TYPE_UNIFIED; - cache_size(this_leaf); - 
cache_get_line_size(this_leaf); - cache_nr_sets(this_leaf); - cache_associativity(this_leaf); - } + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf, np)) + this_leaf->type = CACHE_TYPE_UNIFIED; + cache_size(this_leaf, np); + cache_get_line_size(this_leaf, np); + cache_nr_sets(this_leaf, np); + cache_associativity(this_leaf); } static int cache_setup_of_node(unsigned int cpu) @@ -193,6 +189,7 @@ static int cache_setup_of_node(unsigned int cpu) np = of_node_get(np);/* cpu node itself */ if (!np) break; + cache_of_set_props(this_leaf, np); this_leaf->of_node = np; index++; } @@ -203,7 +200,6 @@ static int cache_setup_of_node(unsigned int cpu) return 0; } #else -static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) @@ -286,12 +282,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) } } -static void cache_override_properties(unsigned int cpu) -{ - if (of_have_populated_dt()) - return cache_of_override_properties(cpu); -} - static void free_cache_attributes(unsigned int cpu) { if (!per_cpu_cacheinfo(cpu)) @@ -325,6 +315,10 @@ static int detect_cache_attributes(unsigned int cpu) if (per_cpu_cacheinfo(cpu) == NULL) return -ENOMEM; + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. 
+ */ ret = populate_cache_leaves(cpu); if (ret) goto free_ci; @@ -338,7 +332,6 @@ static int detect_cache_attributes(unsigned int cpu) goto free_ci; } - cache_override_properties(cpu); return 0; free_ci: -- cgit v1.2.3 From 9b97387c5c4260ffcdf3b913bdef0d98cb2d4a74 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:58 -0500 Subject: cacheinfo: rename of_node to fw_token Rename and change the type of of_node to indicate it is a generic pointer which is generally only used for comparison purposes. In a later patch we will put an ACPI/PPTT token pointer in fw_token so that the code which builds the shared cpu masks can be reused. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index a872523e8951..597aacb233fc 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -35,7 +35,7 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { - return sib_leaf->of_node == this_leaf->of_node; + return sib_leaf->fw_token == this_leaf->fw_token; } /* OF properties to query for a given cache type */ @@ -167,9 +167,10 @@ static int cache_setup_of_node(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) + /* skip if fw_token is already populated */ + if (this_cpu_ci->info_list->fw_token) { return 0; + } if (!cpu_dev) { pr_err("No cpu device for CPU %d\n", cpu); @@ -190,7 +191,7 @@ static int cache_setup_of_node(unsigned int cpu) if (!np) 
break; cache_of_set_props(this_leaf, np); - this_leaf->of_node = np; + this_leaf->fw_token = np; index++; } @@ -278,7 +279,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->of_node); + of_node_put(this_leaf->fw_token); } } @@ -323,8 +324,9 @@ static int detect_cache_attributes(unsigned int cpu) if (ret) goto free_ci; /* - * For systems using DT for cache hierarchy, of_node and shared_cpu_map - * will be set up here only if they are not populated already + * For systems using DT for cache hierarchy, fw_token + * and shared_cpu_map will be set up here only if they are + * not populated already */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { -- cgit v1.2.3 From 2bd00bcd73e5edd5769e2a5f24c59a517582d862 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:00 -0500 Subject: ACPI/PPTT: Add Processor Properties Topology Table parsing ACPI 6.2 adds a new table, which describes how processing units are related to each other in tree like fashion. Caches are also sprinkled throughout the tree and describe the properties of the caches in relation to other caches and processing units. Add the code to parse the cache hierarchy and report the total number of levels of cache for a given core using acpi_find_last_cache_level() as well as fill out the individual cores cache information with cache_setup_acpi() once the cpu_cacheinfo structure has been populated by the arch specific code. An additional patch later in the set adds the ability to report peers in the topology using find_acpi_cpu_topology() to report a unique ID for each processing unit at a given level in the tree. These unique id's can then be used to match related processing units which exist as threads, within a given package, etc. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 655 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 drivers/acpi/pptt.c (limited to 'drivers') diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c new file mode 100644 index 000000000000..e5ea1974d1e3 --- /dev/null +++ b/drivers/acpi/pptt.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * pptt.c - parsing of Processor Properties Topology Table (PPTT) + * + * Copyright (C) 2018, ARM + * + * This file implements parsing of the Processor Properties Topology Table + * which is optionally used to describe the processor and cache topology. + * Due to the relative pointers used throughout the table, this doesn't + * leverage the existing subtable parsing in the kernel. + * + * The PPTT structure is an inverted tree, with each node potentially + * holding one or two inverted tree data structures describing + * the caches available at that level. Each cache structure optionally + * contains properties describing the cache at a given level which can be + * used to override hardware probed values. 
+ */
+#define pr_fmt(fmt) "ACPI PPTT: " fmt
+
+#include <linux/acpi.h>
+#include <linux/cacheinfo.h>
+#include <acpi/processor.h>
+
+static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr,
+							u32 pptt_ref)
+{
+	struct acpi_subtable_header *entry;
+
+	/* there isn't a subtable at reference 0 */
+	if (pptt_ref < sizeof(struct acpi_subtable_header))
+		return NULL;
+
+	if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length)
+		return NULL;
+
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref);
+
+	if (entry->length == 0)
+		return NULL;
+
+	if (pptt_ref + entry->length > table_hdr->length)
+		return NULL;
+
+	return entry;
+}
+
+static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header *table_hdr,
+						   u32 pptt_ref)
+{
+	return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_hdr,
+						u32 pptt_ref)
+{
+	return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref);
+}
+
+static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr,
+							   struct acpi_pptt_processor *node,
+							   int resource)
+{
+	u32 *ref;
+
+	if (resource >= node->number_of_priv_resources)
+		return NULL;
+
+	ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor));
+	ref += resource;
+
+	return fetch_pptt_subtable(table_hdr, *ref);
+}
+
+static inline bool acpi_pptt_match_type(int table_type, int type)
+{
+	return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type ||
+		table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type);
+}
+
+/**
+ * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @local_level: passed res reflects this cache level
+ * @res: cache resource in the PPTT we want to walk
+ * @found: returns a pointer to the requested level if found
+ * @level: the requested cache level
+ * @type: the requested cache type
+ *
+ * Attempt to find a given cache level, while counting the max number
+ * of cache levels for the cache node.
+ *
+ * Given a pptt resource, verify that it is a cache node, then walk
+ * down each level of caches, counting how many levels are found
+ * as well as checking the cache type (icache, dcache, unified). If a
+ * level & type match, then we set found, and continue the search.
+ * Once the entire cache branch has been walked return its max
+ * depth.
+ *
+ * Return: Deepest level reached (0 if @res is not a cache); match in @found.
+ */
+static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr,
+				int local_level,
+				struct acpi_subtable_header *res,
+				struct acpi_pptt_cache **found,
+				int level, int type)
+{
+	struct acpi_pptt_cache *cache;
+
+	if (res->type != ACPI_PPTT_TYPE_CACHE)
+		return 0;
+
+	cache = (struct acpi_pptt_cache *) res;
+	while (cache) {
+		local_level++;
+
+		if (local_level == level &&
+		    cache->flags & ACPI_PPTT_CACHE_TYPE_VALID &&
+		    acpi_pptt_match_type(cache->attributes, type)) {
+			if (*found != NULL && cache != *found)
+				pr_warn("Found duplicate cache level/type unable to determine uniqueness\n");
+
+			pr_debug("Found cache @ level %d\n", level);
+			*found = cache;
+			/*
+			 * continue looking at this node's resource list
+			 * to verify that we don't find a duplicate
+			 * cache node.
+			 */
+		}
+		cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache);
+	}
+	return local_level;
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr,
+						      struct acpi_pptt_processor *cpu_node,
+						      int *starting_level, int level,
+						      int type)
+{
+	struct acpi_subtable_header *res;
+	int number_of_levels = *starting_level;
+	int resource = 0;
+	struct acpi_pptt_cache *ret = NULL;
+	int local_level;
+
+	/* walk down from processor node */
+	while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) {
+		resource++;
+
+		local_level = acpi_pptt_walk_cache(table_hdr, *starting_level,
+						   res, &ret, level, type);
+		/*
+		 * we are looking for the max depth. Since it is potentially
+		 * possible for a given node to have resources with differing
+		 * depths verify that the depth we have found is the largest.
+		 */
+		if (number_of_levels < local_level)
+			number_of_levels = local_level;
+	}
+	if (number_of_levels > *starting_level)
+		*starting_level = number_of_levels;
+
+	return ret;
+}
+
+/**
+ * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @cpu_node: processor node we wish to count caches for
+ *
+ * Given a processor node containing a processing unit, walk into it and count
+ * how many levels exist solely for it, and then walk up each level until we hit
+ * the root node (ignore the package level because it may be possible to have
+ * caches that exist across packages). Count the number of cache levels that
+ * exist at each level on the way up.
+ *
+ * Return: Total number of levels found.
+ */
+static int acpi_count_levels(struct acpi_table_header *table_hdr,
+			     struct acpi_pptt_processor *cpu_node)
+{
+	int total_levels = 0;
+
+	do {
+		acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0);
+		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+	} while (cpu_node);
+
+	return total_levels;
+}
+
+/**
+ * acpi_pptt_leaf_node() - Given a processor node, determine if it's a leaf
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @node: passed node is checked to see if it's a leaf
+ *
+ * Determine if the *node parameter is a leaf node by iterating the
+ * PPTT table, looking for nodes which reference it.
+ *
+ * Return: 0 if we find a node referencing the passed node (or table error),
+ * or 1 if we don't.
+ */
+static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr,
+			       struct acpi_pptt_processor *node)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	u32 node_entry;
+	struct acpi_pptt_processor *cpu_node;
+	u32 proc_sz;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	node_entry = ACPI_PTR_DIFF(node, table_hdr);
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+	proc_sz = sizeof(struct acpi_pptt_processor);	/* whole node, not a pointer */
+
+	while ((unsigned long)entry + proc_sz <= table_end) {	/* node must fit in table */
+		cpu_node = (struct acpi_pptt_processor *)entry;
+		if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+		    cpu_node->parent == node_entry)
+			return 0;
+		if (entry->length == 0)
+			return 0;
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+
+	}
+	return 1;
+}
+
+/**
+ * acpi_find_processor_node() - Given a PPTT table find the requested processor
+ * @table_hdr: Pointer to the head of the PPTT table
+ * @acpi_cpu_id: cpu we are searching for
+ *
+ * Find the subtable entry describing the provided processor.
+ * This is done by iterating the PPTT table looking for processor nodes
+ * which have an acpi_processor_id that matches the acpi_cpu_id parameter
+ * passed into the function. If we find a node that matches these criteria
+ * we verify that it's a leaf node in the topology rather than depending
+ * on the valid flag, which doesn't need to be set for leaf nodes.
+ *
+ * Return: NULL, or the processor's acpi_pptt_processor*
+ */
+static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_header *table_hdr,
+							    u32 acpi_cpu_id)
+{
+	struct acpi_subtable_header *entry;
+	unsigned long table_end;
+	struct acpi_pptt_processor *cpu_node;
+	u32 proc_sz;
+
+	table_end = (unsigned long)table_hdr + table_hdr->length;
+	entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr,
+			     sizeof(struct acpi_table_pptt));
+	proc_sz = sizeof(struct acpi_pptt_processor);	/* whole node, not a pointer */
+
+	/* find the processor structure associated with this cpuid */
+	while ((unsigned long)entry + proc_sz <= table_end) {	/* node must fit in table */
+		cpu_node = (struct acpi_pptt_processor *)entry;
+
+		if (entry->length == 0) {
+			pr_warn("Invalid zero length subtable\n");
+			break;
+		}
+		if (entry->type == ACPI_PPTT_TYPE_PROCESSOR &&
+		    acpi_cpu_id == cpu_node->acpi_processor_id &&
+		    acpi_pptt_leaf_node(table_hdr, cpu_node)) {
+			return (struct acpi_pptt_processor *)entry;
+		}
+
+		entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry,
+				     entry->length);
+	}
+
+	return NULL;
+}
+
+static int acpi_find_cache_levels(struct acpi_table_header *table_hdr,
+				  u32 acpi_cpu_id)
+{
+	int number_of_levels = 0;
+	struct acpi_pptt_processor *cpu;
+
+	cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+	if (cpu)
+		number_of_levels = acpi_count_levels(table_hdr, cpu);
+
+	return number_of_levels;
+}
+
+static u8 acpi_cache_type(enum cache_type type)
+{
+	switch (type) {
+	case CACHE_TYPE_DATA:
+		pr_debug("Looking for data cache\n");
+		return ACPI_PPTT_CACHE_TYPE_DATA;
+	case CACHE_TYPE_INST:
+		pr_debug("Looking for instruction cache\n");
+		return ACPI_PPTT_CACHE_TYPE_INSTR;
+	default:
+	case CACHE_TYPE_UNIFIED:
+		pr_debug("Looking for unified cache\n");
+		/*
+		 * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED
+		 * contains the bit pattern that will match both
+		 * ACPI unified bit patterns because we use it later
+		 * to match both cases.
+		 */
+		return ACPI_PPTT_CACHE_TYPE_UNIFIED;
+	}
+}
+
+static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr,
+						     u32 acpi_cpu_id,
+						     enum cache_type type,
+						     unsigned int level,
+						     struct acpi_pptt_processor **node)
+{
+	int total_levels = 0;
+	struct acpi_pptt_cache *found = NULL;
+	struct acpi_pptt_processor *cpu_node;
+	u8 acpi_type = acpi_cache_type(type);
+
+	pr_debug("Looking for CPU %d's level %d cache type %d\n",
+		 acpi_cpu_id, level, acpi_type);
+
+	cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id);
+
+	while (cpu_node && !found) {
+		found = acpi_find_cache_level(table_hdr, cpu_node,
+					      &total_levels, level, acpi_type);
+		*node = cpu_node;
+		cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent);
+	}
+
+	return found;
+}
+
+/* total number of attributes checked by the properties code */
+#define PPTT_CHECKED_ATTRIBUTES 4
+
+/**
+ * update_cache_properties() - Update cacheinfo for the given processor
+ * @this_leaf: Kernel cache info structure being updated
+ * @found_cache: The PPTT node describing this cache instance
+ * @cpu_node: A unique reference to describe this cache instance
+ *
+ * The ACPI spec implies that the fields in the cache structures are used to
+ * extend and correct the information probed from the hardware. Let's only
+ * set fields that we determine are VALID.
+ *
+ * Return: nothing. Side effect of updating the global cacheinfo
+ */
+static void update_cache_properties(struct cacheinfo *this_leaf,
+				    struct acpi_pptt_cache *found_cache,
+				    struct acpi_pptt_processor *cpu_node)
+{
+	int valid_flags = 0;
+
+	this_leaf->fw_token = cpu_node;
+	if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) {
+		this_leaf->size = found_cache->size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) {
+		this_leaf->coherency_line_size = found_cache->line_size;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) {
+		this_leaf->number_of_sets = found_cache->number_of_sets;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) {
+		this_leaf->ways_of_associativity = found_cache->associativity;
+		valid_flags++;
+	}
+	if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) {
+		case ACPI_PPTT_CACHE_POLICY_WT:
+			this_leaf->attributes = CACHE_WRITE_THROUGH;
+			break;
+		case ACPI_PPTT_CACHE_POLICY_WB:
+			this_leaf->attributes = CACHE_WRITE_BACK;
+			break;
+		}
+	}
+	if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) {
+		switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) {
+		case ACPI_PPTT_CACHE_READ_ALLOCATE:
+			this_leaf->attributes |= CACHE_READ_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_WRITE_ALLOCATE:
+			this_leaf->attributes |= CACHE_WRITE_ALLOCATE;
+			break;
+		case ACPI_PPTT_CACHE_RW_ALLOCATE:
+		case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT:
+			this_leaf->attributes |=
+				CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE;
+			break;
+		}
+	}
+	/*
+	 * If the above flags are valid, and the cache type is NOCACHE
+	 * update the cache type as well.
+	 */
+	if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+	    valid_flags == PPTT_CHECKED_ATTRIBUTES)
+		this_leaf->type = CACHE_TYPE_UNIFIED;
+}
+
+static void cache_setup_acpi_cpu(struct acpi_table_header *table,
+				 unsigned int cpu)
+{
+	struct acpi_pptt_cache *found_cache;
+	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct cacheinfo *this_leaf;
+	unsigned int index = 0;
+	struct acpi_pptt_processor *cpu_node = NULL;
+
+	while (index < get_cpu_cacheinfo(cpu)->num_leaves) {
+		this_leaf = this_cpu_ci->info_list + index;
+		found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+						   this_leaf->type,
+						   this_leaf->level,
+						   &cpu_node);
+		pr_debug("found = %p %p\n", found_cache, cpu_node);
+		if (found_cache)
+			update_cache_properties(this_leaf,
+						found_cache,
+						cpu_node);
+
+		index++;
+	}
+}
+
+/* Passing level values greater than this will result in search termination */
+#define PPTT_ABORT_PACKAGE 0xFF
+
+static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr,
+								  struct acpi_pptt_processor *cpu,
+								  int level, int flag)
+{
+	struct acpi_pptt_processor *prev_node;
+
+	while (cpu && level) {
+		if (cpu->flags & flag)
+			break;
+		pr_debug("level %d\n", level);
+		prev_node = fetch_pptt_node(table_hdr, cpu->parent);
+		if (prev_node == NULL)
+			break;
+		cpu = prev_node;
+		level--;
+	}
+	return cpu;
+}
+
+/**
+ * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature
+ * @table: Pointer to the head of the PPTT table
+ * @cpu: Kernel logical cpu number
+ * @level: A level that terminates the search
+ * @flag: A flag which terminates the search
+ *
+ * Get a unique value given a cpu, and a topology level, that can be
+ * matched to determine which cpus share common topological features
+ * at that level.
+ *
+ * Return: Unique value, or -ENOENT if unable to locate cpu
+ */
+static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
+				     unsigned int cpu, int level, int flag)
+{
+	struct acpi_pptt_processor *cpu_node;
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+
+	cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+	if (cpu_node) {
+		cpu_node = acpi_find_processor_package_id(table, cpu_node,
+							  level, flag);
+		/* Only level 0 has a guaranteed id, else use the node offset */
+		if (level == 0)
+			return cpu_node->acpi_processor_id;
+		return ACPI_PTR_DIFF(cpu_node, table);
+	}
+	pr_warn_once("PPTT table found, but unable to locate core %d (%d)\n",
+		    cpu, acpi_cpu_id);
+	return -ENOENT;
+}
+
+static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	int retval;	/* tag or negative errno from topology_get_acpi_cpu_tag() */
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, cpu topology may be inaccurate\n");
+		return -ENOENT;
+	}
+	retval = topology_get_acpi_cpu_tag(table, cpu, level, flag);
+	pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n",
+		 cpu, level, retval);
+	acpi_put_table(table);
+
+	return retval;
+}
+
+/**
+ * acpi_find_last_cache_level() - Determines the number of cache levels for a PE
+ * @cpu: Kernel logical cpu number
+ *
+ * Given a logical cpu number, returns the number of levels of cache represented
+ * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0
+ * indicating we didn't find any cache levels.
+ *
+ * Return: Cache levels visible to this core.
+ */
+int acpi_find_last_cache_level(unsigned int cpu)
+{
+	u32 acpi_cpu_id;
+	struct acpi_table_header *table;
+	int number_of_levels = 0;
+	acpi_status status;
+
+	pr_debug("Cache Setup find last level cpu=%d\n", cpu);
+
+	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+	} else {
+		number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id);
+		acpi_put_table(table);
+	}
+	pr_debug("Cache Setup find last level level=%d\n", number_of_levels);
+
+	return number_of_levels;
+}
+
+/**
+ * cache_setup_acpi() - Override CPU cache topology with data from the PPTT
+ * @cpu: Kernel logical cpu number
+ *
+ * Updates the global cache info provided by get_cpu_cacheinfo()
+ * when there are valid properties in the acpi_pptt_cache nodes. A
+ * successful parse may not result in any updates if none of the
+ * cache levels have any valid flags set.  Further, a unique value is
+ * associated with each known CPU cache entry. This unique value
+ * can be used to determine whether caches are shared between cpus.
+ *
+ * Return: -ENOENT on failure to find table, or 0 on success
+ */
+int cache_setup_acpi(unsigned int cpu)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+
+	pr_debug("Cache Setup ACPI cpu %d\n", cpu);
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, cache topology may be inaccurate\n");
+		return -ENOENT;
+	}
+
+	cache_setup_acpi_cpu(table, cpu);
+	acpi_put_table(table);
+
+	return 0;	/* errno convention: do not leak an acpi_status to callers */
+}
+
+/**
+ * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu
+ * @cpu: Kernel logical cpu number
+ * @level: The topological level for which we would like a unique ID
+ *
+ * Determine a topology unique ID for each thread/core/cluster/mc_grouping
+ * /socket/etc. This ID can then be used to group peers, which will have
+ * matching ids.
+ *
+ * The search terminates when either the requested level is found or
+ * we reach a root node. Levels beyond the termination point will return the
+ * same unique ID. The unique id for level 0 is the acpi processor id. All
+ * other levels beyond this use a generated value to uniquely identify
+ * a topological feature.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_topology(unsigned int cpu, int level)
+{
+	return find_acpi_cpu_topology_tag(cpu, level, 0);
+}
+
+/**
+ * find_acpi_cpu_cache_topology() - Determine a unique cache topology value
+ * @cpu: Kernel logical cpu number
+ * @level: The cache level for which we would like a unique ID
+ *
+ * Determine a unique ID for each unified cache in the system
+ *
+ * Return: -ENOENT if the PPTT doesn't exist or no matching cache is found.
+ * Otherwise returns a value which represents a unique topological feature.
+ */
+int find_acpi_cpu_cache_topology(unsigned int cpu, int level)
+{
+	struct acpi_table_header *table;
+	struct acpi_pptt_cache *found_cache;
+	acpi_status status;
+	u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	struct acpi_pptt_processor *cpu_node = NULL;
+	int ret = -ENOENT;	/* kept when no unified cache matches @level */
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		pr_warn_once("No PPTT table found, topology may be inaccurate\n");
+		return -ENOENT;
+	}
+
+	found_cache = acpi_find_cache_node(table, acpi_cpu_id,
+					   CACHE_TYPE_UNIFIED,
+					   level,
+					   &cpu_node);
+	if (found_cache)
+		ret = ACPI_PTR_DIFF(cpu_node, table);
+
+	acpi_put_table(table);
+
+	return ret;
+}
+
+
+/**
+ * find_acpi_cpu_topology_package() - Determine a unique cpu package value
+ * @cpu: Kernel logical cpu number
+ *
+ * Determine a topology unique package ID for the given cpu.
+ * This ID can then be used to group peers, which will have matching ids. + * + * The search terminates when either a level is found with the PHYSICAL_PACKAGE + * flag set or we reach a root node. + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents the package for this cpu. + */ +int find_acpi_cpu_topology_package(unsigned int cpu) +{ + return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, + ACPI_PPTT_PHYSICAL_PACKAGE); +} -- cgit v1.2.3 From 0ce82232232a2f76128e9bfcc6e8b662e110a671 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:01 -0500 Subject: ACPI: Enable PPTT support on ARM64 Now that we have a PPTT parser, in preparation for its use on arm64, lets build it. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/Kconfig | 3 +++ drivers/acpi/Makefile | 1 + 2 files changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 516d7b36d6fb..b533eeb6139d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -547,6 +547,9 @@ config ACPI_CONFIGFS if ARM64 source "drivers/acpi/arm64/Kconfig" + +config ACPI_PPTT + bool endif config TPS68470_PMIC_OPREGION diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 48e202752754..6d59aa109a91 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_ACPI_BGRT) += bgrt.o obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o +obj-$(CONFIG_ACPI_PPTT) += pptt.o # processor has its own "processor." 
module_param namespace processor-y := processor_driver.o -- cgit v1.2.3 From 582b468bdc6d9c287a432a63225cf7922e985e15 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:02 -0500 Subject: drivers: base cacheinfo: Add support for ACPI based firmware tables Call ACPI cache parsing routines from base cacheinfo code if ACPI is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level so that individual architectures can enable ACPI topology parsing. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 597aacb233fc..2880e2ab01f5 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { /* - * For non-DT systems, assume unique level 1 cache, system-wide + * For non-DT/ACPI systems, assume unique level 1 caches, system-wide * shared caches for all other levels. 
This will be used only if * arch specific code has not populated shared_cpu_map */ @@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, } #endif +int __weak cache_setup_acpi(unsigned int cpu) +{ + return -ENOTSUPP; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (of_have_populated_dt()) ret = cache_setup_of_node(cpu); else if (!acpi_disabled) - /* No cache property/hierarchy support yet in ACPI */ - ret = -ENOTSUPP; + ret = cache_setup_acpi(cpu); + if (ret) return ret; @@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->fw_token); + if (of_have_populated_dt()) + of_node_put(this_leaf->fw_token); } } -- cgit v1.2.3 From bce1a65172d1172a8ec26c8251b9a4a97a3cae23 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:06 -0500 Subject: ACPI: Add PPTT to injectable table list Add ACPI_SIG_PPTT to the table so initrd's can override the system topology. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Ard Biesheuvel Acked-by: Rafael J. 
Wysocki Signed-off-by: Geoffrey Blake Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 849c4fb19b03..30d93bf7c6a2 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -457,7 +457,7 @@ static const char * const table_sigs[] = { ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, - ACPI_SIG_NFIT, ACPI_SIG_HMAT, NULL }; + ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, NULL }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) -- cgit v1.2.3 From d0f2e423295313a30b1d56f3b24e9e927f0b66b7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 19 Apr 2018 16:06:07 +0200 Subject: perf: simplify getting .drvdata We should get drvdata from struct device directly. Going via platform_device is an unneeded step back and forth. 
Signed-off-by: Wolfram Sang Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 28bb642af18b..54ec278d2fc4 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -131,8 +131,7 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct platform_device *pdev = to_platform_device(dev); - struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); struct dev_ext_attribute *ea = container_of(attr, struct dev_ext_attribute, attr); int cap = (long)ea->var; @@ -247,8 +246,7 @@ static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - struct platform_device *pdev = to_platform_device(dev); - struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus); } -- cgit v1.2.3 From 5c591304e710339a75a9f0f9f3f085aa4109e55d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 21 May 2018 12:17:09 +0100 Subject: perf/arm-cci: Remove unnecessary period adjustment Since sampling events are rejected up-front by cci_pmu_event_init(), it doesn't make much sense to go fiddling with the sampling period later. This would seem to be just another leftover artefact of the arm_pmu framework, and as such can go.
Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 383b2d3dcbc6..72c464485470 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1304,15 +1304,6 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config_base |= (unsigned long)mapping; - /* - * Limit the sample_period to half of the counter width. That way, the - * new counter value is far less likely to overtake the previous one - * unless you have some serious IRQ latency issues. - */ - hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - if (event->group_leader != event) { if (validate_group(event) != 0) return -EINVAL; -- cgit v1.2.3 From 0788f1e97324d8378e860dc2560699ddc6f3aef9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 10 May 2018 11:35:15 +0100 Subject: arm_pmu: simplify arm_pmu::handle_irq The arm_pmu::handle_irq() callback has the same prototype as a generic IRQ handler, taking the IRQ number and a void pointer argument which it must convert to an arm_pmu pointer. This means that all arm_pmu::handle_irq() take an IRQ number they never use, and all must explicitly cast the void pointer to an arm_pmu pointer. Instead, let's change arm_pmu::handle_irq to take an arm_pmu pointer, allowing these casts to be removed. The redundant IRQ number parameter is also removed. 
Suggested-by: Hoeun Ryu Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1a0d340b65cf..a6347d487635 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -339,7 +339,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) return IRQ_NONE; start_clock = sched_clock(); - ret = armpmu->handle_irq(irq, armpmu); + ret = armpmu->handle_irq(armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); -- cgit v1.2.3 From 75dc344145190bf53aed7a28dbc27c11180d94e5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:51 +0100 Subject: perf/arm-cc*: Fix MODULE_LICENSE() tags The CCI/CCN drivers are licensed under GPLv2, but the MODULE_LICENSE() tags are using the bare "GPL" string implying GPLv2 or later. Fix them to match their actual file license. 
Acked-by: Pawel Moll Acked-by: Suzuki K Poulose Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 2 +- drivers/perf/arm-ccn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 72c464485470..7d916dc2214d 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1709,5 +1709,5 @@ static struct platform_driver cci_pmu_driver = { }; builtin_platform_driver(cci_pmu_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ARM CCI PMU support"); diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 65b7e4042ece..917b47e776df 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1594,4 +1594,4 @@ module_init(arm_ccn_init); module_exit(arm_ccn_exit); MODULE_AUTHOR("Pawel Moll "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 28c01dc9d85e4cd744f254c45d3c894bca168ed6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:52 +0100 Subject: perf/arm-cci: Remove pointless PMU disabling The CCI PMU driver bears some legacy remnants of the arm_pmu framework from when it was split in c6f85cb4305b ("bus: cci: move away from arm_pmu framework"). In particular this perf_pmu_{dis,en}able() dance around pmu->add which was fixed for arm_pmu in a9e469d1c89b ("drivers/perf: arm_pmu: remove pointless PMU disabling"). For the exact same reasons (i.e. perf core already does this around the call anyway), give cci_pmu_add() the exact same change, which also prevents having to export those core functions to build it as a module. 
Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 7d916dc2214d..33b47c292d79 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1184,16 +1184,11 @@ static int cci_pmu_add(struct perf_event *event, int flags) struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; struct hw_perf_event *hwc = &event->hw; int idx; - int err = 0; - - perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ idx = pmu_get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } + if (idx < 0) + return idx; event->hw.idx = idx; hw_events->events[idx] = event; @@ -1205,9 +1200,7 @@ static int cci_pmu_add(struct perf_event *event, int flags) /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); -out: - perf_pmu_enable(event->pmu); - return err; + return 0; } static void cci_pmu_del(struct perf_event *event, int flags) -- cgit v1.2.3 From 8b0c93c20ef78f15d8b760964ff79bda7f68c610 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:53 +0100 Subject: perf/arm-cci: Allow building as a module Fill in the few extra bits and annotations needed to make the driver work properly as a module, and jiggle the Kconfig to expose the driver-level ARM_CCI_PMU option. 
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 34 ++++++++++++++++++---------------- drivers/perf/arm-cci.c | 17 ++++++++++++++++- 2 files changed, 34 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 28bb5a029558..110330ecf714 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -6,30 +6,32 @@ menu "Performance monitor support" depends on PERF_EVENTS config ARM_CCI_PMU - bool + tristate "ARM CCI PMU driver" + depends on (ARM && CPU_V7) || ARM64 select ARM_CCI + help + Support for PMU events monitoring on the ARM CCI (Cache Coherent + Interconnect) family of products. + + If compiled as a module, it will be called arm-cci. config ARM_CCI400_PMU - bool "ARM CCI400 PMU support" - depends on (ARM && CPU_V7) || ARM64 + bool "support CCI-400" + default y + depends on ARM_CCI_PMU select ARM_CCI400_COMMON - select ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-400 (cache coherent - interconnect). CCI-400 supports counting events related to the - connected slave/master interfaces. + CCI-400 provides 4 independent event counters counting events related + to the connected slave/master interfaces, plus a cycle counter. config ARM_CCI5xx_PMU - bool "ARM CCI-500/CCI-550 PMU support" - depends on (ARM && CPU_V7) || ARM64 - select ARM_CCI_PMU + bool "support CCI-500/CCI-550" + default y + depends on ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache - coherent interconnects. Both of them provide 8 independent event counters, - which can count events pertaining to the slave/master interfaces as well - as the internal events to the CCI. - - If unsure, say Y + CCI-500/CCI-550 both provide 8 independent event counters, which can + count events pertaining to the slave/master interfaces as well as the + internal events to the CCI. 
config ARM_CCN tristate "ARM CCN driver support" diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 33b47c292d79..e6fadc8e1178 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1407,6 +1407,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) pmu_format_attr_group.attrs = model->format_attrs; cci_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, .name = cci_pmu->model->name, .task_ctx_nr = perf_invalid_context, .pmu_enable = cci_pmu_enable, @@ -1572,6 +1573,7 @@ static const struct of_device_id arm_cci_pmu_matches[] = { #endif {}, }; +MODULE_DEVICE_TABLE(of, arm_cci_pmu_matches); static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) { @@ -1693,14 +1695,27 @@ static int cci_pmu_probe(struct platform_device *pdev) return 0; } +static int cci_pmu_remove(struct platform_device *pdev) +{ + if (!g_cci_pmu) + return 0; + + cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE); + perf_pmu_unregister(&g_cci_pmu->pmu); + g_cci_pmu = NULL; + + return 0; +} + static struct platform_driver cci_pmu_driver = { .driver = { .name = DRIVER_NAME, .of_match_table = arm_cci_pmu_matches, }, .probe = cci_pmu_probe, + .remove = cci_pmu_remove, }; -builtin_platform_driver(cci_pmu_driver); +module_platform_driver(cci_pmu_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ARM CCI PMU support"); -- cgit v1.2.3 From 1898eb61fbc9703efee886d3abec27a388cf28c3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 May 2018 18:19:49 +0100 Subject: drivers/perf: arm-ccn: don't log to dmesg in event_init The ARM CCN PMU driver uses dev_warn() to complain about parameters in the user-provided perf_event_attr. This means that under normal operation (e.g. a single invocation of the perf tool), a number of warning messages may be logged to dmesg.
Tools may issue multiple syscalls to probe for feature support, and multiple applications (from multiple users) can attempt to open events simultaneously, so this is not very helpful, even if a user happens to have access to dmesg. Worse, this can push important information out of the dmesg ring buffer, and can significantly slow down syscall fuzzers, vastly increasing the time it takes to find critical bugs. Demote the dev_warn() instances to dev_dbg(), as is the case for all other PMU drivers under drivers/perf/. Users who wish to debug PMU event initialisation can enable dynamic debug to receive these messages. Signed-off-by: Mark Rutland Cc: Pawel Moll Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 917b47e776df..b416ee18e6bb 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -736,7 +736,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) ccn = pmu_to_arm_ccn(event->pmu); if (hw->sample_period) { - dev_warn(ccn->dev, "Sampling not supported!\n"); + dev_dbg(ccn->dev, "Sampling not supported!\n"); return -EOPNOTSUPP; } @@ -744,12 +744,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) event->attr.exclude_kernel || event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || event->attr.exclude_guest) { - dev_warn(ccn->dev, "Can't exclude execution levels!\n"); + dev_dbg(ccn->dev, "Can't exclude execution levels!\n"); return -EINVAL; } if (event->cpu < 0) { - dev_warn(ccn->dev, "Can't provide per-task data!\n"); + dev_dbg(ccn->dev, "Can't provide per-task data!\n"); return -EOPNOTSUPP; } /* @@ -771,13 +771,13 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) switch (type) { case CCN_TYPE_MN: if (node_xp != ccn->mn_id) { - dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid 
MN ID %d!\n", node_xp); return -EINVAL; } break; case CCN_TYPE_XP: if (node_xp >= ccn->num_xps) { - dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp); return -EINVAL; } break; @@ -785,11 +785,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) break; default: if (node_xp >= ccn->num_nodes) { - dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp); return -EINVAL; } if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) { - dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n", + dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n", type, node_xp); return -EINVAL; } @@ -808,19 +808,19 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) if (event_id != e->event) continue; if (e->num_ports && port >= e->num_ports) { - dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n", port, node_xp); return -EINVAL; } if (e->num_vcs && vc >= e->num_vcs) { - dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n", vc, node_xp); return -EINVAL; } valid = 1; } if (!valid) { - dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", event_id, node_xp); return -EINVAL; } -- cgit v1.2.3 From b89205bd508ed384253b4449c6a7a755b956a0f8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 22 May 2018 23:54:04 +0800 Subject: drivers/perf: Remove ARM_SPE_PMU explicit PERF_EVENTS dependency Since commit bddb9b68d3fb ("drivers/perf: commonise PERF_EVENTS dependency"), all perf drivers depend on PERF_EVENTS config under a common menu. Config ARM_SPE_PMU still declares explicitly a dependency on PERF_EVENTS, which is unneeded, so remove it. 
Acked-by: Mark Rutland Signed-off-by: John Garry Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 110330ecf714..08ebaf7cca8b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -96,7 +96,7 @@ config XGENE_PMU config ARM_SPE_PMU tristate "Enable support for the ARMv8.2 Statistical Profiling Extension" - depends on PERF_EVENTS && ARM64 + depends on ARM64 help Enable perf support for the ARMv8.2 Statistical Profiling Extension, which provides periodic sampling of operations in -- cgit v1.2.3 From 984e9cf1b9eaab08e4f1f082ce49ed2670e99d90 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 May 2018 17:41:47 +0200 Subject: drivers/bus: arm-cci: fix build warnings When the arm-cci driver is enabled, but both CONFIG_ARM_CCI5xx_PMU and CONFIG_ARM_CCI400_PMU are not, we get a warning about how parts of the driver are never used: drivers/perf/arm-cci.c:1454:29: error: 'cci_pmu_models' defined but not used [-Werror=unused-variable] drivers/perf/arm-cci.c:693:16: error: 'cci_pmu_event_show' defined but not used [-Werror=unused-function] drivers/perf/arm-cci.c:685:16: error: 'cci_pmu_format_show' defined but not used [-Werror=unused-function] Marking all three functions as __maybe_unused avoids the warnings in randconfig builds. I'm doing this lacking any ideas for a better fix. 
Fixes: 3de6be7a3dd8 ("drivers/bus: Split Arm CCI driver") Reviewed-by: Robin Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index e6fadc8e1178..0d09d8e669cd 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -120,9 +120,9 @@ enum cci_models { static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask); -static ssize_t cci_pmu_format_show(struct device *dev, +static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf); -static ssize_t cci_pmu_event_show(struct device *dev, +static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf); #define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ @@ -1451,7 +1451,7 @@ static int cci_pmu_offline_cpu(unsigned int cpu) return 0; } -static struct cci_pmu_model cci_pmu_models[] = { +static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI400_PMU [CCI400_R0] = { .name = "CCI_400", -- cgit v1.2.3