From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 19 +++++++--- drivers/base/node.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5260e9e0df48..989429cfed88 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -347,8 +347,9 @@ static inline int memory_probe_init(void) * section belongs to... */ -static int add_memory_block(unsigned long node_id, struct mem_section *section, - unsigned long state, int phys_device) +static int add_memory_block(int nid, struct mem_section *section, + unsigned long state, int phys_device, + enum mem_add_context context) { struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); int ret = 0; @@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, ret = mem_create_simple_file(mem, phys_device); if (!ret) ret = mem_create_simple_file(mem, removable); + if (!ret) { + if (context == HOTPLUG) + ret = register_mem_sect_under_node(mem, nid); + } return ret; } @@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section, * * This could be made generic for all sysdev classes. */ -static struct memory_block *find_memory_block(struct mem_section *section) +struct memory_block *find_memory_block(struct mem_section *section) { struct kobject *kobj; struct sys_device *sysdev; @@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, struct memory_block *mem; mem = find_memory_block(section); + unregister_mem_sect_under_nodes(mem); mem_remove_simple_file(mem, phys_index); mem_remove_simple_file(mem, state); mem_remove_simple_file(mem, phys_device); @@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section, * need an interface for the VM to add new memory regions, * but without onlining it. */ -int register_new_memory(struct mem_section *section) +int register_new_memory(int nid, struct mem_section *section) { - return add_memory_block(0, section, MEM_OFFLINE, 0); + return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG); } int unregister_memory_section(struct mem_section *section) @@ -458,7 +464,8 @@ int __init memory_dev_init(void) for (i = 0; i < NR_MEM_SECTIONS; i++) { if (!present_section_nr(i)) continue; - err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); + err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, + 0, BOOT); if (!ret) ret = err; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 91636cd8b6c9..43fa90b837ee 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) return 0; } +#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE +#define page_initialized(page) (page->lru.next) + +static int get_nid_for_pfn(unsigned long pfn) +{ + struct page *page; + + if (!pfn_valid_within(pfn)) + return -1; + page = pfn_to_page(pfn); + if (!page_initialized(page)) + return -1; + return pfn_to_nid(pfn); +} + +/* register memory section under specified node if it spans that node */ +int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) +{ + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + if (!node_online(nid)) + return 0; + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + int page_nid; + + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; + return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, + &mem_blk->sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + /* mem section does not span the specified node */ + return 0; +} + +/* unregister memory section under all nodes that it spans */ +int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) +{ + nodemask_t unlinked_nodes; + unsigned long pfn, sect_start_pfn, sect_end_pfn; + + if (!mem_blk) + return -EFAULT; + nodes_clear(unlinked_nodes); + sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); + sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { + unsigned int nid; + + nid = get_nid_for_pfn(pfn); + if (nid < 0) + continue; + if (!node_online(nid)) + continue; + if (node_test_and_set(nid, unlinked_nodes)) + continue; + sysfs_remove_link(&node_devices[nid].sysdev.kobj, + kobject_name(&mem_blk->sysdev.kobj)); + } + return 0; +} + +static int link_mem_sections(int nid) +{ + unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; + unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; + unsigned long pfn; + int err = 0; + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + unsigned long section_nr = pfn_to_section_nr(pfn); + struct mem_section *mem_sect; + struct memory_block *mem_blk; + int ret; + + if (!present_section_nr(section_nr)) + continue; + mem_sect = __nr_to_section(section_nr); + mem_blk = find_memory_block(mem_sect); + ret = register_mem_sect_under_node(mem_blk, nid); + if (!err) + err = ret; + + /* discard ref obtained in find_memory_block() */ + kobject_put(&mem_blk->sysdev.kobj); + } + return err; +} +#else +static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + int register_one_node(int nid) { int error = 0; @@ -267,6 +367,9 @@ int register_one_node(int nid) if (cpu_to_node(cpu) == nid) register_cpu_under_node(cpu, nid); } + + /* link memory sections under this node */ + error = link_mem_sections(nid); } return error; -- cgit v1.2.3