/*
 * kexec.c - kexec system call
 * Copyright (C) 2002-2004 Eric Biederman
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/sections.h>

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;

/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * Crash (rather than just oops) if the failure happened in
	 * interrupt context, in the idle task, in init, or when
	 * panic_on_oops is set.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */
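/*
 * For illustration, a minimal sketch of how such a stub might walk the
 * descriptor list.  The IND_* entry tags come from <linux/kexec.h>;
 * everything else here (the walk() helper, the direct use of physical
 * addresses as pointers) is a hypothetical stand-in for the real
 * per-arch assembly, valid only because of the identity mapping
 * described above:
 *
 *	void walk(unsigned long *list)
 *	{
 *		unsigned long *ptr, entry, dest = 0;
 *
 *		for (ptr = list; ; ptr++) {
 *			entry = *ptr;
 *			if (entry & IND_DESTINATION)
 *				dest = entry & PAGE_MASK;
 *			else if (entry & IND_INDIRECTION)
 *				ptr = (unsigned long *)(entry & PAGE_MASK) - 1;
 *			else if (entry & IND_SOURCE) {
 *				copy_page((void *)dest,
 *					  (void *)(entry & PAGE_MASK));
 *				dest += PAGE_SIZE;
 *			} else if (entry & IND_DONE)
 *				break;
 *		}
 *	}
 */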
/*
 * KIMAGE_NO_DEST is an impossible destination address..., for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static int kimage_is_destination_range(struct kimage *image,
				       unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
				      gfp_t gfp_mask,
				      unsigned long dest);

static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
			   unsigned long nr_segments,
			   struct kexec_segment __user *segments)
{
	size_t segment_bytes;
	struct kimage *image;
	unsigned long i;
	int result;

	/* Allocate a controlling structure */
	result = -ENOMEM;
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		goto out;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->start = entry;
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);
	}

	return page;
}

static int kimage_load_normal_segment(struct kimage *image,
				      struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image,
					 page_to_pfn(page) << PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
			       PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
				     struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
			       PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}
		result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_segment(struct kimage *image,
			       struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 *  - A generic part which loads the new kernel from the current
 *    address space, and very carefully places the data in the
 *    allocated pages.
 *
 *  - A generic part that interacts with the kernel and tells all of
 *    the devices to shut down, preventing ongoing DMAs and placing
 *    the devices in a consistent state so a later kernel can
 *    reinitialize them.
 *
 *  - A machine specific part that includes the syscall number
 *    and then copies the image to its final destination, and
 *    jumps into the image at entry.
 *
 * kexec does not sync, or unmount filesystems, so if you need
 * that to happen you need to do that yourself.
 */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

static DEFINE_MUTEX(kexec_mutex);
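/*
 * For illustration only, a sketch of how a userspace loader might drive
 * this interface.  glibc ships no wrapper, so the raw syscall is used;
 * image_data, image_size and entry_point are placeholders, not a real
 * kernel image:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/kexec.h>
 *
 *	struct kexec_segment seg = {
 *		.buf   = image_data,		source bytes in user memory
 *		.bufsz = image_size,
 *		.mem   = (void *)0x100000,	physical destination
 *		.memsz = image_size_aligned,	page-aligned, >= bufsz
 *	};
 *
 *	if (syscall(SYS_kexec_load, entry_point, 1UL, &seg,
 *		    KEXEC_ARCH_DEFAULT) < 0)
 *		perror("kexec_load");
 */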
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
		struct kexec_segment __user *, segments, unsigned long, flags)
{
	struct kimage **dest_image, *image;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
		return -EPERM;

	/*
	 * Verify we have a legal set of flags
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
	    ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	image = NULL;
	result = 0;

	/* Because we write directly to the reserved memory
	 * region when loading crash kernels we need a mutex here to
	 * prevent multiple crash kernels from attempting to load
	 * simultaneously, and to prevent a crash kernel from loading
	 * over the top of an in-use crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;

	dest_image = &kexec_image;
	if (flags & KEXEC_ON_CRASH)
		dest_image = &kexec_crash_image;
	if (nr_segments > 0) {
		unsigned long i;

		/* Loading another kernel to reboot into */
		if ((flags & KEXEC_ON_CRASH) == 0)
			result = kimage_normal_alloc(&image, entry,
						     nr_segments, segments);
		/* Loading another kernel to switch to if this one crashes */
		else if (flags & KEXEC_ON_CRASH) {
			/* Free any current crash dump kernel before
			 * we corrupt it.
			 */
			kimage_free(xchg(&kexec_crash_image, NULL));
			result = kimage_crash_alloc(&image, entry,
						    nr_segments, segments);
			crash_map_reserved_pages();
		}
		if (result)
			goto out;

		if (flags & KEXEC_PRESERVE_CONTEXT)
			image->preserve_context = 1;
		result = machine_kexec_prepare(image);
		if (result)
			goto out;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out;
		}
		kimage_terminate(image);
		if (flags & KEXEC_ON_CRASH)
			crash_unmap_reserved_pages();
	}
	/* Install the new kernel, and uninstall the old */
	image = xchg(dest_image, image);

out:
	mutex_unlock(&kexec_mutex);
	kimage_free(image);

	return result;
}

/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak crash_map_reserved_pages(void)
{}

void __weak crash_unmap_reserved_pages(void)
{}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
		       compat_ulong_t, nr_segments,
		       struct compat_kexec_segment __user *, segments,
		       compat_ulong_t, flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment out, __user *ksegments;
	unsigned long i, result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			return -EFAULT;

		out.buf   = compat_ptr(in.buf);
		out.bufsz = in.bufsz;
		out.mem   = in.mem;
		out.memsz = in.memsz;

		result = copy_to_user(&ksegments[i], &out, sizeof(out));
		if (result)
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif
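/*
 * For context, a hedged sketch of the paths that reach crash_kexec()
 * below.  The real call sites live in panic() and in the arch oops
 * handlers (e.g. oops_end() on x86); this is an abbreviated composite,
 * not verbatim kernel code:
 *
 *	panic():
 *		crash_kexec(NULL);		regs rebuilt by the callee
 *
 *	arch die()/oops path:
 *		if (kexec_should_crash(current))
 *			crash_kexec(regs);	does not return if a crash
 *						kernel is loaded
 */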
void crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}

int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_map_reserved_pages();
	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);
	crash_unmap_reserved_pages();

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}

static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}
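/*
 * For reference, the in-memory layout append_elf_note() produces.  Each
 * field is padded to a 4-byte boundary, per the ELF note format, which
 * is why the buffer is addressed in u32 units:
 *
 *	+----------+----------+--------+---------------+---------------+
 *	| n_namesz | n_descsz | n_type | name (padded) | desc (padded) |
 *	+----------+----------+--------+---------------+---------------+
 *
 * For the NT_PRSTATUS note written by crash_save_cpu() the name is
 * KEXEC_CORE_NOTE_NAME ("CORE") and desc is a struct elf_prstatus, so
 * a dump reader can recover per-cpu register state from /proc/vmcore.
 */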
static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	crash_notes = alloc_percpu(note_buf_t);
	if (!crash_notes) {
		printk(KERN_ERR "Kexec: Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);

/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */

/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end of the range */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}

	return 0;
}
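/*
 * A worked example of the two syntaxes above (values illustrative):
 *
 *	crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * On a machine with 1G of RAM, system_ram falls in the 512M-2G range,
 * so parse_crashkernel_mem() picks a 64M reservation; on a 4G machine
 * the open-ended 2G- range matches and 128M is reserved instead, at
 * physical offset 16M because of the trailing @16M.  The simple form
 *
 *	crashkernel=128M@16M
 *
 * reserves 128M at 16M unconditionally via parse_crashkernel_simple().
 */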
#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses suffixed crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char\n");
		return -EINVAL;
	}

	return 0;
}

static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *name,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
						crash_base, suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
					     crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
				  unsigned long long system_ram,
				  unsigned long long *crash_size,
				  unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
				 unsigned long long system_ram,
				 unsigned long long *crash_size,
				 unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				   "crashkernel=", suffix_tbl[SUFFIX_LOW]);
}

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}
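/*
 * For reference, vmcoreinfo_data is plain "KEY=value\n" text that the
 * note above wraps; dump tools parse it to interpret raw memory.  A
 * representative excerpt (values illustrative, not from a real boot):
 *
 *	OSRELEASE=3.15.0
 *	PAGESIZE=4096
 *	SYMBOL(init_uts_ns)=ffffffff81a0f2c0
 *	SIZE(page)=64
 *	OFFSET(page.flags)=0
 *	NUMBER(NR_FREE_PAGES)=0
 *	CRASHTIME=1400000000
 */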
void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	/* never write past the end of vmcoreinfo_data */
	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}
subsys_initcall(crash_save_vmcoreinfo_init);
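/*
 * Architectures extend the list above by overriding the weak
 * arch_crash_save_vmcoreinfo().  As a hedged illustration of the
 * pattern (the exact entries vary by architecture and kernel version),
 * x86_64 records roughly:
 *
 *	void arch_crash_save_vmcoreinfo(void)
 *	{
 *		VMCOREINFO_SYMBOL(phys_base);
 *		VMCOREINFO_SYMBOL(init_level4_pgt);
 *	}
 */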
/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end().  We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();
		printk(KERN_EMERG "Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}
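/*
 * kernel_kexec() is reached from the reboot(2) syscall.  As an
 * illustrative userspace sketch, once an image has been loaded with
 * kexec_load above, the jump is triggered with:
 *
 *	#include <unistd.h>
 *	#include <sys/syscall.h>
 *	#include <linux/reboot.h>
 *
 *	syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
 *		LINUX_REBOOT_CMD_KEXEC, NULL);
 *
 * On success this call does not return; the machine jumps into the
 * previously loaded kernel.
 */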