summaryrefslogtreecommitdiffstats
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-cciss28
-rw-r--r--Documentation/cgroups/cgroups.txt11
-rw-r--r--Documentation/debugging-via-ohci1394.txt8
-rw-r--r--Documentation/feature-removal-schedule.txt30
-rw-r--r--Documentation/filesystems/ext3.txt16
-rw-r--r--Documentation/hwmon/ltc42157
-rw-r--r--Documentation/hwmon/ltc42457
-rw-r--r--Documentation/i2c/instantiating-devices2
-rw-r--r--Documentation/infiniband/user_mad.txt4
-rw-r--r--Documentation/infiniband/user_verbs.txt2
-rw-r--r--Documentation/isdn/INTERFACE.CAPI83
-rw-r--r--Documentation/kernel-parameters.txt1
-rw-r--r--Documentation/misc-devices/eeprom (renamed from Documentation/i2c/chips/eeprom)0
-rw-r--r--Documentation/misc-devices/max6875 (renamed from Documentation/i2c/chips/max6875)6
-rw-r--r--Documentation/networking/pktgen.txt8
-rw-r--r--Documentation/scsi/hptiop.txt21
-rw-r--r--Documentation/sound/alsa/HD-Audio-Models.txt1
-rw-r--r--Documentation/vm/ksm.txt13
-rw-r--r--Documentation/vm/page-types.c304
-rw-r--r--Documentation/vm/pagemap.txt8
-rw-r--r--Documentation/w1/masters/ds24826
21 files changed, 424 insertions, 142 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 0a92a7c93a62..4f29e5f1ebfa 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -31,3 +31,31 @@ Date: March 2009
Kernel Version: 2.6.30
Contact: iss_storagedev@hp.com
Description: A symbolic link to /sys/block/cciss!cXdY
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/rescan
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Kicks of a rescan of the controller to discover logical
+ drive topology changes.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the 8-byte LUN ID used to address logical
+ drive Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the RAID level of logical drive Y of
+ controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the usage count (number of opens) of logical drive Y
+ of controller X.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 455d4e6d346d..0b33bfe7dde9 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system.
Each cgroup is represented by a directory in the cgroup file system
containing the following files describing that cgroup:
- - tasks: list of tasks (by pid) attached to that cgroup
+ - tasks: list of tasks (by pid) attached to that cgroup. This list
+ is not guaranteed to be sorted. Writing a thread id into this file
+ moves the thread into this cgroup.
+ - cgroup.procs: list of tgids in the cgroup. This list is not
+ guaranteed to be sorted or free of duplicate tgids, and userspace
+ should sort/uniquify the list if this property is required.
+ Writing a tgid into this file moves all threads with that tgid into
+ this cgroup.
- notify_on_release flag: run the release agent on exit?
- release_agent: the path to use for release notifications (this file
exists in the top cgroup only)
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup.
In this directory you can find several files:
# ls
-notify_on_release tasks
+cgroup.procs notify_on_release tasks
(plus whatever files added by the attached subsystems)
Now attach your shell to this cgroup:
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
index 59a91e5c6909..611f5a5499b1 100644
--- a/Documentation/debugging-via-ohci1394.txt
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -64,14 +64,14 @@ be used to view the printk buffer of a remote machine, even with live update.
Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
from 32-bit firescope and vice versa:
-- ftp://ftp.suse.de/private/bk/firewire/tools/firescope-0.2.2.tar.bz2
+- http://halobates.de/firewire/firescope-0.2.2.tar.bz2
and he implemented fast system dump (alpha version - read README.txt):
-- ftp://ftp.suse.de/private/bk/firewire/tools/firedump-0.1.tar.bz2
+- http://halobates.de/firewire/firedump-0.1.tar.bz2
There is also a gdb proxy for firewire which allows to use gdb to access
data which can be referenced from symbols found by gdb in vmlinux:
-- ftp://ftp.suse.de/private/bk/firewire/tools/fireproxy-0.33.tar.bz2
+- http://halobates.de/firewire/fireproxy-0.33.tar.bz2
The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
yet stable) with kgdb over an memory-based communication module (kgdbom).
@@ -178,7 +178,7 @@ Step-by-step instructions for using firescope with early OHCI initialization:
Notes
-----
-Documentation and specifications: ftp://ftp.suse.de/private/bk/firewire/docs
+Documentation and specifications: http://halobates.de/firewire/
FireWire is a trademark of Apple Inc. - for more information please refer to:
http://en.wikipedia.org/wiki/FireWire
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 89a47b5aff07..04e6c819b28a 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -451,3 +451,33 @@ Why: OSS sound_core grabs all legacy minors (0-255) of SOUND_MAJOR
will also allow making ALSA OSS emulation independent of
sound_core. The dependency will be broken then too.
Who: Tejun Heo <tj@kernel.org>
+
+----------------------------
+
+What: Support for VMware's guest paravirtuliazation technique [VMI] will be
+ dropped.
+When: 2.6.37 or earlier.
+Why: With the recent innovations in CPU hardware acceleration technologies
+ from Intel and AMD, VMware ran a few experiments to compare these
+ techniques to guest paravirtualization technique on VMware's platform.
+ These hardware assisted virtualization techniques have outperformed the
+ performance benefits provided by VMI in most of the workloads. VMware
+ expects that these hardware features will be ubiquitous in a couple of
+ years, as a result, VMware has started a phased retirement of this
+ feature from the hypervisor. We will be removing this feature from the
+ Kernel too. Right now we are targeting 2.6.37 but can retire earlier if
+ technical reasons (read opportunity to remove major chunk of pvops)
+ arise.
+
+ Please note that VMI has always been an optimization and non-VMI kernels
+ still work fine on VMware's platform.
+ Latest versions of VMware's product which support VMI are,
+ Workstation 7.0 and VSphere 4.0 on ESX side, future maintainence
+ releases for these products will continue supporting VMI.
+
+ For more details about VMI retirement take a look at this,
+ http://blogs.vmware.com/guestosguide/2009/09/vmi-retirement.html
+
+Who: Alok N Kataria <akataria@vmware.com>
+
+----------------------------
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 570f9bd9be2b..05d5cf1d743f 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks.
sb=n Use alternate superblock at this location.
-quota
-noquota
-grpquota
-usrquota
+quota These options are ignored by the filesystem. They
+noquota are used only by quota tools to recognize volumes
+grpquota where quota should be turned on. See documentation
+usrquota in the quota-tools package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+jqfmt=<quota type> These options tell filesystem details about quota
+usrjquota=<file> so that quota information can be properly updated
+grpjquota=<file> during journal replay. They replace the above
+ quota options. See documentation in the quota-tools
+ package for more details
+ (http://sourceforge.net/projects/linuxquota).
bh (*) ext3 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215
index 2e6a21eb656c..c196a1846259 100644
--- a/Documentation/hwmon/ltc4215
+++ b/Documentation/hwmon/ltc4215
@@ -22,12 +22,13 @@ Usage Notes
-----------
This driver does not probe for LTC4215 devices, due to the fact that some
-of the possible addresses are unfriendly to probing. You will need to use
-the "force" parameter to tell the driver where to find the device.
+of the possible addresses are unfriendly to probing. You will have to
+instantiate the devices explicitly.
Example: the following will load the driver for an LTC4215 at address 0x44
on I2C bus #0:
-$ modprobe ltc4215 force=0,0x44
+$ modprobe ltc4215
+$ echo ltc4215 0x44 > /sys/bus/i2c/devices/i2c-0/new_device
Sysfs entries
diff --git a/Documentation/hwmon/ltc4245 b/Documentation/hwmon/ltc4245
index bae7a3adc5d8..02838a47d862 100644
--- a/Documentation/hwmon/ltc4245
+++ b/Documentation/hwmon/ltc4245
@@ -23,12 +23,13 @@ Usage Notes
-----------
This driver does not probe for LTC4245 devices, due to the fact that some
-of the possible addresses are unfriendly to probing. You will need to use
-the "force" parameter to tell the driver where to find the device.
+of the possible addresses are unfriendly to probing. You will have to
+instantiate the devices explicitly.
Example: the following will load the driver for an LTC4245 at address 0x23
on I2C bus #1:
-$ modprobe ltc4245 force=1,0x23
+$ modprobe ltc4245
+$ echo ltc4245 0x23 > /sys/bus/i2c/devices/i2c-1/new_device
Sysfs entries
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
index c740b7b41088..e89490270aba 100644
--- a/Documentation/i2c/instantiating-devices
+++ b/Documentation/i2c/instantiating-devices
@@ -188,7 +188,7 @@ segment, the address is sufficient to uniquely identify the device to be
deleted.
Example:
-# echo eeprom 0x50 > /sys/class/i2c-adapter/i2c-3/new_device
+# echo eeprom 0x50 > /sys/bus/i2c/devices/i2c-3/new_device
While this interface should only be used when in-kernel device declaration
can't be done, there is a variety of cases where it can be helpful:
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 744687dd195b..8a366959f5cc 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -128,8 +128,8 @@ Setting IsSM Capability Bit
To create the appropriate character device files automatically with
udev, a rule like
- KERNEL="umad*", NAME="infiniband/%k"
- KERNEL="issm*", NAME="infiniband/%k"
+ KERNEL=="umad*", NAME="infiniband/%k"
+ KERNEL=="issm*", NAME="infiniband/%k"
can be used. This will create device nodes named
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
index f847501e50b5..afe3f8da9018 100644
--- a/Documentation/infiniband/user_verbs.txt
+++ b/Documentation/infiniband/user_verbs.txt
@@ -58,7 +58,7 @@ Memory pinning
To create the appropriate character device files automatically with
udev, a rule like
- KERNEL="uverbs*", NAME="infiniband/%k"
+ KERNEL=="uverbs*", NAME="infiniband/%k"
can be used. This will create device nodes named
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 686e107923ec..5fe8de5cc727 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -60,10 +60,9 @@ open() operation on regular files or character devices.
After a successful return from register_appl(), CAPI messages from the
application may be passed to the driver for the device via calls to the
-send_message() callback function. The CAPI message to send is stored in the
-data portion of an skb. Conversely, the driver may call Kernel CAPI's
-capi_ctr_handle_message() function to pass a received CAPI message to Kernel
-CAPI for forwarding to an application, specifying its ApplID.
+send_message() callback function. Conversely, the driver may call Kernel
+CAPI's capi_ctr_handle_message() function to pass a received CAPI message to
+Kernel CAPI for forwarding to an application, specifying its ApplID.
Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
forwarded as calls to the release_appl() callback function, passing the same
@@ -142,6 +141,7 @@ u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
to accepting or queueing the message. Errors occurring during the
actual processing of the message should be signaled with an
appropriate reply message.
+ May be called in process or interrupt context.
Calls to this function are not serialized by Kernel CAPI, ie. it must
be prepared to be re-entered.
@@ -154,7 +154,8 @@ read_proc_t *ctr_read_proc
system entry, /proc/capi/controllers/<n>; will be called with a
pointer to the device's capi_ctr structure as the last (data) argument
-Note: Callback functions are never called in interrupt context.
+Note: Callback functions except send_message() are never called in interrupt
+context.
- to be filled in before calling capi_ctr_ready():
@@ -171,14 +172,40 @@ u8 serial[CAPI_SERIAL_LEN]
value to return for CAPI_GET_SERIAL
-4.3 The _cmsg Structure
+4.3 SKBs
+
+CAPI messages are passed between Kernel CAPI and the driver via send_message()
+and capi_ctr_handle_message(), stored in the data portion of a socket buffer
+(skb). Each skb contains a single CAPI message coded according to the CAPI 2.0
+standard.
+
+For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual
+payload data immediately follows the CAPI message itself within the same skb.
+The Data and Data64 parameters are not used for processing. The Data64
+parameter may be omitted by setting the length field of the CAPI message to 22
+instead of 30.
+
+
+4.4 The _cmsg Structure
(declared in <linux/isdn/capiutil.h>)
The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
-accessible form. It contains members for all possible CAPI 2.0 parameters, of
-which only those appearing in the message type currently being processed are
-actually used. Unused members should be set to zero.
+accessible form. It contains members for all possible CAPI 2.0 parameters,
+including subparameters of the Additional Info and B Protocol structured
+parameters, with the following exceptions:
+
+* second Calling party number (CONNECT_IND)
+
+* Data64 (DATA_B3_REQ and DATA_B3_IND)
+
+* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ)
+
+* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP
+ and SELECT_B_PROTOCOL_REQ)
+
+Only those parameters appearing in the message type currently being processed
+are actually used. Unused members should be set to zero.
Members are named after the CAPI 2.0 standard names of the parameters they
represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
@@ -190,18 +217,19 @@ u16 for CAPI parameters of type 'word'
u32 for CAPI parameters of type 'dword'
-_cstruct for CAPI parameters of type 'struct' not containing any
- variably-sized (struct) subparameters (eg. 'Called Party Number')
+_cstruct for CAPI parameters of type 'struct'
The member is a pointer to a buffer containing the parameter in
CAPI encoding (length + content). It may also be NULL, which will
be taken to represent an empty (zero length) parameter.
+ Subparameters are stored in encoded form within the content part.
-_cmstruct for CAPI parameters of type 'struct' containing 'struct'
- subparameters ('Additional Info' and 'B Protocol')
+_cmstruct alternative representation for CAPI parameters of type 'struct'
+ (used only for the 'Additional Info' and 'B Protocol' parameters)
The representation is a single byte containing one of the values:
- CAPI_DEFAULT: the parameter is empty
- CAPI_COMPOSE: the values of the subparameters are stored
- individually in the corresponding _cmsg structure members
+ CAPI_DEFAULT: The parameter is empty/absent.
+ CAPI_COMPOSE: The parameter is present.
+ Subparameter values are stored individually in the corresponding
+ _cmsg structure members.
Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
messages between their transport encoding described in the CAPI 2.0 standard
@@ -297,3 +325,26 @@ char *capi_cmd2str(u8 Command, u8 Subcommand)
be NULL if the command/subcommand is not one of those defined in the
CAPI 2.0 standard.
+
+7. Debugging
+
+The module kernelcapi has a module parameter showcapimsgs controlling some
+debugging output produced by the module. It can only be set when the module is
+loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on
+the command line or in the configuration file.
+
+If the lowest bit of showcapimsgs is set, kernelcapi logs controller and
+application up and down events.
+
+In addition, every registered CAPI controller has an associated traceflag
+parameter controlling how CAPI messages sent from and to tha controller are
+logged. The traceflag parameter is initialized with the value of the
+showcapimsgs parameter when the controller is registered, but can later be
+changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE.
+
+If the value of traceflag is non-zero, CAPI messages are logged.
+DATA_B3 messages are only logged if the value of traceflag is > 2.
+
+If the lowest bit of traceflag is set, only the command/subcommand and message
+length are logged. Otherwise, kernelcapi logs a readable representation of
+the entire message.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 1dc4b9cc20e5..c8d1b2be28ef 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -671,6 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file
earlyprintk= [X86,SH,BLACKFIN]
earlyprintk=vga
earlyprintk=serial[,ttySn[,baudrate]]
+ earlyprintk=ttySn[,baudrate]
earlyprintk=dbgp[debugController#]
Append ",keep" to not disable it when the real console
diff --git a/Documentation/i2c/chips/eeprom b/Documentation/misc-devices/eeprom
index f7e8104b5764..f7e8104b5764 100644
--- a/Documentation/i2c/chips/eeprom
+++ b/Documentation/misc-devices/eeprom
diff --git a/Documentation/i2c/chips/max6875 b/Documentation/misc-devices/max6875
index 10ca43cd1a72..1e89ee3ccc1b 100644
--- a/Documentation/i2c/chips/max6875
+++ b/Documentation/misc-devices/max6875
@@ -42,10 +42,12 @@ General Remarks
Valid addresses for the MAX6875 are 0x50 and 0x52.
Valid addresses for the MAX6874 are 0x50, 0x52, 0x54 and 0x56.
-The driver does not probe any address, so you must force the address.
+The driver does not probe any address, so you explicitly instantiate the
+devices.
Example:
-$ modprobe max6875 force=0,0x50
+$ modprobe max6875
+$ echo max6875 0x50 > /sys/bus/i2c/devices/i2c-0/new_device
The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple
addresses. For example, for address 0x50, it also reserves 0x51.
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index c6cf4a3c16e0..61bb645d50e0 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -90,6 +90,11 @@ Examples:
pgset "dstmac 00:00:00:00:00:00" sets MAC destination address
pgset "srcmac 00:00:00:00:00:00" sets MAC source address
+ pgset "queue_map_min 0" Sets the min value of tx queue interval
+ pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
+ To select queue 1 of a given device,
+ use queue_map_min=1 and queue_map_max=1
+
pgset "src_mac_count 1" Sets the number of MACs we'll range through.
The 'minimum' MAC is what you set with srcmac.
@@ -101,6 +106,9 @@ Examples:
IPDST_RND, UDPSRC_RND,
UDPDST_RND, MACSRC_RND, MACDST_RND
MPLS_RND, VID_RND, SVID_RND
+ QUEUE_MAP_RND # queue map random
+ QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
+
pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
cycle through the port range.
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt
index a6eb4add1be6..9605179711f4 100644
--- a/Documentation/scsi/hptiop.txt
+++ b/Documentation/scsi/hptiop.txt
@@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop)
Controller Register Map
-------------------------
+For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2:
+
+ BAR0 offset Register
+ 0x11C5C Link Interface IRQ Set
+ 0x11C60 Link Interface IRQ Clear
+
+ BAR2 offset Register
+ 0x10 Inbound Message Register 0
+ 0x14 Inbound Message Register 1
+ 0x18 Outbound Message Register 0
+ 0x1C Outbound Message Register 1
+ 0x20 Inbound Doorbell Register
+ 0x24 Inbound Interrupt Status Register
+ 0x28 Inbound Interrupt Mask Register
+ 0x30 Outbound Interrupt Status Register
+ 0x34 Outbound Interrupt Mask Register
+ 0x40 Inbound Queue Port
+ 0x44 Outbound Queue Port
+
For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0:
BAR0 offset Register
@@ -93,7 +112,7 @@ The driver exposes following sysfs attributes:
-----------------------------------------------------------------------------
-Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved.
+Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
index 75fddb40f416..4c7f9aee5c4e 100644
--- a/Documentation/sound/alsa/HD-Audio-Models.txt
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -359,6 +359,7 @@ STAC9227/9228/9229/927x
5stack-no-fp D965 5stack without front panel
dell-3stack Dell Dimension E520
dell-bios Fixes with Dell BIOS setup
+ volknob Fixes with volume-knob widget 0x24
auto BIOS setup (default)
STAC92HD71B*
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index 72a22f65960e..262d8e6793a3 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
readable by all but writable only by root:
max_kernel_pages - set to maximum number of kernel pages that KSM may use
- e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
+ e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages"
Value 0 imposes no limit on the kernel pages KSM may use;
but note that any process using MADV_MERGEABLE can cause
KSM to allocate these pages, unswappable until it exits.
- Default: 2000 (chosen for demonstration purposes)
+ Default: quarter of memory (chosen to not pin too much)
pages_to_scan - how many present pages to scan before ksmd goes to sleep
- e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
- Default: 200 (chosen for demonstration purposes)
+ e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
+ Default: 100 (chosen for demonstration purposes)
sleep_millisecs - how many milliseconds ksmd should sleep before next scan
e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
@@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages,
set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
set 2 to stop ksmd and unmerge all pages currently merged,
but leave mergeable areas registered for next run
- Default: 1 (for immediate use by apps which register)
+ Default: 0 (must be changed to 1 to activate KSM,
+ except if CONFIG_SYSFS is disabled)
The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
@@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high
proportion there would also indicate poor use of madvise MADV_MERGEABLE.
Izik Eidus,
-Hugh Dickins, 30 July 2009
+Hugh Dickins, 24 Sept 2009
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index fa1a30d9e9d5..3ec4f2a22585 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -2,7 +2,10 @@
* page-types: Tool for querying page flags
*
* Copyright (C) 2009 Intel corporation
- * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
+ *
+ * Authors: Wu Fengguang <fengguang.wu@intel.com>
+ *
+ * Released under the General Public License (GPL).
*/
#define _LARGEFILE64_SOURCE
@@ -69,7 +72,9 @@
#define KPF_COMPOUND_TAIL 16
#define KPF_HUGE 17
#define KPF_UNEVICTABLE 18
+#define KPF_HWPOISON 19
#define KPF_NOPAGE 20
+#define KPF_KSM 21
/* [32-] kernel hacking assistances */
#define KPF_RESERVED 32
@@ -116,7 +121,9 @@ static char *page_flag_names[] = {
[KPF_COMPOUND_TAIL] = "T:compound_tail",
[KPF_HUGE] = "G:huge",
[KPF_UNEVICTABLE] = "u:unevictable",
+ [KPF_HWPOISON] = "X:hwpoison",
[KPF_NOPAGE] = "n:nopage",
+ [KPF_KSM] = "x:ksm",
[KPF_RESERVED] = "r:reserved",
[KPF_MLOCKED] = "m:mlocked",
@@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES];
static int nr_vmas;
static unsigned long pg_start[MAX_VMAS];
static unsigned long pg_end[MAX_VMAS];
-static unsigned long voffset;
-
-static int pagemap_fd;
#define MAX_BIT_FILTERS 64
static int nr_bit_filters;
@@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS];
static int page_size;
-#define PAGES_BATCH (64 << 10) /* 64k pages */
+static int pagemap_fd;
static int kpageflags_fd;
+static int opt_hwpoison;
+static int opt_unpoison;
+
+static char *hwpoison_debug_fs = "/debug/hwpoison";
+static int hwpoison_inject_fd;
+static int hwpoison_forget_fd;
+
#define HASH_SHIFT 13
#define HASH_SIZE (1 << HASH_SHIFT)
#define HASH_MASK (HASH_SIZE - 1)
@@ -207,6 +218,74 @@ static void fatal(const char *x, ...)
exit(EXIT_FAILURE);
}
+int checked_open(const char *pathname, int flags)
+{
+ int fd = open(pathname, flags);
+
+ if (fd < 0) {
+ perror(pathname);
+ exit(EXIT_FAILURE);
+ }
+
+ return fd;
+}
+
+/*
+ * pagemap/kpageflags routines
+ */
+
+static unsigned long do_u64_read(int fd, char *name,
+ uint64_t *buf,
+ unsigned long index,
+ unsigned long count)
+{
+ long bytes;
+
+ if (index > ULONG_MAX / 8)
+ fatal("index overflow: %lu\n", index);
+
+ if (lseek(fd, index * 8, SEEK_SET) < 0) {
+ perror(name);
+ exit(EXIT_FAILURE);
+ }
+
+ bytes = read(fd, buf, count * 8);
+ if (bytes < 0) {
+ perror(name);
+ exit(EXIT_FAILURE);
+ }
+ if (bytes % 8)
+ fatal("partial read: %lu bytes\n", bytes);
+
+ return bytes / 8;
+}
+
+static unsigned long kpageflags_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
+}
+
+static unsigned long pagemap_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
+}
+
+static unsigned long pagemap_pfn(uint64_t val)
+{
+ unsigned long pfn;
+
+ if (val & PM_PRESENT)
+ pfn = PM_PFRAME(val);
+ else
+ pfn = 0;
+
+ return pfn;
+}
+
/*
* page flag names
@@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags)
* page list and summary
*/
-static void show_page_range(unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
static uint64_t flags0;
static unsigned long voff;
@@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags)
count = 1;
}
-static void show_page(unsigned long offset, uint64_t flags)
+static void show_page(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
if (opt_pid)
printf("%lx\t", voffset);
@@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags)
return flags;
}
+static uint64_t kpageflags_flags(uint64_t flags)
+{
+ flags = expand_overloaded_flags(flags);
+
+ if (!opt_raw)
+ flags = well_known_flags(flags);
+
+ return flags;
+}
+
+/*
+ * page actions
+ */
+
+static void prepare_hwpoison_fd(void)
+{
+ char buf[100];
+
+ if (opt_hwpoison && !hwpoison_inject_fd) {
+ sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs);
+ hwpoison_inject_fd = checked_open(buf, O_WRONLY);
+ }
+
+ if (opt_unpoison && !hwpoison_forget_fd) {
+ sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs);
+ hwpoison_forget_fd = checked_open(buf, O_WRONLY);
+ }
+}
+
+static int hwpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_inject_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison inject");
+ return len;
+ }
+ return 0;
+}
+
+static int unpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_forget_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison forget");
+ return len;
+ }
+ return 0;
+}
/*
* page frame walker
@@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags)
exit(EXIT_FAILURE);
}
-static void add_page(unsigned long offset, uint64_t flags)
+static void add_page(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
- flags = expand_overloaded_flags(flags);
-
- if (!opt_raw)
- flags = well_known_flags(flags);
+ flags = kpageflags_flags(flags);
if (!bit_mask_ok(flags))
return;
+ if (opt_hwpoison)
+ hwpoison_page(offset);
+ if (opt_unpoison)
+ unpoison_page(offset);
+
if (opt_list == 1)
- show_page_range(offset, flags);
+ show_page_range(voffset, offset, flags);
else if (opt_list == 2)
- show_page(offset, flags);
+ show_page(voffset, offset, flags);
nr_pages[hash_slot(flags)]++;
total_pages++