diff options
author | Austin S. Hemmelgarn <austin@netdata.cloud> | 2022-03-11 11:22:37 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-11 18:22:37 +0200 |
commit | 8c8350371300d8fcafd794f1697bf22b83214120 (patch) | |
tree | ac8ec374e674aa67f73b023abae69f0d23509757 | |
parent | 5fdcd854ec809079a59173d1d13ea28eb7fb92e6 (diff) |
Change default OOM score and scheduling policy to behave more sanely. (#12271)
-rw-r--r-- | daemon/README.md | 59 | ||||
-rw-r--r-- | daemon/daemon.c | 14 | ||||
-rw-r--r-- | system/netdata.service.in | 10 | ||||
-rw-r--r-- | system/netdata.service.v235.in | 10 |
4 files changed, 19 insertions, 74 deletions
diff --git a/daemon/README.md b/daemon/README.md index 1557ca0c87..44abfa8e94 100644 --- a/daemon/README.md +++ b/daemon/README.md @@ -254,57 +254,14 @@ where: See [debugging](#debugging). -## OOM Score - -Netdata runs with `OOMScore = 1000`. This means Netdata will be the first to be killed when your server runs out of -memory. - -You can set Netdata OOMScore in `netdata.conf`, like this: - -```conf -[global] - OOM score = 1000 -``` - -Netdata logs its OOM score when it starts: - -```sh -# grep OOM /var/log/netdata/error.log -2017-10-15 03:47:31: netdata INFO : Adjusted my Out-Of-Memory (OOM) score from 0 to 1000. -``` - -### OOM score and systemd - -Netdata will not be able to lower its OOM Score below zero, when it is started as the `netdata` user (systemd case). - -To allow Netdata control its OOM Score in such cases, you will need to edit `netdata.service` and set: - -```sh -[Service] -# The minimum Netdata Out-Of-Memory (OOM) score. -# Netdata (via [global].OOM score in netdata.conf) can only increase the value set here. -# To decrease it, set the minimum here and set the same or a higher value in netdata.conf. -# Valid values: -1000 (never kill netdata) to 1000 (always kill netdata). -OOMScoreAdjust=-1000 -``` - -Run `systemctl daemon-reload` to reload these changes. - -The above, sets and OOMScore for Netdata to `-1000`, so that Netdata can increase it via `netdata.conf`. - -If you want to control it entirely via systemd, you can set in `netdata.conf`: - -```conf -[global] - OOM score = keep -``` - -Using the above, whatever OOM Score you have set at `netdata.service` will be maintained by netdata. - ## Netdata process scheduling policy -By default Netdata runs with the `idle` process scheduling policy, so that it uses CPU resources, only when there is -idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on the charts. 
+By default, Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that they use CPU +resources only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on +the charts. + +Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates +issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low. You can set Netdata scheduling policy in `netdata.conf`, like this: @@ -315,9 +272,9 @@ You can set Netdata scheduling policy in `netdata.conf`, like this: You can use the following: -| policy | description | +| policy | description | | :-----------------------: | :---------- | -| `idle` | use CPU only when there is spare - this is lower than nice 19 - it is the default for Netdata and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | +| `idle` | use CPU only when there is spare - this is lower than nice 19 - it was the default for Netdata prior to version 1.34.0 and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | | `other`<br/>or<br/>`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. | | `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. 
| | `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` threads becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. | diff --git a/daemon/daemon.c b/daemon/daemon.c index 391c7a7596..2b8a6552b6 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -177,7 +177,7 @@ int become_user(const char *username, int pid_fd) { static void oom_score_adj(void) { char buf[30 + 1]; - long long int old_score, wanted_score = OOM_SCORE_ADJ_MAX, final_score = 0; + long long int old_score, wanted_score = 0, final_score = 0; // read the existing score if(read_single_signed_number_file("/proc/self/oom_score_adj", &old_score)) { @@ -275,8 +275,8 @@ struct sched_def { // the available members are important too! // these are all the possible scheduling policies supported by netdata -#ifdef SCHED_IDLE - { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#ifdef SCHED_BATCH + { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, #endif #ifdef SCHED_OTHER @@ -284,6 +284,10 @@ struct sched_def { { "nice", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, #endif +#ifdef SCHED_IDLE + { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#endif + #ifdef SCHED_RR { "rr", SCHED_RR, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, #endif @@ -292,10 +296,6 @@ struct sched_def { { "fifo", SCHED_FIFO, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, #endif -#ifdef SCHED_BATCH - { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, -#endif - // do not change the scheduling priority { "keep", 0, 0, SCHED_FLAG_KEEP_AS_IS }, { "none", 0, 0, SCHED_FLAG_KEEP_AS_IS }, diff --git a/system/netdata.service.in b/system/netdata.service.in index a34ea61dd0..1683a0a091 100644 --- a/system/netdata.service.in +++ b/system/netdata.service.in @@ -26,22 +26,16 @@ TimeoutStopSec=150 Restart=on-failure RestartSec=30 -# The minimum netdata Out-Of-Memory (OOM) score. 
-# netdata (via [global].OOM score in netdata.conf) can only increase the value set here. -# To decrease it, set the minimum here and set the same or a higher value in netdata.conf. -# Valid values: -1000 (never kill netdata) to 1000 (always kill netdata). -OOMScoreAdjust=1000 - # Valid policies: other (the system default) | batch | idle | fifo | rr # To give netdata the max priority, set CPUSchedulingPolicy=rr and CPUSchedulingPriority=99 -CPUSchedulingPolicy=idle +CPUSchedulingPolicy=batch # This sets the scheduling priority (for policies: rr and fifo). # Priority gets values 1 (lowest) to 99 (highest). #CPUSchedulingPriority=1 # For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata (-20 highest to 19 lowest). -#Nice=0 +Nice=0 # Capabilities # is required for freeipmi and slabinfo plugins diff --git a/system/netdata.service.v235.in b/system/netdata.service.v235.in index 664c583f3a..dfdf88b693 100644 --- a/system/netdata.service.v235.in +++ b/system/netdata.service.v235.in @@ -27,22 +27,16 @@ TimeoutStopSec=150 Restart=on-failure RestartSec=30 -# The minimum netdata Out-Of-Memory (OOM) score. -# netdata (via [global].OOM score in netdata.conf) can only increase the value set here. -# To decrease it, set the minimum here and set the same or a higher value in netdata.conf. -# Valid values: -1000 (never kill netdata) to 1000 (always kill netdata). -OOMScoreAdjust=1000 - # Valid policies: other (the system default) | batch | idle | fifo | rr # To give netdata the max priority, set CPUSchedulingPolicy=rr and CPUSchedulingPriority=99 -CPUSchedulingPolicy=idle +CPUSchedulingPolicy=batch # This sets the scheduling priority (for policies: rr and fifo). # Priority gets values 1 (lowest) to 99 (highest). #CPUSchedulingPriority=1 # For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata (-20 highest to 19 lowest). -#Nice=0 +Nice=0 [Install] WantedBy=multi-user.target |