diff options
-rwxr-xr-x | apps_plugin.c | 105 | ||||
-rwxr-xr-x | charts.d/squid.chart.sh | 8 | ||||
-rw-r--r-- | netdata.c | 102 | ||||
-rwxr-xr-x | plugins.d/charts.d.plugin | 54 |
4 files changed, 169 insertions, 100 deletions
diff --git a/apps_plugin.c b/apps_plugin.c index dbf256db05..d0b7f39118 100755 --- a/apps_plugin.c +++ b/apps_plugin.c @@ -869,12 +869,44 @@ void update_statistics(void) } } +unsigned long long usecdiff(struct timeval *now, struct timeval *last) { + return ((((now->tv_sec * 1000000ULL) + now->tv_usec) - ((last->tv_sec * 1000000ULL) + last->tv_usec))); +} void show_dimensions(void) { + static struct timeval last = { 0, 0 }; + static struct rusage me_last; + struct target *w; + struct timeval now; + struct rusage me; + + unsigned long long usec; + unsigned long long cpuuser; + unsigned long long cpusyst; + + if(!last.tv_sec) { + gettimeofday(&last, NULL); + getrusage(RUSAGE_SELF, &me_last); + + usec = update_every * 1000000ULL; + cpuuser = 0; + cpusyst = 0; + } + else { + gettimeofday(&now, NULL); + getrusage(RUSAGE_SELF, &me); + + usec = usecdiff(&now, &last); + cpuuser = me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec; + cpusyst = me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec; + + bcopy(&now, &last, sizeof(struct timeval)); + bcopy(&me, &me_last, sizeof(struct rusage)); + } - fprintf(stdout, "BEGIN apps.cpu\n"); + fprintf(stdout, "BEGIN apps.cpu %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -882,7 +914,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.cpu_user\n"); + fprintf(stdout, "BEGIN apps.cpu_user %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -890,7 +922,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.cpu_system\n"); + fprintf(stdout, "BEGIN apps.cpu_system %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -898,7 +930,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.threads\n"); + fprintf(stdout, "BEGIN apps.threads %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -906,7 +938,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.processes\n"); + fprintf(stdout, "BEGIN apps.processes %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -914,7 +946,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.mem\n"); + fprintf(stdout, "BEGIN apps.mem %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -922,7 +954,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.minor_faults\n"); + fprintf(stdout, "BEGIN apps.minor_faults %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -930,7 +962,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.major_faults\n"); + fprintf(stdout, "BEGIN apps.major_faults %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -938,7 +970,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.lreads\n"); + fprintf(stdout, "BEGIN apps.lreads %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -946,7 +978,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.lwrites\n"); + fprintf(stdout, "BEGIN apps.lwrites %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -954,7 +986,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.preads\n"); + fprintf(stdout, "BEGIN apps.preads %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -962,7 +994,7 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); - fprintf(stdout, "BEGIN apps.pwrites\n"); + fprintf(stdout, "BEGIN apps.pwrites %llu\n", usec); for (w = target_root; w ; w = w->next) { if(w->target || (!w->processes && !w->exposed)) continue; @@ -970,6 +1002,14 @@ void show_dimensions(void) } fprintf(stdout, "END\n"); + fprintf(stdout, "BEGIN netdata.apps_cpu %llu\n", usec); + fprintf(stdout, "SET user = %llu\n", cpuuser); + fprintf(stdout, "SET system = %llu\n", cpusyst); + fprintf(stdout, "END\n"); + fprintf(stdout, "BEGIN netdata.apps_files %llu\n", usec); + fprintf(stdout, "SET files = %llu\n", file_counter); + fprintf(stdout, "END\n"); + fflush(stdout); } @@ -1074,6 +1114,13 @@ void show_charts(void) fprintf(stdout, "DIMENSION %s '' incremental 1 1024\n", w->name); } + fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' netdata netdata stacked 10000 %d\n", update_every); + fprintf(stdout, "DIMENSION user '' incremental 1 1000\n"); + fprintf(stdout, "DIMENSION system '' incremental 1 1000\n"); + + fprintf(stdout, "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' netdata netdata line 10001 %d\n", update_every); + fprintf(stdout, "DIMENSION files '' incremental 1 1\n"); + fflush(stdout); } @@ -1130,10 +1177,6 @@ long get_pid_max(void) return mpid; } -unsigned long long usecdiff(struct timeval *now, struct timeval *last) { - return ((((now->tv_sec * 1000000ULL) + now->tv_usec) - ((last->tv_sec * 1000000ULL) + last->tv_usec))); -} - int main(int argc, char **argv) { Hertz = get_hertz(); @@ -1149,13 +1192,10 @@ int main(int argc, char **argv) exit(1); } - int created_usage_chart = 0; - struct rusage me, me_last; unsigned long long counter = 1; unsigned long long usec = 0, susec = 0; struct timeval last, now; gettimeofday(&last, NULL); - getrusage(RUSAGE_SELF, &me_last); for(;1; counter++) { if(!update_from_proc()) { @@ -1168,37 +1208,10 @@ int main(int argc, char **argv) show_charts(); // this is smart enough to show only newly added apps, when needed show_dimensions(); - if(getrusage(RUSAGE_SELF, &me) == 0) { - unsigned long long cpuuser = me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec; - unsigned long long cpusyst = me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec; - - if(!created_usage_chart) { - created_usage_chart = 1; - fprintf(stdout, "CHART netdata.apps_cpu '' 'Apps Plugin CPU' 'milliseconds/s' netdata netdata stacked 10000 %d\n", update_every); - fprintf(stdout, "DIMENSION user '' incremental 1 1000\n"); - fprintf(stdout, "DIMENSION system '' incremental 1 1000\n"); - - fprintf(stdout, "CHART netdata.apps_files '' 'Apps Plugin Files' 'files/s' netdata netdata line 10001 %d\n", update_every); - fprintf(stdout, "DIMENSION files '' incremental 1 1\n"); - } - - fprintf(stdout, "BEGIN netdata.apps_cpu\n"); - fprintf(stdout, "SET user = %llu\n", cpuuser); - fprintf(stdout, "SET system = %llu\n", cpusyst); - fprintf(stdout, "END\n"); - - fprintf(stdout, "BEGIN netdata.apps_files\n"); - fprintf(stdout, "SET files = %llu\n", file_counter); - fprintf(stdout, "END\n"); - - bcopy(&me, &me_last, sizeof(struct rusage)); - } - if(debug) fprintf(stderr, "Done Loop No %llu\n", counter); fflush(NULL); gettimeofday(&now, NULL); - usec = usecdiff(&now, &last) - susec; if(debug) fprintf(stderr, "last loop took %llu usec (worked for %llu, sleeped for %llu).\n", usec + susec, usec, susec); diff --git a/charts.d/squid.chart.sh b/charts.d/squid.chart.sh index 774187a821..a9f757f91d 100755 --- a/charts.d/squid.chart.sh +++ b/charts.d/squid.chart.sh @@ -49,24 +49,24 @@ squid_update() { # write the result of the work. cat <<VALUESEOF -BEGIN squid.clients_net +BEGIN squid.clients_net $1 SET client_http_kbytes_in = $client_http_kbytes_in SET client_http_kbytes_out = $client_http_kbytes_out SET client_http_hit_kbytes_out = $client_http_hit_kbytes_out END -BEGIN squid.clients_requests +BEGIN squid.clients_requests $1 SET client_http_requests = $client_http_requests SET client_http_hits = $client_http_hits SET client_http_errors = $client_http_errors END -BEGIN squid.servers_net +BEGIN squid.servers_net $1 SET server_all_kbytes_in = $server_all_kbytes_in SET server_all_kbytes_out = $server_all_kbytes_out END -BEGIN squid.servers_requests +BEGIN squid.servers_requests $1 SET server_all_requests = $server_all_requests SET server_all_errors = $server_all_errors END @@ -1039,6 +1039,7 @@ struct rrd_stats { pthread_mutex_t mutex; unsigned long counter; // the number of times we added values to this rrd + unsigned long counter_since_reload; // the number of times we added values to this rrd int mapped; // if set to 1, this is memory mapped unsigned long memsize; // how much mem we have allocated for this (without dimensions) @@ -1056,6 +1057,7 @@ struct rrd_stats { int update_every; // every how many seconds is this updated? unsigned long long first_entry_t; // the timestamp (in microseconds) of the oldest entry in the db struct timeval last_updated; // when this data set was last updated + struct timeval next_update; unsigned long long usec_since_last_update; total_number absolute_total; @@ -1226,6 +1228,12 @@ RRD_STATS *rrd_stats_create(const char *type, const char *id, const char *name, st->priority = config_get_number(st->id, "priority", priority); st->enabled = enabled; + st->debug = 0; + st->counter_since_reload = 0; + + // initialize the next update + if(!st->next_update.tv_sec) gettimeofday(&st->next_update, NULL); + pthread_mutex_init(&st->mutex, NULL); pthread_mutex_lock(&root_mutex); @@ -1477,7 +1485,7 @@ int rrd_stats_dimension_set(RRD_STATS *st, char *id, collected_number value) return 0; } -void rrd_stats_next(RRD_STATS *st) +void rrd_stats_next_usec(RRD_STATS *st, unsigned long long microseconds) { // lock it to work with the dimensions pthread_mutex_lock(&st->mutex); @@ -1495,16 +1503,48 @@ void rrd_stats_next(RRD_STATS *st) st->absolute_total = 0; st->current_entry = ((st->current_entry + 1) >= st->entries) ? 0 : st->current_entry + 1; + st->usec_since_last_update = microseconds; + if(!st->last_updated.tv_sec) gettimeofday(&st->next_update, NULL); + else { + unsigned long long usec = st->last_updated.tv_sec * 1000000ULL + st->last_updated.tv_usec; + usec += microseconds; + + st->next_update.tv_sec = usec / 1000000ULL; + st->next_update.tv_usec = usec % 1000000ULL; + + } + pthread_mutex_unlock(&st->mutex); return; } -unsigned long long rrd_stats_done(RRD_STATS *st) +void rrd_stats_next(RRD_STATS *st) +{ + if(st->last_updated.tv_sec) { + struct timeval now; + gettimeofday(&now, NULL); + unsigned long long microseconds = usecdiff(&now, &st->last_updated); + + unsigned long long min_microseconds = st->update_every * 1000000ULL / 2ULL; + if(microseconds < min_microseconds) { + debug(D_RRD_STATS, "Chart %s is being updated too early (after %llu, expected %llu microseconds). Assuming default.", st->name, microseconds, min_microseconds); + microseconds = st->update_every * 1000000ULL; + } + + rrd_stats_next_usec(st, microseconds); + } + else + rrd_stats_next_usec(st, st->update_every * 1000000ULL); +} + +void rrd_stats_next_plugins(RRD_STATS *st) { - struct timeval now; - gettimeofday(&now, NULL); + rrd_stats_next_usec(st, st->update_every * 1000000ULL); +} +unsigned long long rrd_stats_done(RRD_STATS *st) +{ pthread_mutex_lock(&st->mutex); RRD_DIMENSION *rd, *last; @@ -1549,9 +1589,6 @@ unsigned long long rrd_stats_done(RRD_STATS *st) // if this is the second+ value we collect if(st->counter) { - - st->usec_since_last_update = usecdiff(&now, &st->last_updated); - if(!st->usec_since_last_update) st->usec_since_last_update = 1; if(st->debug) debug(D_RRD_STATS, "microseconds since last update: %llu", st->usec_since_last_update); // x 10 @@ -1662,6 +1699,8 @@ unsigned long long rrd_stats_done(RRD_STATS *st) break; } + if(!st->counter_since_reload) rd->calculated_value = 0; + // store the calculated value rd->values[st->current_entry] = (storage_number) ( rd->calculated_value @@ -1685,7 +1724,7 @@ unsigned long long rrd_stats_done(RRD_STATS *st) if(!st->first_entry_t) { // this is the first entry in the database - st->first_entry_t = now.tv_sec * 1000000ULL + now.tv_usec; + st->first_entry_t = st->next_update.tv_sec * 1000000ULL + st->next_update.tv_usec; } else { if(st->counter > (unsigned long long)st->entries) { @@ -1697,11 +1736,11 @@ unsigned long long rrd_stats_done(RRD_STATS *st) // store the time difference to the last entry st->timediff[st->current_entry] = st->usec_since_last_update; } - else st->usec_since_last_update = st->update_every * 1000000ULL; - st->last_updated.tv_sec = now.tv_sec; - st->last_updated.tv_usec = now.tv_usec; + st->last_updated.tv_sec = st->next_update.tv_sec; + st->last_updated.tv_usec = st->next_update.tv_usec; st->counter++; + st->counter_since_reload++; pthread_mutex_unlock(&st->mutex); @@ -5558,11 +5597,8 @@ void *proc_main(void *ptr) usec = usecdiff(&now, &last) - susec; debug(D_PROCNETDEV_LOOP, "PROCNETDEV: last loop took %llu usec (worked for %llu, sleeped for %llu).", usec + susec, usec, susec); - if(usec < (update_every * 1000000ULL)) susec = (update_every * 1000000ULL) - usec; - else susec = 0; - - // make sure we will wait at least 100ms - if(susec < 100000) susec = 100000; + if(usec < (update_every * 1000000ULL / 2ULL)) susec = (update_every * 1000000ULL) - usec; + else susec = update_every * 1000000ULL / 2ULL; // -------------------------------------------------------------------- @@ -5718,7 +5754,7 @@ void tc_device_commit(struct tc_device *d) } } else { - rrd_stats_next(st); + rrd_stats_next_plugins(st); if(strcmp(d->id, d->name) != 0) rrd_stats_set_name(st, d->name); } @@ -5986,7 +6022,7 @@ void *cpuidlejitter_main(void *ptr) rrd_stats_dimension_add(st, "jitter", NULL, 1, 1, RRD_DIMENSION_ABSOLUTE); } - else rrd_stats_next(st); + else rrd_stats_next_usec(st, susec); rrd_stats_dimension_set(st, "jitter", usec); rrd_stats_done(st); @@ -6115,6 +6151,8 @@ void *pluginsd_worker_thread(void *arg) } else if(!strcmp(s, "BEGIN")) { char *id = qstrsep(&p); + char *microseconds_txt = qstrsep(&p); + if(!id) { error("PLUGINSD: '%s' is requesting a BEGIN without a chart id. Disabling it.", cd->fullfilename); cd->enabled = 0; @@ -6129,7 +6167,13 @@ void *pluginsd_worker_thread(void *arg) kill(cd->pid, SIGTERM); break; } - if(st->counter) rrd_stats_next(st); + + if(st->counter) { + unsigned long long microseconds = 0; + if(microseconds_txt && *microseconds_txt) microseconds = strtoull(microseconds_txt, NULL, 10); + if(microseconds) rrd_stats_next_usec(st, microseconds); + else rrd_stats_next_plugins(st); + } } else if(!strcmp(s, "END")) { if(!st) { @@ -6141,16 +6185,7 @@ void *pluginsd_worker_thread(void *arg) if(st->debug) debug(D_PLUGINSD, "PLUGINSD: '%s' is requesting a END on chart %s", cd->fullfilename, st->id); - unsigned long long usec_since_last_update = rrd_stats_done(st); - if((time(NULL) - cd->started_t) > 10) { - if(usec_since_last_update < (cd->update_every * 1000000ULL / 10)) { - error("PLUGINSD: '%s' (up for %lu secs) updates charts too frequently. Chart %s updated after %llu microseconds, expected %llu microseconds.", cd->fullfilename, time(NULL) - cd->started_t, st->id, usec_since_last_update, cd->update_every * 1000000ULL); - //cd->enabled = 0; - //kill(cd->pid, SIGTERM); - //break; - } - } - + rrd_stats_done(st); st = NULL; } else if(!strcmp(s, "FLUSH")) { @@ -6290,8 +6325,8 @@ void *pluginsd_worker_thread(void *arg) // second+ run usec = usecdiff(&now, &last) - susec; error("PLUGINSD: %s last loop took %llu usec (worked for %llu, sleeped for %llu).\n", cd->fullfilename, usec + susec, usec, susec); - if(usec < (update_every * 1000000ULL)) susec = (update_every * 1000000ULL) - usec; - else susec = 100000ULL; + if(usec < (update_every * 1000000ULL / 2ULL)) susec = (update_every * 1000000ULL) - usec; + else susec = update_every * 1000000ULL / 2ULL; } error("PLUGINSD: %s sleeping for %llu. Will kill with SIGCONT pid %d to wake it up.\n", cd->fullfilename, susec, cd->pid); @@ -6452,6 +6487,11 @@ void sig_handler(int signo) exit(1); break; + case SIGPIPE: + error("Ignoring signal %d. Errno: %d (%s)", signo, errno, strerror(errno)); + break; + + case SIGCHLD: error("Received SIGCHLD (signal %d).", signo); siginfo_t info; diff --git a/plugins.d/charts.d.plugin b/plugins.d/charts.d.plugin index ff73bda8db..eefa31a508 100755 --- a/plugins.d/charts.d.plugin +++ b/plugins.d/charts.d.plugin @@ -275,28 +275,44 @@ done # ----------------------------------------------------------------------------- # update dimensions -while [ 1 ] -do - now_charts=$run_charts - run_charts= - - for chart in $now_charts +global_update() { + while [ 1 ] do - $chart$charts_update - if [ $? -eq 0 ] + local now_charts=$run_charts + local run_charts= + + local chart= + for chart in $now_charts + do + local d=`date +'%s.%N'` + local s=`echo $d | cut -d '.' -f 1` + local m=`echo $d | cut -d '.' -f 2 | cut -b 1-3` + local now="$s$m" # milliseconds since epoch (1-1-1970) + + eval "local last=\$last_update_$chart" + test -z "$last" && local last=$((now - 1000)) + + local dt=$(( (now - last) * 1000 )) + eval "last_update_$chart=$now" + + $chart$charts_update $dt + if [ $? -eq 0 ] + then + run_charts="$run_charts $chart" + else + echo >&2 "charts.d: chart '$chart' update() function reports failure. Disabling it." + fi + done + + if [ "$pause_method" = "suspend" ] then - run_charts="$run_charts $chart" + echo "STOPPING_WAKE_ME_UP_PLEASE" + suspend || ( echo >&2 "suspend returned error $?, falling back to sleep."; loopsleepms $debug_time $update_every ) else - echo >&2 "charts.d: chart '$chart' update() function reports failure. Disabling it." + # wait the time you are required to + loopsleepms $debug_time $update_every fi done +} - if [ "$pause_method" = "suspend" ] - then - echo "STOPPING_WAKE_ME_UP_PLEASE" - suspend || ( echo >&2 "suspend returned error $?, falling back to sleep."; loopsleepms $debug_time $update_every ) - else - # wait the time you are required to - loopsleepms $debug_time $update_every - fi -done +global_update |