// Author: Ricardas Stoma // Company: Kolmisoft // Year: 2017 // About: Daemon for system load monitoring #define SCRIPT_VERSION "1.16" #define SCRIPT_NAME "m2_server_loadstats" #include "../m2_functions.c" // DEFINITIONS #define TMP_TOP_LOG_DIR "/tmp/m2_server_top_load_tmp.txt" #define TMP_IOS_LOG_DIR "/tmp/m2_server_iostat_load_tmp.txt" #define DATA_AVG_COUNT 5 // GLOBAL VARIABLES // statistics float cpu_load = 0; float average_load = 0; float mysql_load = 0; float ruby_load = 0; float java_load = 0; float freeswitch_load = 0; float hdd_load = 0; float media_load = 0; float sems_load = 0; float kamailio_load = 0; float radius_load = 0; int hdd_usage = 0; // load limits typedef struct load_struct { float cpu_load; float average_load; float mysql_load; float freeswitch_load; float ruby_load; float java_load; float hdd_load; float media_load; float sems_load; float kamailio_load; float radius_load; } load_t; load_t avg[DATA_AVG_COUNT]; load_t gui; load_t db; // server ID int server_id = 0; int fs_present = 0; int media_present = 0; int sems_present = 0; int kamailio_present = 0; int radius_present = 0; int gui_present = 0; int db_present = 0; // delete loadstats older than x days int delete_older_than = 0; // is load OK? int load_ok = 1; int last_load_ok = 0; // time variables time_t rawtime; struct tm current_time; int last_hour = 0; int last_minute = 0; // FUNCTION DECLARATIONS int get_pipe_value(float *value, char *cmd); int get_system_data(); int write_data_to_database(); int _get_server_id(const char *path); int get_server_id(); void get_loadstats_limits(); int check_limits(load_t server, int type); int update_load_status(int status); int delete_old_loadstats(); void calculate_avg(); void get_freeswitch_uptime(); void update_hdd_status(); // MAIN FUNCTION int main(int argc, char *argv[]) { if (argc > 1 && strcmp(argv[1], "-v") == 0) { printf("%s\n", SCRIPT_VERSION); return 0; } // error file FILE *tmp_errorfile = fopen(LOG_PATH, "a+"); if (tmp_errorfile == NULL) { return 1; } if (m2_check_process_lock()) { fprintf(tmp_errorfile, "Process locked!\n"); fclose(tmp_errorfile); exit(1); } fclose(tmp_errorfile); run_in_background_check(argc, argv); if (run_in_background) { // Our process ID and Session ID pid_t pid, sid; // Fork off the parent process pid = fork(); if (pid < 0) { exit(1); } // If we got a good PID, then we can exit the parent process. if (pid > 0) { exit(0); } // Change the file mode mask umask(0); // Create a new SID for the child process sid = setsid(); if (sid < 0) { // Log the failure exit(1); } // Change the current working directory if ((chdir("/")) < 0) { // Log the failure exit(1); } // Close out the standard file descriptors close(STDIN_FILENO); close(STDOUT_FILENO); close(STDERR_FILENO); } m2_init("Starting M2 Server Load Stats daemon\n"); // get server_id get_server_id(); m2_log("Server id: %d\n", server_id); memset(&gui, 0, sizeof(load_t)); memset(&db, 0, sizeof(load_t)); memset(&avg, 0, DATA_AVG_COUNT * sizeof(load_t)); // get initial limits get_loadstats_limits(); // The Big Loop while (1) { time(&rawtime); localtime_r(&rawtime, ¤t_time); // get data from top and iostat if (get_system_data()) { m2_log("get_system_data() error\n"); exit(1); } // default values cpu_load = -1; average_load = -1; mysql_load = -1; freeswitch_load = -1; media_load = -1; sems_load = -1; kamailio_load = -1; ruby_load = -1; java_load = -1; hdd_load = -1; radius_load = -1; // get user cpu usage if (get_pipe_value(&cpu_load, "cat " TMP_TOP_LOG_DIR "| grep 'Cpu(s)' | tail -n 1 | awk '{print $2}'")) { exit(1); } // get cpu average load if (get_pipe_value(&average_load, "cat " TMP_TOP_LOG_DIR " | grep -o 'load average: [0-9\\.]\\+' | tail -n 1 | grep -o '[0-9\\.]\\+'")) { exit(1); } if (db_present) { // get mysql cpu usage if (get_pipe_value(&mysql_load, "cat " TMP_TOP_LOG_DIR " | grep -v 'safe' | grep 'mysqld' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } if (gui_present) { // get ruby cpu usage if (get_pipe_value(&ruby_load, "cat " TMP_TOP_LOG_DIR " | grep 'ruby' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } // get java (elasticsearch) cpu usage if (get_pipe_value(&java_load, "cat " TMP_TOP_LOG_DIR " | grep 'java' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } // get freeswitch cpu usage if (fs_present) { if (get_pipe_value(&freeswitch_load, "cat " TMP_TOP_LOG_DIR " | grep -v 'safe' | grep 'freeswitch' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } // get media cpu usage if (media_present) { if (get_pipe_value(&media_load, "cat " TMP_TOP_LOG_DIR " | grep -v 'safe' | grep 'rtpengine' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } // get sems cpu usage if (sems_present) { if (get_pipe_value(&sems_load, "cat " TMP_TOP_LOG_DIR " | grep -v 'safe' | grep 'sems' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } // get kamailio cpu usage if (kamailio_present) { if (get_pipe_value(&kamailio_load, "tail --lines=+$(cat " TMP_TOP_LOG_DIR " | grep -n -F 'PID USER' | awk '{print $1}' | tail -n 1 | sed 's/.$//') " TMP_TOP_LOG_DIR " | grep 'kamailio' | awk '{sum += $9} END {print sum}'")) { exit(1); } } // get radius cpu usage if (radius_present) { if (get_pipe_value(&radius_load, "cat " TMP_TOP_LOG_DIR " | grep -v 'safe' | grep 'radiusd' | sort -n | tail -n 1 | awk '{print $9}'")) { exit(1); } } // get sda usage if (get_pipe_value(&hdd_load, "cat " TMP_IOS_LOG_DIR " | awk -v RS='' '/Device/{a=$0}END{print a}' | awk '{print $NF}' | sort -n | tail -n 1 | tr ',' '.'")) { exit(1); } // get hdd free space hdd_usage = (100 - m2_get_hdd_usage()); calculate_avg(); // write data if (write_data_to_database()) { exit(1); } int current_load_ok = 1; // check load limits for DB server if (db_present) { if (check_limits(db, 0)) { current_load_ok = 0; } } // check load limits for GUI server if (gui_present) { if (check_limits(gui, 1)) { current_load_ok = 0; } } // modify real load_ok value if (current_load_ok == 0) { load_ok = 0; } else { load_ok = 1; } // did load_ok value changed? if (load_ok != last_load_ok) { m2_log("Updating LOAD OK from %d to %d\n", last_load_ok, load_ok); if (update_load_status(load_ok)) { exit(1); } last_load_ok = load_ok; } // remove tmp file unlink(TMP_TOP_LOG_DIR); unlink(TMP_IOS_LOG_DIR); // execute every minute if (last_minute != current_time.tm_min) { get_server_id(); get_loadstats_limits(); m2_log("Server components: GUI (%d), FS (%d), DB (%d), RTPE (%d), SEMS (%d), KAMAILIO (%d), RADIUS (%d), load_ok: %d, delete loadstats older than: %d (days)\n", gui_present, fs_present, db_present, media_present, sems_present, kamailio_present, radius_present, last_load_ok, delete_older_than); m2_log("Current stats: %0.2f (cpu), %0.2f (average), %0.2f (mysql), %0.2f (freeswitch), %0.2f (ruby), %0.2f (java), %0.2f (rtpengine), %0.2f (sems), %0.2f (kamailio), %0.2f (radiusd), %0.2f (hdd)\n", cpu_load, average_load, mysql_load, freeswitch_load, ruby_load, java_load, media_load, sems_load, kamailio_load, radius_load, hdd_load); if (db_present) m2_log("Current limits [DB]: %0.2f (cpu), %0.2f (average), %0.2f (mysql), %0.2f (freeswitch), %0.2f (hdd)\n", db.cpu_load, db.average_load, db.mysql_load, db.freeswitch_load, db.hdd_load); if (gui_present) m2_log("Current limits [GUI]: %0.2f (cpu), %0.2f (average), %0.2f (freeswitch), %0.2f (ruby), %0.2f (hdd)\n", gui.cpu_load, gui.average_load, gui.freeswitch_load, gui.ruby_load, gui.hdd_load); m2_log("HDD free space: %d%%\n", hdd_usage); if (fs_present) { get_freeswitch_uptime(); } update_hdd_status(); last_minute = current_time.tm_min; } // execute every hour if (last_hour != current_time.tm_hour) { m2_log("Deleting old server loadstats from database\n"); if (delete_old_loadstats()) { exit(1); } last_hour = current_time.tm_hour; } } return 0; } int get_system_data() { FILE *pipe1 = NULL; FILE *pipe2 = NULL; // save top output to tmp file pipe1 = popen("LC_ALL=C top -b -n 2 > " TMP_TOP_LOG_DIR, "r"); if (pipe1 == NULL) { return 1; } // save iostat output to tmp file pipe2 = popen("LC_ALL=C iostat -dx 3 2 > " TMP_IOS_LOG_DIR, "r"); if (pipe2 == NULL) { return 1; } // wait for 2 iterations to complete sleep(10); // close pipes pclose(pipe1); pclose(pipe2); return 0; } int get_pipe_value(float *value, char *cmd) { char buffer[256] = { 0 }; FILE *pipe = NULL; memset(buffer, 0, 256); pipe = popen(cmd, "r"); if (pipe == NULL) { m2_log("Error while executing %s\n", cmd); return 1; } fgets(buffer, 256, pipe); buffer[strlen(buffer) - 1] = 0; *value = atof(buffer); pclose(pipe); return 0; } int write_data_to_database() { char query[1024] = ""; sprintf(query, "INSERT INTO server_loadstats(server_id, cpu_general_load, cpu_loadstats1, cpu_mysql_load, cpu_ruby_load, cpu_freeswitch_load, cpu_media_load, hdd_util, cpu_java_load, cpu_b2bua_load, cpu_kamailio_load, cpu_radius_load) " "VALUES(%d, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f, %.2f)", server_id, cpu_load, average_load, mysql_load, ruby_load, freeswitch_load, media_load, hdd_load, java_load, sems_load, kamailio_load, radius_load); if (m2_mysql_query(query)) { return 1; } return 0; } void update_hdd_status() { char query[1024] = ""; // update servers sprintf(query, "UPDATE servers SET hdd_free_space = %d WHERE id = %d", hdd_usage, server_id); if (m2_mysql_query(query)) { return; } } int get_server_id() { // get server_id if (_get_server_id(CONFPATH)) { m2_log("Cannot read server configuration from: " CONFPATH " or server_id is not set\n"); exit(1); } // just to be sure if (server_id == 0) { m2_log("Server ID is not set\n"); exit(1); } return 0; } // get server ID int _get_server_id(const char *path) { MYSQL_RES *result; MYSQL_ROW row; char sqlcmd[1024] = ""; char server_id_str[100] = ""; if (m2_get_variable("server_id", server_id_str)) { return 1; } server_id = atoi(server_id_str); if (server_id == 0) return 1; sprintf(sqlcmd, "SELECT gui, db, IF(server_type = 'freeswitch' OR fs = 1, 1, 0), load_ok, media, b2bua, proxy, core FROM servers WHERE id = %d", server_id); if (m2_mysql_query(sqlcmd)) { return 0; } result = mysql_store_result(&mysql); if (result) { row = mysql_fetch_row(result); if (row[0]) gui_present = atoi(row[0]); if (row[1]) db_present = atoi(row[1]); if (row[2]) fs_present = atoi(row[2]); if (row[3]) last_load_ok = atoi(row[3]); if (row[4]) media_present = atoi(row[4]); if (row[5]) sems_present = atoi(row[5]); if (row[6]) kamailio_present = atoi(row[6]); if (row[7]) radius_present = atoi(row[7]); } mysql_free_result(result); return 0; } // get loadstats limits from conflines void get_loadstats_limits() { MYSQL_RES *result; MYSQL_ROW row; if (m2_mysql_query("SELECT value, name FROM conflines WHERE name IN ('gui_hdd_utilisation', 'gui_cpu_general_load', 'gui_cpu_loadstats', 'gui_cpu_ruby_process', 'gui_cpu_freeswitch_process', 'db_hdd_utilisation', 'db_cpu_general_load', 'db_cpu_loadstats', 'db_cpu_mysql_process', 'db_cpu_freeswitch_process', 'Delete_Server_Load_stats_older_than')")) { return; } result = mysql_store_result(&mysql); if (result) { while ((row = mysql_fetch_row(result)) != NULL) { if (row[0]) { // gui limits if (strcmp(row[1], "GUI_HDD_utilisation") == 0) gui.hdd_load = atof(row[0]); if (strcmp(row[1], "GUI_CPU_General_load") == 0) gui.cpu_load = atof(row[0]); if (strcmp(row[1], "GUI_CPU_Loadstats") == 0) gui.average_load = atof(row[0]); if (strcmp(row[1], "GUI_CPU_freeswitch_process") == 0) gui.freeswitch_load = atof(row[0]); if (strcmp(row[1], "GUI_CPU_Ruby_process") == 0) gui.ruby_load = atof(row[0]); // db limits if (strcmp(row[1], "DB_HDD_utilisation") == 0) db.hdd_load = atof(row[0]); if (strcmp(row[1], "DB_CPU_General_load") == 0) db.cpu_load = atof(row[0]); if (strcmp(row[1], "DB_CPU_Loadstats") == 0) db.average_load = atof(row[0]); if (strcmp(row[1], "DB_CPU_MySQL_process") == 0) db.mysql_load = atof(row[0]); if (strcmp(row[1], "DB_CPU_freeswitch_process") == 0) db.freeswitch_load = atof(row[0]); // get older than variable (in days) if (strcmp(row[1], "Delete_Server_Load_stats_older_than") == 0) delete_older_than = atoi(row[0]); } } } if (delete_older_than < 0 || delete_older_than == 0) { m2_log("Incorrect 'Delete Server Load Stats older than' value: %d\n", delete_older_than); } mysql_free_result(result); } // check if current server load values exceed limits int check_limits(load_t server, int type) { char typestr[4] = ""; if (type == 0) { sprintf(typestr, "DB"); } else { sprintf(typestr, "GUI"); } if (hdd_load > server.hdd_load && server.hdd_load > 0) { m2_log("%s hdd_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, hdd_load, server.hdd_load); return 1; } if (cpu_load > server.cpu_load && server.cpu_load > 0) { m2_log("%s cpu_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, cpu_load, server.cpu_load); return 1; } if (fs_present) { if (freeswitch_load > server.freeswitch_load && server.freeswitch_load > 0) { m2_log("%s freeswitch_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, freeswitch_load, server.freeswitch_load); return 1; } } if (type == 0) { if (mysql_load > server.mysql_load && server.mysql_load > 0) { m2_log("%s mysql_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, mysql_load, server.mysql_load); return 1; } } if (average_load > server.average_load && server.average_load > 0) { m2_log("%s average_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, average_load, server.average_load); return 1; } if (type == 1) { if (ruby_load > server.ruby_load && server.ruby_load > 0) { m2_log("%s ruby_load exceeds limits (current value: %0.2f, limit: %0.2f\n", typestr, ruby_load, server.ruby_load); return 1; } } return 0; } int update_load_status(int status) { char sqlcmd[1024] = ""; sprintf(sqlcmd, "UPDATE servers SET load_ok = %d WHERE id = %d", status, server_id); if (m2_mysql_query(sqlcmd)) { return 1; } return 0; } int delete_old_loadstats() { char sqlcmd[1024] = ""; sprintf(sqlcmd, "DELETE FROM server_loadstats WHERE datetime < DATE_SUB(NOW(), INTERVAL %d DAY)", delete_older_than); if (m2_mysql_query(sqlcmd)) { return 1; } return 0; } /* Calculate average values for last X load stats */ void calculate_avg() { float avg_cpu_load = 0; float avg_average_load = 0; float avg_mysql_load = 0; float avg_freeswitch_load = 0; float avg_media_load = 0; float avg_sems_load = 0; float avg_kamailio_load = 0; float avg_radius_load = 0; float avg_ruby_load = 0; float avg_java_load = 0; float avg_hdd_load = 0; int i = 0; // shift values memmove(&avg[1], &avg[0], (DATA_AVG_COUNT - 1) * sizeof(load_t)); // copy new values avg[0].cpu_load = cpu_load; avg[0].average_load = average_load; avg[0].mysql_load = mysql_load; avg[0].freeswitch_load = freeswitch_load; avg[0].media_load = media_load; avg[0].sems_load = sems_load; avg[0].kamailio_load = kamailio_load; avg[0].radius_load = radius_load; avg[0].ruby_load = ruby_load; avg[0].java_load = java_load; avg[0].hdd_load = hdd_load; // sum all values for (i = 0; i < DATA_AVG_COUNT; i++) { avg_cpu_load += avg[i].cpu_load; avg_average_load += avg[i].average_load; avg_mysql_load += avg[i].mysql_load; avg_freeswitch_load += avg[i].freeswitch_load; avg_media_load += avg[i].media_load; avg_sems_load += avg[i].sems_load; avg_kamailio_load += avg[i].kamailio_load; avg_radius_load += avg[i].radius_load; avg_ruby_load += avg[i].ruby_load; avg_java_load += avg[i].java_load; avg_hdd_load+= avg[i].hdd_load; } // calculate average avg_cpu_load = avg_cpu_load / DATA_AVG_COUNT; avg_average_load = avg_average_load / DATA_AVG_COUNT; avg_mysql_load = avg_mysql_load / DATA_AVG_COUNT; avg_freeswitch_load = avg_freeswitch_load / DATA_AVG_COUNT; avg_media_load = avg_media_load / DATA_AVG_COUNT; avg_sems_load = avg_sems_load / DATA_AVG_COUNT; avg_kamailio_load = avg_kamailio_load / DATA_AVG_COUNT; avg_radius_load = avg_radius_load / DATA_AVG_COUNT; avg_ruby_load = avg_ruby_load / DATA_AVG_COUNT; avg_java_load = avg_java_load / DATA_AVG_COUNT; avg_hdd_load = avg_hdd_load / DATA_AVG_COUNT; // set new values cpu_load = avg_cpu_load; average_load = avg_average_load; if (db_present) mysql_load = avg_mysql_load; if (fs_present) freeswitch_load = avg_freeswitch_load; if (media_present) media_load = avg_media_load; if (sems_present) sems_load = avg_sems_load; if (kamailio_present) kamailio_load = avg_kamailio_load; if (radius_present) radius_load = avg_radius_load; if (gui_present) ruby_load = avg_ruby_load; java_load = avg_java_load; hdd_load = avg_hdd_load; } /* Get freeswitch uptime in seconds */ void get_freeswitch_uptime() { char query[2048] = ""; char system_cmd[1024] = "fs_cli -x 'status' | grep '^UP' | sed 's| seconds,| seconds\\n|' | head -n 1"; char response[2048] = ""; char uptime_string[2000] = ""; char *uptime_ptr = NULL; FILE *pipe = NULL; pipe = popen(system_cmd, "r"); if (pipe) { fgets(response, 2040, pipe); uptime_ptr = strstr(response, "UP "); if (uptime_ptr) { strcpy(uptime_string, response + strlen("UP ")); uptime_string[strlen(uptime_string) - 1] = '\0'; } pclose(pipe); m2_log("Freeswitch uptime: %s\n", uptime_string); sprintf(query, "UPDATE servers SET uptime = '%s' WHERE id = %d", uptime_string, server_id); m2_mysql_query(query); } }