Go to content Go to menu Go to search

Мониторим виртуалки на Proxmox (скрипт на Perl)

Постановка задачи

Есть Proxmox-кластер с большим количеством однотипных виртуальных машин (LAMP + wordpress), при этом виртуальные машины добавляются и удаляются ежедневно в больших количествах.

Решение

Будем делать так:
1. На каждую виртуальную машину кладем cgi-скрипт, который будет отдавать текущее состояние виртуальной машины. Если найдены неполадки - этот скрипт будет выдавать сообщение о тревоге.
2. На отдельную виртуальную машину кладем cgi-скрипт, который будет получать список виртуальных машин через proxmox-api, на каждой из них опрашивать первый скрипт, и отображать таблицу с результатами.
3. Натравливаем на этот (п.2) скрипт любую систему мониторинга.

Реализация

Скрипт system_status.cgi кладем в cgi-bin на каждую виртуальную машину; в разделе triggers устанавливаем лимиты для срабатывания тревоги.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/perl

# system_status.cgi
#
# This script show web page with system status. Used by Proxmox monitoring script
#
# Output is text/plain.  Every failed check produces one line starting with
# "Alert!:"; when no check fails the page says "Overall system status is: OK".
# The measured values, the `df -h` output and the process list follow.
use strict;
use warnings;

my @errors;    # alert messages collected by the checks below
#
# constants
#
my $prog_ps="/bin/ps";
#
# triggers (a value of 0 switches the corresponding check and its report lines off)
#
my $la_treshold=-1;                 # max 5min la
my $procs_running_treshold=-1;      # max processes running
my $mem_free_treshhold=999999;      # min free mem (MemFree + Buffers + Cached), in the kB units of /proc/meminfo
my $total_processes_treshhold=-1;   # max total processes
my $fs_usage_treshold=90;           # min free disk space in percent
#
# measurements (stay undefined/empty when a check is disabled or its source is unreadable)
#
my @la;
my $procs_running;
my $mem_total;
my $mem_free;
my @process_list;
my $total_processes = 0;
my @fs_stats;

# Read a whole file into a list of lines.  On failure an alert is recorded and
# an empty list is returned, so the caller simply finds nothing to parse.
sub read_lines {
    my ($path) = @_;
    my $fh;
    unless ( open $fh, '<', $path ) {
        push @errors, "Unable to read $path: $!";
        return;
    }
    my @lines = <$fh>;
    close $fh;
    return @lines;
}

# Render a possibly-missing measurement for the report.
sub printable {
    my ($value) = @_;
    return defined $value ? $value : 'unknown';
}

#
# get load average
#
if ($la_treshold) {
    my ($line) = read_lines('/proc/loadavg');
    @la = split ' ', $line if defined $line;
    # Field 1 is the 5-minute average: the value the threshold comment and the
    # report below refer to (field 0 would be the 1-minute average).
    push @errors, "Load average is $la[1] and above treshold $la_treshold"
        if defined $la[1] && $la[1] > $la_treshold;
}
#
# get running procs
#
if ($procs_running_treshold) {
    for my $line ( read_lines('/proc/stat') ) {
        $procs_running = $1 if $line =~ /^procs_running\s+(\d+)/;
    }
    push @errors, "Running procs is $procs_running and above treshold $procs_running_treshold"
        if defined $procs_running && $procs_running > $procs_running_treshold;
}
#
# get mem info
#
if ($mem_free_treshhold) {
    my %meminfo;
    for my $line ( read_lines('/proc/meminfo') ) {
        $meminfo{$1} = $2 if $line =~ /^(MemTotal|MemFree|Buffers|Cached):\s+(\d+)\s/;
    }
    $mem_total = $meminfo{MemTotal};
    if ( defined $meminfo{MemFree} ) {
        # Buffers and page cache are counted as free memory; a missing field counts as 0.
        $mem_free = $meminfo{MemFree} + ( $meminfo{Buffers} || 0 ) + ( $meminfo{Cached} || 0 );
        push @errors, "Low free memory ( $mem_free ) with min treshold is $mem_free_treshhold"
            if $mem_free < $mem_free_treshhold;
    }
}
#
# get process list
#
if ($total_processes_treshhold) {
    @process_list = `$prog_ps axf -o pid,cmd`;
    if (@process_list) {
        # The first line of the ps output is the column header, not a process.
        $total_processes = scalar(@process_list) - 1;
    }
    else {
        push @errors, "Unable to get process list from $prog_ps";
    }
    push @errors, "Too many processes  ( $total_processes ) on host  with max treshold is $total_processes_treshhold"
        if $total_processes > $total_processes_treshhold;
}
#
# get filesystems stats
#
if ($fs_usage_treshold) {
    @fs_stats = `df -h`;
    push @errors, "Unable to get filesystem stats from df" unless @fs_stats;
    for my $line (@fs_stats) {
        # Only lines ending in "<used>% <mountpoint>" describe a filesystem;
        # the header line (and anything else) is skipped instead of being
        # evaluated with undefined captures.
        next unless $line =~ m/.*\s(\d+)\%\s(\S+)$/;
        my ( $used_percent, $mount_point ) = ( $1, $2 );
        my $disk_free_space = 100 - $used_percent;
        push @errors, "Low free disk space: $disk_free_space% free on $mount_point"
            if $disk_free_space < $fs_usage_treshold;
    }
}
#
# Print all stats
#
print "Content-Type: text/plain\n";
print "\n";
if (@errors) {
    print "Alert!: $_\n" for @errors;
}
else {
    print "Overall system status is: OK\n";
}
print "\n";
print "Load average 5min: ", printable( $la[1] ), "\n" if ($la_treshold);
print "Total memory: ", printable($mem_total), "\n" if ($mem_free_treshhold);
print "Free memory: ", printable($mem_free), "\n" if ($mem_free_treshhold);
print "Total processes: $total_processes\n" if ($total_processes_treshhold);
print "Running processes: ", printable($procs_running), "\n" if ($procs_running_treshold);
print "\n";
print "@fs_stats" if ($fs_usage_treshold);
print "\n";
print "List of the processes:\n" if ($total_processes_treshhold);
print @process_list if ($total_processes_treshhold);

Скрипт proxmox_mon.cgi размещаем на хосте, который имеет доступ ко всем виртуальным машинам; в массиве @excluded_ids перечисляем vmid виртуалок, которые не нужно мониторить.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/perl -l

# proxmox_mon.cgi
#
# This script will show web page with list of all proxmox VM-s
#
# NOTE: the -l switch on the shebang line makes every print() append a
# newline automatically.

# List of vm-id's which we are wish to exclude from monitoring.
# Entries are compared against the 'vmid' field returned by the Proxmox API.
my @excluded_ids=('108','109','110','111');

use strict;
use CGI::Carp qw(fatalsToBrowser);
use Net::Proxmox::VE;
use LWP::UserAgent;
use HTML::Template;
use Switch;

# Global variables
my $status_is_alert;      # set as soon as any VM reports an Alert
my $status_is_warning;    # set as soon as any VM reports a Warning
my $starting_time = time;
my $vm_counter = 0;

# The report page shows three side-by-side tables.  VMs are dealt out to them
# in turn (1st VM -> table 1, 2nd -> table 2, 3rd -> table 3, 4th -> table 1 ...).
# Plain array indexing is used for this, so this section relies neither on the
# Switch source filter nor on the deprecated smartmatch operator.
my @RUNNING_VM_ROW1_DATA;
my @RUNNING_VM_ROW2_DATA;
my @RUNNING_VM_ROW3_DATA;
my @vm_columns = ( \@RUNNING_VM_ROW1_DATA, \@RUNNING_VM_ROW2_DATA, \@RUNNING_VM_ROW3_DATA );
my $next_column = 0;

# Lookup table of the vmids that must not be monitored
my %is_excluded = map { $_ => 1 } @excluded_ids;

# Prepare web user agent
# NOTE(review): this turns off TLS hostname verification for every LWP request
# made by this process - presumably needed because the Proxmox API host uses a
# self-signed certificate; confirm before reusing elsewhere.
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
my $ua = LWP::UserAgent->new;
$ua->agent("proxmox_status_mon");
$ua->timeout(5);

# Prepare proxmox api agent and connect to proxmox
my %args = (
    host     => '192.168.1.2',
    username => 'login',
    password => 'password',
    port     => '8006',
    realm    => 'pve',
#    debug    => 'true'
);
my $host = Net::Proxmox::VE->new(%args);
$host->login() or error_exit();

# get pve version and set virtualization_tech according to pve version for use in future requests
my $version_hash = $host->get("/version"); # this request return hash with version information
# Only the leading major number is compared, so a version string with a
# suffix (e.g. "4.4-1") is handled without a numeric-conversion problem.
# A missing version counts as 0, i.e. as "older than 4".
my $pve_major = 0;
if ( defined $version_hash->{'version'} && $version_hash->{'version'} =~ /^(\d+)/ ) {
    $pve_major = $1;
}
my $virtualization_tech = 'lxc';
$virtualization_tech = 'openvz' if ( $pve_major < 4 );

# Get nodes list from proxmox, foreach node get virtual machines, read status and prepare data for output
my @nodes = $host->get("/nodes");

for my $node (@nodes) {
    my $node_name = $node->{'node'};
    my @vm_list = $host->get("/nodes/$node_name/$virtualization_tech");

    for my $vm (@vm_list) {
        next if $is_excluded{ $vm->{'vmid'} };

        $vm_counter++;
        my %row_data = (
            VM_Status => $vm->{'status'},
            VM_NODE   => $node_name,
            VM_ID     => $vm->{'vmid'},
            VM_Name   => $vm->{'name'},
        );

        if ( $virtualization_tech eq 'lxc' ) {
            # For lxc the address is taken from the "ip=<a.b.c.d>" fragment of
            # the container's net0 setting.  VM_IP stays unset when there is
            # no such fragment, instead of picking up a stale capture.
            my $net_config = $host->get("/nodes/$node_name/lxc/$vm->{'vmid'}/config");
            if ( defined $net_config->{'net0'}
                && $net_config->{'net0'} =~ /.*ip=(\d+\.\d+\.\d+\.\d+).*/ )
            {
                $row_data{'VM_IP'} = $1;
            }
        }
        else {
            $row_data{'VM_IP'} = $vm->{'ip'};
        }

        # Only running VMs are polled for their self-reported status.
        if ( defined $row_data{'VM_Status'} && $row_data{'VM_Status'} eq "running" ) {
            my $status_info = get_vm_stats( $row_data{'VM_IP'} );
            $row_data{'VM_Status_Info'} = $status_info;
            if ( defined $status_info ) {
                if ( $status_info =~ m/Warning/ ) { $row_data{'WARNING'} = 1; $status_is_warning = 1; }
                if ( $status_info =~ m/Alert/ )   { $row_data{'ALERT'}   = 1; $status_is_alert   = 1; }
            }
        }

        push @{ $vm_columns[$next_column] }, \%row_data;
        $next_column = ( $next_column + 1 ) % scalar(@vm_columns);
    }
}

my $generation_time = time - $starting_time;

# Print out data thru template.
# The page is rendered completely before the CGI header is sent, so a template
# failure is reported through the fatal-error handler instead of landing in
# the middle of a half-started response.
my $template = HTML::Template->new(filename => 'template.html');
$template->param(GENERATION_TIME => $generation_time);
$template->param(VM_COUNTER => $vm_counter);
$template->param(ALERT => $status_is_alert);
$template->param(WARNING => $status_is_warning);
$template->param(RUNNING_VM_ROW1 => \@RUNNING_VM_ROW1_DATA);
$template->param(RUNNING_VM_ROW2 => \@RUNNING_VM_ROW2_DATA);
$template->param(RUNNING_VM_ROW3 => \@RUNNING_VM_ROW3_DATA);
my $page = $template->output();

print "Content-Type: text/html\n\n";
print $page;

# Get status from VM procedure
#
# Requests http://<vm_ip>/cgi-bin/system_status.cgi through the file-level
# user agent $ua and condenses the answer into an HTML fragment for the report.
#
# Parameter:
#   $vm_ip - address of the virtual machine to poll
#
# Returns:
#   - "Alert! unable to get VM status: ..."   when the request fails and the
#     status line contains 500
#   - "Warning! unable to get VM status: ..." for any other failed request
#   - every "Alert!:" line of the VM's answer, each followed by "<br>"
#   - undef when the VM answered and reported no alert
#
# All text that originates from the VM or the HTTP layer is HTML-escaped,
# because the report template prints this value without escaping.
sub get_vm_stats {
    my ($vm_ip) = @_;

    my $req = HTTP::Request->new(GET => "http://$vm_ip/cgi-bin/system_status.cgi");
    my $res = $ua->request($req);

    if ( $res->is_error ) {
        my $status_line = $res->status_line;
        my $shown       = _html_escape($status_line);
        return "Alert! unable to get VM status: $shown" if ( $status_line =~ m/.*500.*/ );
        return "Warning! unable to get VM status: $shown";
    }

    # Collect every alert line; a final line without a trailing newline counts too.
    my $alerts = '';
    for my $line ( split /\n/, $res->content ) {
        next unless $line =~ m/(Alert!:.*)/i;
        $alerts .= _html_escape($1) . "<br>";
    }

    # Explicit undef keeps the "one scalar, always" contract of the original.
    return undef if $alerts eq '';
    return $alerts;
}

# Escape the characters that would otherwise be interpreted as HTML markup.
sub _html_escape {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

# Abort the request when the login to the Proxmox API fails.
#
# Nothing is printed here: the login is attempted before the script has sent
# its Content-Type header, so plain text written at this point would reach the
# web server as a malformed CGI header.  The script loads CGI::Carp with
# fatalsToBrowser, so the die message itself is what gets rendered.
# Never returns.
sub error_exit {
    die("Couldnt log in to proxmox host $args{'host'}");
}

Туда же кладем шаблон страницы с отчетом (скрипт загружает его по имени template.html):

<!-- proxmox_mon.tmpl: report page template for proxmox_mon.cgi.
     Note: the script opens this file under the name 'template.html'. -->
<html>
<head>
<title>Proxmox monitoring tool</title>
</head>
<body>
<!-- Page heading: the overall status. ALERT takes precedence over WARNING;
     with neither flag set the status is OK. -->
<!-- TMPL_IF ALERT -->
<h1 align=center>Status of Virtual Machines: Alert</h1>
<!-- TMPL_ELSE -->
    <!-- TMPL_IF WARNING -->
    <h1 align=center>Status of Virtual Machines: Warning</h1>
    <!-- TMPL_ELSE -->
        <h1 align=center>Status of Virtual Machines: OK</h1>
    <!-- /TMPL_IF -->
<!-- /TMPL_IF -->

<p style="text-align: right; font-size: small; margin: 0;">Page generated in: <!-- TMPL_VAR NAME=GENERATION_TIME  --> seconds</p>
<p style="text-align: right; font-size: small; margin: 0;">Number of virtual machines: <!-- TMPL_VAR NAME=VM_COUNTER  --></p>

<!-- Layout table: one row holding three identical VM tables side by side,
     fed by the loops RUNNING_VM_ROW1, RUNNING_VM_ROW2 and RUNNING_VM_ROW3. -->
<table width=100%>
<tr>

<!-- First VM table (loop RUNNING_VM_ROW1).
     A VM with VM_Status_Info set takes two table rows: the status cell spans
     both, and the second row shows the info text on a yellow (WARNING) or
     red (ALERT) background. -->
<td align=center valign=top>
<table border=1 width=100%>
   <tr>
      <th>VM_Status</th>
      <th>Node</th>
      <th>VM_ID</th>
      <th>VM_IP</th>
      <th>VM_Name</th>
   </tr>
<!-- TMPL_LOOP NAME=RUNNING_VM_ROW1 -->
   <tr>
<!-- TMPL_IF VM_Status_Info -->
      <td rowspan=2 align=center>
<!-- TMPL_ELSE -->
      <td align=center>
<!-- /TMPL_IF -->
          <!-- TMPL_VAR NAME=VM_Status --></td>
      <td><!-- TMPL_VAR NAME=VM_NODE --></td>
      <td><!-- TMPL_VAR NAME=VM_ID --></td>
      <td><!-- TMPL_VAR NAME=VM_IP --></td>
      <td><a target="_blank" href="http://<!-- TMPL_VAR NAME=VM_Name -->/cgi-bin/system_status.cgi"><!-- TMPL_VAR NAME=VM_Name --></a></td>
   </tr>
<!-- TMPL_IF VM_Status_Info -->
    <tr>
      <td colspan="4" 
        <!-- TMPL_IF WARNING -->style="background-color: yellow;"<!-- /TMPL_IF --> 
        <!-- TMPL_IF ALERT -->style="background-color: red;"<!-- /TMPL_IF --> >
        <!-- TMPL_VAR NAME=VM_Status_Info -->
      </td>
    </tr>
<!-- /TMPL_IF -->
<!-- /TMPL_LOOP -->
</table>
</td>

<!-- Second VM table (loop RUNNING_VM_ROW2); same structure as the first. -->
<td align=center valign=top>
<table border=1 width=100%>
   <tr>
      <th>VM_Status</th>
      <th>Node</th>
      <th>VM_ID</th>
      <th>VM_IP</th>
      <th>VM_Name</th>
   </tr>
<!-- TMPL_LOOP NAME=RUNNING_VM_ROW2 -->
   <tr>
<!-- TMPL_IF VM_Status_Info -->
      <td rowspan=2 align=center>
<!-- TMPL_ELSE -->
      <td align=center>
<!-- /TMPL_IF -->
          <!-- TMPL_VAR NAME=VM_Status --></td>
      <td><!-- TMPL_VAR NAME=VM_NODE --></td>
      <td><!-- TMPL_VAR NAME=VM_ID --></td>
      <td><!-- TMPL_VAR NAME=VM_IP --></td>
      <td><a target="_blank" href="http://<!-- TMPL_VAR NAME=VM_Name -->/cgi-bin/system_status.cgi"><!-- TMPL_VAR NAME=VM_Name --></a></td>
   </tr>
<!-- TMPL_IF VM_Status_Info -->
    <tr>
      <td colspan="4" 
        <!-- TMPL_IF WARNING -->style="background-color: yellow;"<!-- /TMPL_IF --> 
        <!-- TMPL_IF ALERT -->style="background-color: red;"<!-- /TMPL_IF --> >
        <!-- TMPL_VAR NAME=VM_Status_Info -->
      </td>
    </tr>
<!-- /TMPL_IF -->
<!-- /TMPL_LOOP -->
</table>
</td>

<!-- Third VM table (loop RUNNING_VM_ROW3); same structure as the first. -->
<td align=center valign=top>
<table border=1 width=100%>
   <tr>
      <th>VM_Status</th>
      <th>Node</th>
      <th>VM_ID</th>
      <th>VM_IP</th>
      <th>VM_Name</th>
   </tr>
<!-- TMPL_LOOP NAME=RUNNING_VM_ROW3 -->
   <tr>
<!-- TMPL_IF VM_Status_Info -->
      <td rowspan=2 align=center>
<!-- TMPL_ELSE -->
      <td align=center>
<!-- /TMPL_IF -->
          <!-- TMPL_VAR NAME=VM_Status --></td>
      <td><!-- TMPL_VAR NAME=VM_NODE --></td>
      <td><!-- TMPL_VAR NAME=VM_ID --></td>
      <td><!-- TMPL_VAR NAME=VM_IP --></td>
      <td><a target="_blank" href="http://<!-- TMPL_VAR NAME=VM_Name -->/cgi-bin/system_status.cgi"><!-- TMPL_VAR NAME=VM_Name --></a></td>
   </tr>
<!-- TMPL_IF VM_Status_Info -->
    <tr>
      <td colspan="4" 
        <!-- TMPL_IF WARNING -->style="background-color: yellow;"<!-- /TMPL_IF --> 
        <!-- TMPL_IF ALERT -->style="background-color: red;"<!-- /TMPL_IF --> >
        <!-- TMPL_VAR NAME=VM_Status_Info -->
      </td>
    </tr>
<!-- /TMPL_IF -->
<!-- /TMPL_LOOP -->
</table>
</td>

</tr>
</table>

</body>
</html>

Ну и натравливаем на все это мониторинг по ключевому слову Alert.

Скриншоты

proxmox_mon.cgi - состояние OK proxmox_mon.cgi - состояние Alert
enlarge enlarge
system_status.cgi - состояние OK system_status.cgi - состояние Alert
enlarge enlarge


при публикации материалов с данного сайта обратная ссылка на сайт обязательна.
valynkin.ru © no rights reserved