原创作品,允许转载,转载时请务必以超链接形式标明文章 原始出处 、作者信息和本声明。否则将追究法律责任。http://navyaijm.blog.51cto.com/4647068/1263912
一、安装
1、基础环境安装(系统最小安装)
yum install gcc glibc glibc-common gd gd-devel httpd
2、nagios 和 nagios-plugins 的安装
/usr/sbin/useradd nagios
/usr/sbin/groupadd nagcmd
/usr/sbin/usermod -G nagcmd nagios
/usr/sbin/usermod -G nagcmd apache
tar -jxvf nagios-cn-3.2.3.tar.bz2
cd nagios-cn-3.2.3
./configure --with-group=nagios --with-user=nagios --with-command-group=nagcmd --with-gd-lib=/usr/lib --with-gd-inc=/usr/include
make all
make install
make install-init
make install-commandmode
make install-config
make install-webconf
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin ##创建web登录用户名和密码
chown -R nagios.nagios /usr/local/nagios/etc/htpasswd.users
tar -zxvf nagios-plugins-1.4.16.tar.gz
cd nagios-plugins-1.4.16
./configure --prefix=/usr/local/nagios --with-nagios-user=nagios --with-nagios-group=nagios
make && make install
PS:当安装nagios-plugins-1.4.16时候报错如下:
ps/check_http.Tpo -c -o check_http.o check_http.c
check_http.c: In function ‘process_arguments’:
check_http.c:312: error: ‘ssl_version’ undeclared (first use in this function)
check_http.c:312: error: (Each undeclared identifier is reported only once
check_http.c:312: error: for each function it appears in.)
make[2]: *** [check_http.o] Error 1
make[2]: Leaving directory `/data/nagios-plugins-1.4.16/plugins'
make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/data/nagios-plugins-1.4.16'
make: *** [all] Error 2
解决办法:
1、yum -y install openssl openssl-devel
2、重新执行 ./configure,然后再执行 make && make install
3、配置文件修改
[root@db3 etc]# pwd
/usr/local/nagios/etc
[root@db3 etc]# tree
.
├── cgi.cfg ##需要修改
├── htpasswd.users
├── nagios.cfg ##需要修改
├── nagiosgraph.cfg
├── objects
│   ├── commands.cfg ##需要修改
│   ├── contacts.cfg ##需要修改
│   ├── localhost.cfg
│   ├── printer.cfg
│   ├── switch.cfg
│   ├── templates.cfg ##需要修改
│   ├── timeperiods.cfg
│   └── windows.cfg
└── resource.cfg
cgi.cfg内容如下:
[root@db3 etc]# cat cgi.cfg |grep -v "#" | awk "NF"
main_config_file=/usr/local/nagios/etc/nagios.cfg
physical_html_path=/usr/local/nagios/share
url_html_path=/nagios
show_context_help=0
use_pending_states=1
use_authentication=1
use_ssl_authentication=0
authorized_for_system_information=nagiosadmin,command
authorized_for_configuration_information=nagiosadmin,command
authorized_for_system_commands=nagiosadmin
authorized_for_all_services=nagiosadmin
authorized_for_all_hosts=nagiosadmin
authorized_for_all_service_commands=nagiosadmin
authorized_for_all_host_commands=nagiosadmin
statusmap_background_image=zytianbk.png
default_statusmap_layout=0
default_statuswrl_layout=4
ping_syntax=/bin/ping -n -U -c 5 $HOSTADDRESS$
refresh_rate=90
escape_html_tags=1
action_url_target=_blank
notes_url_target=_blank
lock_author_names=1
ttf_file=/usr/local/nagios/sbin/simhei.ttf
statusmap_mod=2
nagios.cfg内容如下:
[root@yw_monitor_db etc]# cat nagios.cfg |grep -v "#" | awk "NF"
log_file=/usr/local/nagios/var/nagios.log
cfg_file=/usr/local/nagios/etc/objects/commands.cfg
cfg_file=/usr/local/nagios/etc/objects/contacts.cfg
cfg_file=/usr/local/nagios/etc/objects/timeperiods.cfg
cfg_file=/usr/local/nagios/etc/objects/templates.cfg
cfg_dir=/usr/local/nagios/etc/objects/cfg
object_cache_file=/usr/local/nagios/var/objects.cache
precached_object_file=/usr/local/nagios/var/objects.precache
resource_file=/usr/local/nagios/etc/resource.cfg
status_file=/usr/local/nagios/var/status.dat
status_update_interval=10
nagios_user=nagios
nagios_group=nagios
check_external_commands=1
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
external_command_buffer_slots=4096
lock_file=/usr/local/nagios/var/nagios.lock
temp_file=/usr/local/nagios/var/nagios.tmp
temp_path=/tmp
event_broker_options=-1
log_rotation_method=d
log_archive_path=/usr/local/nagios/var/archives
use_syslog=1
log_notifications=1
log_service_retries=1
log_host_retries=1
log_event_handlers=1
log_initial_states=0
log_external_commands=1
log_passive_checks=1
service_inter_check_delay_method=s
max_service_check_spread=30
service_interleave_factor=s
host_inter_check_delay_method=s
max_host_check_spread=30
max_concurrent_checks=0
check_result_reaper_frequency=10
max_check_result_reaper_time=30
check_result_path=/usr/local/nagios/var/spool/checkresults
max_check_result_file_age=3600
cached_host_check_horizon=15
cached_service_check_horizon=15
enable_predictive_host_dependency_checks=1
enable_predictive_service_dependency_checks=1
soft_state_dependencies=0
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
sleep_time=0.25
service_check_timeout=60
host_check_timeout=30
event_handler_timeout=30
notification_timeout=30
ocsp_timeout=5
perfdata_timeout=5
retain_state_information=1
state_retention_file=/usr/local/nagios/var/retention.dat
retention_update_interval=60
use_retained_program_state=1
use_retained_scheduling_info=1
retained_host_attribute_mask=0
retained_service_attribute_mask=0
retained_process_host_attribute_mask=0
retained_process_service_attribute_mask=0
retained_contact_host_attribute_mask=0
retained_contact_service_attribute_mask=0
interval_length=60
check_for_updates=1
bare_update_check=0
use_aggressive_host_checking=0
execute_service_checks=1
accept_passive_service_checks=1
execute_host_checks=1
accept_passive_host_checks=1
enable_notifications=1
enable_event_handlers=1
process_performance_data=0
obsess_over_services=0
obsess_over_hosts=0
translate_passive_host_checks=0
passive_host_checks_are_soft=0
check_for_orphaned_services=1
check_for_orphaned_hosts=1
check_service_freshness=1
service_freshness_check_interval=60
check_host_freshness=0
host_freshness_check_interval=60
additional_freshness_latency=15
enable_flap_detection=1
low_service_flap_threshold=5.0
high_service_flap_threshold=20.0
low_host_flap_threshold=5.0
high_host_flap_threshold=20.0
date_format=us
p1_file=/usr/local/nagios/bin/p1.pl
enable_embedded_perl=1
use_embedded_perl_implicitly=1
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
illegal_macro_output_chars=`~$&|'"<>
use_regexp_matching=0
use_true_regexp_matching=0
admin_email=nagios@localhost
admin_pager=pagenagios@localhost
daemon_dumps_core=0
use_large_installation_tweaks=0
enable_environment_macros=1
debug_level=0
debug_verbosity=1
debug_file=/usr/local/nagios/var/nagios.debug
max_debug_file_size=1000000
commands.cfg的内容如下:
[root@yw_monitor_db objects]# cat commands.cfg |grep -v "#" | awk "NF"
define command{
        command_name    notify-host-by-email
        command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
        }
define command{
        command_name    notify-service-by-email
        command_line    /usr/bin/printf "Host: $HOSTALIAS$\nService: $SERVICEDESC$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nDate/Time: $LONGDATETIME$\nInfo:$SERVICEOUTPUT$\n持续时间:$SERVICEDURATION$" | mail -s " $HOSTALIAS$ $SERVICESTATE$" $CONTACTEMAIL$
        }
define command{
        command_name    notify-service-by-email-163
        command_line    /usr/bin/printf "Host: $HOSTALIAS$\nService: $SERVICEDESC$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\nDate/Time: $LONGDATETIME$\nInfo:$SERVICEOUTPUT$\n持续时间:$SERVICEDURATION$" | mail -s " $HOSTALIAS$ $SERVICESTATE$" $CONTACTEMAIL$
        }
define command{
        command_name    notify-service-by-email-164
        command_line    /usr/local/nagios/bin/send_message.sh $CONTACTEMAIL$ $_SERVICETYPE$ $HOSTALIAS$-$SERVICEDESC$ info:$SERVICEOUTPUT$ Duration:$SERVICEDURATION$
        }
define command{
        command_name    check_mysql
        command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -u nagios -d nagios -p 123456
        }
define command{
        command_name    check_mysql_slave
        command_line    $USER1$/check_mysql -H $HOSTADDRESS$ -P13267 -S -uroot -p123456 -s /data/mysql/nagios/logs/mysql.sock
        }
define command{
        command_name    check-host-alive
        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
        }
define command{
        command_name    check_local_disk
        command_line    $USER1$/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$
        }
define command{
        command_name    check_local_load
        command_line    $USER1$/check_load -w $ARG1$ -c $ARG2$
        }
define command{
        command_name    check_local_procs
        command_line    $USER1$/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$
        }
define command{
        command_name    check_local_users
        command_line    $USER1$/check_users -w $ARG1$ -c $ARG2$
        }
define command{
        command_name    check_local_swap
        command_line    $USER1$/check_swap -w $ARG1$ -c $ARG2$
        }
define command{
        command_name    check_local_mrtgtraf
        command_line    $USER1$/check_mrtgtraf -F $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$
        }
define command{
        command_name    check_ftp
        command_line    $USER1$/check_ftp -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_hpjd
        command_line    $USER1$/check_hpjd -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_snmp
        command_line    $USER1$/check_snmp -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_http
        command_line    $USER1$/check_http -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_ssh
        command_line    $USER1$/check_ssh $ARG1$ $HOSTADDRESS$
        }
define command{
        command_name    check_dhcp
        command_line    $USER1$/check_dhcp $ARG1$
        }
define command{
        command_name    check_ping
        command_line    $USER1$/check_ping -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p 5
        }
define command{
        command_name    check_pop
        command_line    $USER1$/check_pop -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_imap
        command_line    $USER1$/check_imap -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_smtp
        command_line    $USER1$/check_smtp -H $HOSTADDRESS$ $ARG1$
        }
define command{
        command_name    check_tcp
        command_line    $USER1$/check_tcp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
        }
define command{
        command_name    check_udp
        command_line    $USER1$/check_udp -H $HOSTADDRESS$ -p $ARG1$ $ARG2$
        }
define command{
        command_name    check_nt
        command_line    $USER1$/check_nt -H $HOSTADDRESS$ -p 12489 -v $ARG1$ $ARG2$
        }
define command{
        command_name    check_dig
        command_line    $USER1$/check_dig -H $HOSTADDRESS$ -T $ARG4$ -l $ARG1$ -w $ARG2$ -c $ARG3$
        }
define command{
        command_name    process-host-perfdata
        command_line    /usr/bin/printf "%b" "$LASTHOSTCHECK$\t$HOSTNAME$\t$HOSTSTATE$\t$HOSTATTEMPT$\t$HOSTSTATETYPE$\t$HOSTEXECUTIONTIME$\t$HOSTOUTPUT$\t$HOSTPERFDATA$\n" >> /usr/local/nagios/var/host-perfdata.out
        }
define command{
        command_name    process-service-perfdata
        command_line    /usr/bin/printf "%b" "$LASTSERVICECHECK$\t$HOSTNAME$\t$SERVICEDESC$\t$SERVICESTATE$\t$SERVICEATTEMPT$\t$SERVICESTATETYPE$\t$SERVICEEXECUTIONTIME$\t$SERVICELATENCY$\t$SERVICEOUTPUT$\t$SERVICEPERFDATA$\n" >> /usr/local/nagios/var/service-perfdata.out
        }
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
        }
contacts.cfg的内容如下:
[root@yw_monitor_db objects]# cat contacts.cfg |grep -v "#" | awk "NF"
define contact{
        use                     generic-contact-163
        contact_name            zhaohaijun-mail
        alias                   zhaohaijun-mail
        email                   navyaijm@sina.com
        }
define contact{
        use                     generic-contact-164
        contact_name            navyaijm-call
        alias                   navyaijm-call
        email                   18630152179
        }
define contactgroup{
        contactgroup_name       ops_admin
        alias                   ops_admin
        members                 navyaijm-call,zhaohaijun-mail
        }
templates.cfg的内容如下:
[root@yw_monitor_db objects]# cat templates.cfg |grep -v "#" | awk "NF"
define contact{
        name                            generic-contact         ; The name of this contact template
        service_notification_period     24x7                    ; service notifications can be sent anytime
        host_notification_period        24x7                    ; host notifications can be sent anytime
        service_notification_options    w,u,c,r,f,s             ; send notifications for all service states, flapping events, and scheduled downtime events
        host_notification_options       d,u,r,f,s               ; send notifications for all host states, flapping events, and scheduled downtime events
        service_notification_commands   notify-service-by-email ; send service notifications via email
        host_notification_commands      notify-host-by-email    ; send host notifications via email
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE!
        }
define contact{
        name                            generic-contact-163     ; The name of this contact template
        service_notification_period     24x7                    ; service notifications can be sent anytime
        host_notification_period        24x7                    ; host notifications can be sent anytime
        service_notification_options    w,u,c,r,f,s             ; send notifications for all service states, flapping events, and scheduled downtime events
        host_notification_options       d,u,r,f,s               ; send notifications for all host states, flapping events, and scheduled downtime events
        service_notification_commands   notify-service-by-email-163 ; send service notifications via email
        host_notification_commands      notify-host-by-email    ; send host notifications via email
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE!
        }
define contact{
        name                            generic-contact-164     ; The name of this contact template
        service_notification_period     24x7                    ; service notifications can be sent anytime
        host_notification_period        24x7                    ; host notifications can be sent anytime
        service_notification_options    w,u,c,r,f,s             ; send notifications for all service states, flapping events, and scheduled downtime events
        host_notification_options       d,u,r,f,s               ; send notifications for all host states, flapping events, and scheduled downtime events
        service_notification_commands   notify-service-by-email-164 ; send service notifications via email
        host_notification_commands      notify-host-by-email    ; send host notifications via email
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL CONTACT, JUST A TEMPLATE!
        }
define host{
        name                            generic-host            ; The name of this host template
        notifications_enabled           1                       ; Host notifications are enabled
        event_handler_enabled           1                       ; Host event handler is enabled
        flap_detection_enabled          1                       ; Flap detection is enabled
        failure_prediction_enabled      1                       ; Failure prediction is enabled
        process_perf_data               1                       ; Process performance data
        retain_status_information       1                       ; Retain status information across program restarts
        retain_nonstatus_information    1                       ; Retain non-status information across program restarts
        notification_period             24x7                    ; Send host notifications at any time
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
        }
define host{
        name                            linux-server            ; The name of this host template
        use                             generic-host            ; This template inherits other values from the generic-host template
        check_period                    24x7                    ; By default, Linux hosts are checked round the clock
        check_interval                  5                       ; Actively check the host every 5 minutes
        retry_interval                  1                       ; Schedule host check retries at 1 minute intervals
        active_checks_enabled           0
        max_check_attempts              10                      ; Check each Linux host 10 times (max)
        check_command                   check-host-alive        ; Default command to check Linux hosts
        notification_period             workhours               ; Linux ops_admin hate to be woken up, so we only notify during the day
                                                                ; Note that the notification_period variable is being overridden from
                                                                ; the value that is inherited from the generic-host template!
        notification_interval           120                     ; Resend notifications every 2 hours
        notification_options            d,u,r                   ; Only send notifications for specific host states
        contact_groups                  ops_admin               ; Notifications get sent to the ops_admin by default
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
        }
define host{
        name                            windows-server          ; The name of this host template
        use                             generic-host            ; Inherit default values from the generic-host template
        check_period                    24x7                    ; By default, Windows servers are monitored round the clock
        check_interval                  5                       ; Actively check the server every 5 minutes
        retry_interval                  1                       ; Schedule host check retries at 1 minute intervals
        max_check_attempts              10                      ; Check each server 10 times (max)
        check_command                   check-host-alive        ; Default command to check if servers are "alive"
        notification_period             24x7                    ; Send notification out at any time - day or night
        notification_interval           30                      ; Resend notifications every 30 minutes
        notification_options            d,r                     ; Only send notifications for specific host states
        contact_groups                  ops_admin               ; Notifications get sent to the ops_admin by default
        hostgroups                      windows-servers         ; Host groups that Windows servers should be a member of
        register                        0                       ; DONT REGISTER THIS - ITS JUST A TEMPLATE
        }
define host{
        name                            generic-printer         ; The name of this host template
        use                             generic-host            ; Inherit default values from the generic-host template
        check_period                    24x7                    ; By default, printers are monitored round the clock
        check_interval                  5                       ; Actively check the printer every 5 minutes
        retry_interval                  1                       ; Schedule host check retries at 1 minute intervals
        max_check_attempts              10                      ; Check each printer 10 times (max)
        check_command                   check-host-alive        ; Default command to check if printers are "alive"
        notification_period             workhours               ; Printers are only used during the workday
        notification_interval           30                      ; Resend notifications every 30 minutes
        notification_options            d,r                     ; Only send notifications for specific host states
        contact_groups                  ops_admin               ; Notifications get sent to the ops_admin by default
        register                        0                       ; DONT REGISTER THIS - ITS JUST A TEMPLATE
        }
define host{
        name                            generic-switch          ; The name of this host template
        use                             generic-host            ; Inherit default values from the generic-host template
        check_period                    24x7                    ; By default, switches are monitored round the clock
        check_interval                  5                       ; Switches are checked every 5 minutes
        retry_interval                  1                       ; Schedule host check retries at 1 minute intervals
        max_check_attempts              10                      ; Check each switch 10 times (max)
        check_command                   check-host-alive        ; Default command to check if routers are "alive"
        notification_period             24x7                    ; Send notifications at any time
        notification_interval           30                      ; Resend notifications every 30 minutes
        notification_options            d,r                     ; Only send notifications for specific host states
        contact_groups                  ops_admin               ; Notifications get sent to the ops_admin by default
        register                        0                       ; DONT REGISTER THIS - ITS JUST A TEMPLATE
        }
define service{
        name                            generic-service         ; The 'name' of this service template
        active_checks_enabled           1                       ; Active service checks are enabled
        passive_checks_enabled          1                       ; Passive service checks are enabled/accepted
        parallelize_check               1                       ; Active service checks should be parallelized (disabling this can lead to major performance problems)
        obsess_over_service             1                       ; We should obsess over this service (if necessary)
        check_freshness                 0                       ; Default is to NOT check service 'freshness'
        notifications_enabled           1                       ; Service notifications are enabled
        event_handler_enabled           1                       ; Service event handler is enabled
        flap_detection_enabled          1                       ; Flap detection is enabled
        failure_prediction_enabled      1                       ; Failure prediction is enabled
        process_perf_data               1                       ; Process performance data
        retain_status_information       1                       ; Retain status information across program restarts
        retain_nonstatus_information    1                       ; Retain non-status information across program restarts
        is_volatile                     0                       ; The service is not volatile
        check_period                    24x7                    ; The service can be checked at any time of the day
        max_check_attempts              3                       ; Re-check the service up to 3 times in order to determine its final (hard) state
        normal_check_interval           2                       ; Check the service every 2 minutes under normal conditions
        retry_check_interval            1                       ; Re-check the service every minute until a hard state can be determined
        contact_groups                  ops_admin               ; Notifications get sent out to everyone in the 'ops_admin' group
        notification_options            w,u,c,r                 ; Send notifications about warning, unknown, critical, and recovery events
        notification_interval           5                       ; Re-notify about service problems every 5 minutes
        notification_period             24x7                    ; Notifications can be sent out at any time
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
        }
define service{
        name                            local-service           ; The name of this service template
        use                             generic-service         ; Inherit default values from the generic-service definition
        max_check_attempts              4                       ; Re-check the service up to 4 times in order to determine its final (hard) state
        normal_check_interval           5                       ; Check the service every 5 minutes under normal conditions
        retry_check_interval            1                       ; Re-check the service every minute until a hard state can be determined
        register                        0                       ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
        }
本文出自 “屌丝运维男” 博客,请务必保留此出处http://navyaijm.blog.51cto.com/4647068/1263912