ambientTemp.conf
modules {
  module {
    name = "ambientTemp"  #此处名称要与自定义模块名相同
    language = "python"
  }
}
collection_group {
  collect_every = 10
  time_threshold = 50
  metric {
    name = "ambientTemp"
    title = "Ambient Temperature"
    value_threshold = 70
  }
}
ambientTemp.py #模块程序名称
import os

# Metric descriptors handed to gmond; filled in by metric_init().
descriptors = []

# Location of the cached SDR dump and of the ipmitool binary.
_SDR_FILE = "/tmp/sdr.dump"
_IPMITOOL = "/usr/bin/ipmitool"

# Before you run this, load the IPMI drivers:
#   modprobe ipmi_msghandler
#   modprobe ipmi_si
#   modprobe ipmi_devintf
# You'll also need to change permissions of /dev/ipmi0 for nobody:
#   chown nobody:nobody /dev/ipmi0
# Put the above in /etc/rc.d/rc.local.


def _build_command():
    """Return the shell pipeline that prints the 'Planar' temperature value.

    If the SDR dump file is missing, ipmitool is first asked to create it.
    When the dump exists it is passed to ipmitool with -S; otherwise a
    notice is printed and ipmitool is run without a cache file.
    """
    if not os.path.exists(_SDR_FILE):
        # Exit status is deliberately ignored: the existence check below
        # decides whether the dump can be used.
        os.system(_IPMITOOL + ' sdr dump ' + _SDR_FILE)

    if os.path.exists(_SDR_FILE):
        ipmicmd = _IPMITOOL + " -S " + _SDR_FILE + " -c sdr"
    else:
        print("file does not exist... oops!")
        ipmicmd = _IPMITOOL + " -c sdr"

    # sed turns spaces into underscores; awk then prints the second
    # comma-separated field of every record mentioning "Planar".
    cmd = ipmicmd + " type temperature | sed 's/ /_/g' "
    return cmd + " | awk -F, '/Planar/ {print $2}' "


def _parse_temperature(lines):
    """Return the integer found on the last parseable line, or 0 if none.

    Blank lines and lines whose first token is not an integer are skipped,
    so empty command output yields 0 instead of raising.
    """
    reading = 0
    for raw in lines:
        fields = raw.split()
        if not fields:
            continue
        try:
            reading = int(fields[0])
        except ValueError:
            continue
    return reading


def temp_handler(name):
    """Metric callback: return the ambient temperature as an int.

    Args:
        name: metric name supplied by the caller; not used.

    Returns:
        The temperature reported by ipmitool, or 0 when the command
        produced no usable output (e.g. ipmitool is missing or the
        sensor is not listed).
    """
    pipe = os.popen(_build_command())
    try:
        return _parse_temperature(pipe)
    finally:
        # Always release the pipe, even if iteration fails.
        pipe.close()


def metric_init(params):
    """Build, store and return the list of metric descriptors.

    Args:
        params: module parameters passed by the caller; not used.

    Returns:
        A one-element list holding the ambient temperature descriptor.
    """
    global descriptors

    # NOTE(review): the sample ambientTemp.conf in this write-up declares the
    # metric as "ambientTemp"; presumably the two names must match -- confirm.
    temp = {
        'name': 'Ambient Temp',
        'call_back': temp_handler,
        'time_max': 90,
        'value_type': 'uint',
        'units': 'C',
        'slope': 'both',
        'format': '%u',
        'description': 'Ambient Temperature of host through IPMI',
        'groups': 'IPMI In Band',
    }

    descriptors = [temp]
    return descriptors


def metric_cleanup():
    '''Clean up the metric module.'''
    pass


# This code is for debugging and unit testing
if __name__ == '__main__':
    metric_init(None)
    for d in descriptors:
        v = d['call_back'](d['name'])
        print('value for %s is %u' % (d['name'], v))
gmond -d 10 #查看加载模块 so文件
gmond -m #自定义模块加载
监视hadoop系统首选,因为hadoop的配置中默认带hadoop-metrics.properties,修改一下就可以用 简单记录一下安装和配置流程
写在最前面,系统为centos 5.4 x86版,其他的linux版本大同小异.
#先装这个,如果你装过rpmforge的话,需要先把这个关掉,另外如果你装了rpmforge的rrdtool程序包,请remove,因为ganglia用不了这个高版本的. rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/i386/epel-release-5-3.noarch.rpm yum -y install ganglia ganglia-devel ganglia-gmetad ganglia-gmond ganglia-web #安装这些包,在服务器上,被监控端不需要装这么多
#vi /etc/gmond.conf globals { host_dmax = 7200 #2小时没反应的机器就认为死掉了 } cluster { name = "hadoop" #看你喜好随意定义 owner = "owner" #看你喜好随意定义 latlong = "unspecified" url = "unspecified" } udp_send_channel { mcast_join = [your server IP] #只修改这个IP就OK了 port = 8649 ttl = 1 } udp_recv_channel { /* mcast_join = 239.2.11.71 bind = 239.2.11.71 */ #这两行注释掉 port = 8649 } #其余默认就OK #service gmond start #vi /etc/gmetad.conf data_source "hadoop cluster" [your server IP]:8649 service gmetad start #数据接受服务器端就OK了 简单测试一下 telnet localhost 8649 #会显示一片东西
#node就更简单了 yum -y install ganglia-gmond sed -i '20s/unspecified/hadoop/' /etc/gmond.conf sed -i '21s/unspecified/owner/' /etc/gmond.conf sed -i '34s/239.2.11.71/[your server IP]/' /etc/gmond.conf sed -i '41s/^/\/*/' /etc/gmond.conf sed -i '41s/$/*\//' /etc/gmond.conf sed -i '43s/^/\/*/' /etc/gmond.conf sed -i '43s/$/*\//' /etc/gmond.conf 把上面那个IP换成你自己的IP执行一下就OK了 #service gmond start
以上ganglia就配置完成了. http://your_server_IP/ganglia 可以看到图,其他的不需要你操作
sed -i '11s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '13s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '14s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '14s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '27s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '29s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '30s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '30s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '42s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '43s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '44s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties sed -i '44s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties echo "" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties echo "rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties echo "rpc.period=10" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties echo "rpc.servers=[your server IP]:8649" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties service gmond start /etc/init.d/hadoop-0.20-datanode restart /etc/init.d/hadoop-0.20-tasktracker restart 上面的代码把IP地址改一下,执行就OK了,服务器端会自动收集数据
#从IBM知识网站上看到的,我没用过,修改一下适应大规模自动导入
# Copy the gmond binary, its config, init script and the shared libraries it
# needs to every host listed in /tmp/mynodes (whitespace-separated host names),
# then start the daemon on that host. Run as root.
for i in $(cat /tmp/mynodes); do
    scp /usr/sbin/gmond "$i":/usr/sbin/gmond
    ssh "$i" mkdir -p /etc/ganglia/
    scp /etc/ganglia/gmond.conf "$i":/etc/ganglia/
    scp /etc/init.d/gmond "$i":/etc/init.d/
    scp /usr/lib64/libganglia-3.1.1.so.0 "$i":/usr/lib64/
    scp /lib64/libexpat.so.0 "$i":/lib64/
    scp /usr/lib64/libconfuse.so.0 "$i":/usr/lib64/
    scp /usr/lib64/libapr-1.so.0 "$i":/usr/lib64/
    scp -r /usr/lib64/ganglia "$i":/usr/lib64/
    ssh "$i" service gmond start
done