Ganglia

自定义模块加载

ambientTemp.conf

modules { 
  module { 
    name = "ambientTemp"     #此处名称要与自定义模块名相同 
    language = "python" 
  } 
} 
 
collection_group { 
  collect_every = 10 
  time_threshold = 50 
  metric { 
    name = "ambientTemp" 
    title = "Ambient Temperature" 
    value_threshold = 70 
  } 
}

ambientTemp.py #模块程序名称

import os 
def temp_handler(name):
  """Return the "Planar" temperature reading obtained through ipmitool.

  Runs ``ipmitool ... -c sdr type temperature``, replaces spaces with
  underscores, keeps the second comma-separated field of every row
  matching ``Planar`` and converts the last non-empty result to an int.

  Args:
    name: Metric name handed over by the caller; it is not used here.

  Returns:
    The reading as an int, or 0 when the command produced no usable
    output (ipmitool missing, no matching sensor, non-integer value).
  """
  # our commands we're going to execute
  sdrfile = "/tmp/sdr.dump"
  ipmitool = "/usr/bin/ipmitool"
  # Before you run this Load the IPMI drivers:
  # modprobe ipmi_msghandler
  # modprobe ipmi_si
  # modprobe ipmi_devintf
  # you'll also need to change permissions of /dev/ipmi0 for nobody
  # chown nobody:nobody /dev/ipmi0
  # put the above in /etc/rc.d/rc.local

  # Create the SDR dump once; later calls reuse the existing file.
  if not os.path.exists(sdrfile):
    os.system(ipmitool + ' sdr dump ' + sdrfile)

  if os.path.exists(sdrfile):
    ipmicmd = ipmitool + " -S " + sdrfile + " -c sdr"
  else:
    # The dump could not be created: fall back to querying without it.
    # Parenthesized single argument keeps this valid on Python 2 and 3.
    print("file does not exist... oops!")
    ipmicmd = ipmitool + " -c sdr"
  cmd = ipmicmd + " type temperature | sed 's/ /_/g' "
  cmd = cmd + " | awk -F, '/Planar/ {print $2}' "

  # Remember the tokens of the last non-empty output line.  Starting from
  # an empty list means "no output at all" is a handled case instead of
  # an unbound local variable.
  fields = []
  entries = os.popen(cmd)
  try:
    for l in entries:
      tokens = l.split()
      if tokens:
        fields = tokens
  finally:
    # Always release the pipe, even if reading fails.
    entries.close()

  if not fields:
    return 0
  try:
    return int(fields[0])
  except ValueError:
    # Non-integer text in the value column: report "no reading".
    return 0
 
def metric_init(params):
    """Build and publish the list of metric descriptors for this module.

    Args:
        params: Module parameters supplied by the caller; not used here.

    Returns:
        A one-element list holding the ambient temperature descriptor.
        The same list is stored in the module-level ``descriptors``.
    """
    global descriptors

    # 'name' must be identical to the metric name used in the
    # collection_group of ambientTemp.conf ("ambientTemp"); with a
    # different name the configured metric is never matched to this
    # descriptor.
    temp = {'name': 'ambientTemp',
        'call_back': temp_handler,
        'time_max': 90,
        'value_type': 'uint',
        'units': 'C',
        'slope': 'both',
        'format': '%u',
        'description': 'Ambient Temperature of host through IPMI',
        'groups': 'IPMI In Band'}

    descriptors = [temp]

    return descriptors
 
def metric_cleanup():
    """Tear down the metric module; there is nothing to release."""
    return None
 
# Debugging / unit-testing entry point: when the file is run directly,
# initialise the descriptors and print one sample value per metric.
if __name__ == '__main__':
    metric_init(None)
    for d in descriptors:
        v = d['call_back'](d['name'])
        # A single parenthesized argument is accepted both by the
        # Python 2 print statement and the Python 3 print function.
        print('value for %s is %u' % (d['name'], v))

command line tips

gmond -d 10   #以调试级别10在前台运行,可查看加载了哪些模块(so文件) 
gmond -m      #列出gmond支持的所有metric,可确认自定义模块是否加载成功

Ganglia是监视hadoop系统的首选,因为hadoop的配置中默认带有hadoop-metrics.properties,修改一下就可以用
简单记录一下安装和配置流程

写在最前面,系统为centos 5.4 x86版,其他的linux版本大同小异.

安装

 
#先装这个,如果你装过rpmforge的话,需要先把这个关掉,另外如果你装了rpmforge的rrdtool程序包,请remove,因为ganglia用不了这个高版本的. 
rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/i386/epel-release-5-3.noarch.rpm 

yum -y install ganglia ganglia-devel ganglia-gmetad ganglia-gmond ganglia-web  
#安装这些包,在服务器上,被监控端不需要装这么多 

配置 Server

#vi /etc/gmond.conf 
 globals { 
  host_dmax = 7200  #2小时没反应的机器就认为死掉了 
} 
 
cluster {  
  name = "hadoop"    #看你喜好随意定义 
  owner = "owner"    #看你喜好随意定义 
  latlong = "unspecified"  
  url = "unspecified"  
}  
 
udp_send_channel { 
  mcast_join = [your server IP]  #只修改这个IP就OK了 
  port = 8649 
  ttl = 1 
} 
 
udp_recv_channel { 
/*  mcast_join = 239.2.11.71 
  bind = 239.2.11.71  */        #这两行注释掉 
  port = 8649 
} 
#其余默认就OK 
#service gmond start 
 
#vi /etc/gmetad.conf 
data_source "hadoop cluster" [your server IP]:8649 
service gmetad start 
#数据接收服务器端就OK了 
 
简单测试一下 
 
telnet localhost 8649 
#会显示一片东西

配置 Node

#node就更简单了 
 
yum -y install ganglia-gmond 
sed -i '20s/unspecified/hadoop/' /etc/gmond.conf  
sed -i '21s/unspecified/owner/' /etc/gmond.conf  
sed -i '34s/239.2.11.71/[your server IP]/' /etc/gmond.conf  
sed -i '41s/^/\/*/' /etc/gmond.conf  
sed -i '41s/$/*\//' /etc/gmond.conf  
sed -i '43s/^/\/*/' /etc/gmond.conf  
sed -i '43s/$/*\//' /etc/gmond.conf  
 
把上面那个IP换成你自己的IP执行一下就OK了 
#service gmond start

以上ganglia就配置完成了.
http://your_server_IP/ganglia 可以看到图,其他的不需要你操作

配置 Node的hadoop监控

sed -i '11s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '13s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '14s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '14s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
 
sed -i '27s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '29s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '30s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '30s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
 
sed -i '42s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '43s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '44s/^# //' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
sed -i '44s/localhost/[your server IP]/' /etc/hadoop-0.20/conf/hadoop-metrics.properties 
 
echo "" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties 
echo "rpc.class=org.apache.hadoop.metrics.ganglia.GangliaContext" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties 
echo "rpc.period=10" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties 
echo "rpc.servers=[your server IP]:8649" >> /etc/hadoop-0.20/conf/hadoop-metrics.properties 
 
service gmond start 
/etc/init.d/hadoop-0.20-datanode restart 
/etc/init.d/hadoop-0.20-tasktracker restart 
 
上面的代码把IP地址改一下,执行就OK了,服务器端会自动收集数据

快速配置多结点

#从IBM知识网站上看到的,我没用过,修改一下适应大规模自动导入

for i in `cat /tmp/mynodes`; do  
scp /usr/sbin/gmond $i:/usr/sbin/gmond 
ssh $i mkdir -p /etc/ganglia/ 
scp /etc/ganglia/gmond.conf $i:/etc/ganglia/ 
scp /etc/init.d/gmond $i:/etc/init.d/ 
scp /usr/lib64/libganglia-3.1.1.so.0 $i:/usr/lib64/ 
scp /lib64/libexpat.so.0 $i:/lib64/ 
scp /usr/lib64/libconfuse.so.0 $i:/usr/lib64/ 
scp /usr/lib64/libapr-1.so.0 $i:/usr/lib64/ 
scp -r /usr/lib64/ganglia $i:/usr/lib64/ 
ssh $i service gmond start 
done
/home1/yepnnet/public_html/wiki/data/pages/ganglia.txt · 最后更改: 2010/03/24 20:09 由 admin
到顶部
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0