Hadoop

基本安装手顺

快速安装手顺。如果你刚接触 Hadoop,请不要参考这篇文章:这只是我工作中使用的命令手顺,对新手没有任何帮助。
# Install the EPEL and RPMforge release packages (EL5, x86_64).
rpm -Uvh http://download.fedora.redhat.com/pub/epel/5/x86_64/epel-release-5-3.noarch.rpm
rpm -Uvh http://apt.sw.be/redhat/el5/en/x86_64/RPMS.dag/rpmforge-release-0.3.6-1.el5.rf.x86_64.rpm
# Remove the munin plugin entries that are not wanted on this node.
# NOTE(review): nothing above installs munin-node; presumably it is already present - confirm.
rm -rf /etc/munin/plugins/sendmail_mail*
rm -rf /etc/munin/plugins/netstat
rm -rf /etc/munin/plugins/interrupts
rm -rf /etc/munin/plugins/irqstats
rm -rf /etc/munin/plugins/entropy
rm -rf /etc/munin/plugins/nfs_client
rm -rf /etc/munin/plugins/ntp_offset
# Overwrite line 37 of munin-node.conf with an allow rule for 10.40.0.2
# (the tripled backslashes leave a single "\." in the written regex).
# NOTE(review): the line number is hard-coded; verify it against the installed munin-node.conf.
sed -i '37s/.*/allow ^10\\\.40\\\.0\\\.2$/' /etc/munin/munin-node.conf
# Restart munin-node so the plugin and config changes take effect.
service munin-node restart
 
# Install the NRPE daemon and the Nagios plugins.
yum -y install nagios-plugins-nrpe.x86_64 nagios-nrpe.x86_64
# NOTE(review): the glob is unquoted, so the shell would expand it against
# matching file names in the current directory before yum sees it.
yum -y install nagios-plugins-*

yum -y install nrpe.x86_64
# Overwrite line 79 of nrpe.cfg: allow connections from localhost and 10.40.0.2.
# NOTE(review): line numbers 79 and 204 are hard-coded; verify against the installed nrpe.cfg.
sed -i '79s/.*/allowed_hosts=127\.0\.0\.1,10\.40\.0\.2/' /etc/nagios/nrpe.cfg
# Overwrite line 204 of nrpe.cfg: define a check_ssh command that probes 127.0.0.1.
sed -i '204s/.*/command[check_ssh]=\/usr\/lib64\/nagios\/plugins\/check_ssh -H 127\.0\.0\.1/' /etc/nagios/nrpe.cfg
service nrpe start

yum -y install ncftp

# Overwrite line 77 of /etc/sudoers with a full-sudo entry for user yepn.
# NOTE(review): this edits sudoers in place without visudo's syntax check, and the
# line number is hard-coded - confirm line 77 is the intended line.
sed -i '77s/.*/yepn        ALL=(ALL)       ALL/' /etc/sudoers
# Install the JDK from an RPM that must already have been copied to /tmp.
rpm -Uvh /tmp/jdk-6u17-linux-amd64.rpm
 
# Install the i386 compat libstdc++ packages.
yum -y install compat-libstdc++-33.i386 compat-libstdc++-296.i386
# Download Ant 1.8.0RC1, unpack it under /usr/local and expose it as /usr/local/ant.
wget http://ftp.riken.jp/net/apache/ant/binaries/apache-ant-1.8.0RC1-bin.tar.gz
tar xvzf apache-ant-1.8.0RC1-bin.tar.gz
mv apache-ant-1.8.0RC1 /usr/local/
ln -s /usr/local/apache-ant-1.8.0RC1 /usr/local/ant
# Download LZO 2.03, build it as a shared library and install it.
# The chain leaves the shell inside lzo-2.03 once the cd has succeeded.
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.03.tar.gz
tar xvzf lzo-2.03.tar.gz && cd lzo-2.03 && ./configure --enable-shared && make && make install
# Keep the existing /usr/bin/ant as ant.org and point /usr/bin/ant at the new Ant.
mv /usr/bin/ant /usr/bin/ant.org
ln -s /usr/local/ant/bin/ant /usr/bin/ant
# Return to the home directory before the next step.
cd ~/
 
# Register the Cloudera testing yum repository.
wget http://archive.cloudera.com/redhat/cdh/cloudera-testing.repo
mv cloudera-testing.repo /etc/yum.repos.d/
# Install all Hadoop 0.20 packages in one yum transaction.
# The backslash must be the very last character on the line and the continuation
# must follow immediately: a trailing space or an intervening blank line ends the
# command, and the second package list would then be executed as a command itself.
yum -y install hadoop-0.20 hadoop-0.20-native hadoop-0.20-namenode hadoop-0.20-secondarynamenode \
    hadoop-0.20-datanode hadoop-0.20-jobtracker hadoop-0.20-tasktracker hadoop-0.20-docs
 
# Turn off boot-time start for the jobtracker, namenode and secondarynamenode services.
for daemon in jobtracker namenode secondarynamenode; do
    chkconfig "hadoop-0.20-${daemon}" off
done
# Turn off every service whose chkconfig listing line mentions hbase.
for svc in $(chkconfig --list | grep hbase | awk '{print $1}'); do
    chkconfig $svc off
done
 
# Write yepn's login profile in one step.
# The here-document delimiter is quoted ('EOF') so that $PATH, $HOME and $ANT_HOME
# are written literally and expanded when yepn logs in. With double-quoted echo
# they would be expanded immediately, freezing the invoking shell's values
# (and an unset ANT_HOME) into the file.
cat > /home/yepn/.bash_profile <<'EOF'
# /home/yepn/.bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi

# User specific environment and startup programs
export ANT_HOME=/usr/local/ant
PATH=/usr/java/bin:$PATH:$HOME/bin:$ANT_HOME/bin
LD_LIBRARY_PATH=/usr/local/lib
JAVA_HOME=/usr/java/latest

export LD_LIBRARY_PATH
export JAVA_HOME
export PATH
EOF
 
# Open a login shell as yepn; the commands up to "exit" are typed in that shell.
su - yepn
# Fetch hadoop-lzo and build the native library plus the distribution tarball.
git clone git://github.com/kevinweil/hadoop-lzo.git
cd hadoop-lzo
ant compile-native tar

# Leave yepn's shell and continue as the previous user.
exit
cd /home/yepn/hadoop-lzo
# Copy the built jar into Hadoop's lib directory.
cp build/hadoop-lzo-0.3.0/hadoop-lzo-0.3.0.jar /usr/lib/hadoop-0.20/lib
# Copy the native library tree into Hadoop's native lib directory through a tar pipe.
tar -cBf - -C build/hadoop-lzo-0.3.0/lib/native . | tar -xBvf - -C /usr/lib/hadoop-0.20/lib/native


# Start the datanode and tasktracker, then restart munin-node.
/etc/init.d/hadoop-0.20-datanode start
/etc/init.d/hadoop-0.20-tasktracker start
service munin-node restart
 
 
# Run this last.

# Create the working directories under /hdfs.
# -p creates /hdfs itself when it is missing and does not fail when a directory
# already exists, so the step can be repeated safely.
mkdir -p /hdfs/data /hdfs/local /hdfs/logs /hdfs/name /hdfs/pids /hdfs/system /hdfs/tmp

# Make the whole tree readable, writable and executable by every user.
# NOTE(review): 777 is world-writable; consider restricting to the Hadoop service users.
chmod -R 777 /hdfs
 
# Unpack the site configuration archive inside /etc/hadoop-0.20/.
# NOTE(review): presumably the archive unpacks to conf.yepn, which the symlink below targets - confirm.
cd /etc/hadoop-0.20/
tar -zxvf /tmp/conf.yepn.tgz
# Replace the alternatives entry so it points at conf.yepn.
rm -rf /etc/alternatives/hadoop-0.20-conf
ln -s /etc/hadoop-0.20/conf.yepn /etc/alternatives/hadoop-0.20-conf
# Restart the daemons after switching the configuration link.
/etc/init.d/hadoop-0.20-datanode restart
/etc/init.d/hadoop-0.20-tasktracker restart

# List the HDFS home directory as a quick check.
hadoop dfs -ls

库文件安装

# Fetch hadoop-lzo and build the native library plus the distribution tarball.
git clone git://github.com/kevinweil/hadoop-lzo.git
cd hadoop-lzo
ant compile-native tar
# Switch user with su (interactive); the remaining commands are typed in that shell.
su

# Copy the built jar into Hadoop's lib directory.
cp build/hadoop-lzo-0.3.0/hadoop-lzo-0.3.0.jar /usr/lib/hadoop-0.20/lib
# Copy the native library tree into Hadoop's native lib directory through a tar pipe.
tar -cBf - -C build/hadoop-lzo-0.3.0/lib/native . | tar -xBvf - -C /usr/lib/hadoop-0.20/lib/native

# Start the datanode and tasktracker.
/etc/init.d/hadoop-0.20-datanode start
/etc/init.d/hadoop-0.20-tasktracker start

MapReduce Python 脚本

#mapper.py
#!/usr/bin/env python
"""Hadoop Streaming mapper for word count.

Reads text lines from standard input and writes one tab-delimited
``<word> TAB 1`` record per whitespace-separated word to standard output.
"""

import sys


def map_words(lines):
    """Yield one ``'<word>\\t1'`` record for every word in *lines*.

    Each line is stripped of leading and trailing whitespace and split on
    runs of whitespace, so blank lines produce no records.
    """
    for line in lines:
        for word in line.strip().split():
            # tab-delimited; the trivial word count is 1
            yield '%s\t%s' % (word, 1)


def main():
    """Map standard input to standard output (the input for reducer.py)."""
    for record in map_words(sys.stdin):
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3; the print statement is a SyntaxError on 3.
        print(record)


# Only consume stdin when executed as a script, so the module can be imported.
if __name__ == '__main__':
    main()
#reducer.py
#!/usr/bin/env python
"""Hadoop Streaming reducer for word count.

Reads tab-delimited ``<word> TAB <count>`` records from standard input, sums
the counts per word and writes ``<word> TAB <total>`` records to standard
output, sorted by word.
"""

from operator import itemgetter
import sys


def count_words(lines):
    """Return a dict mapping each word in *lines* to the sum of its counts.

    A line that has no tab separator after stripping, or whose count is not
    an integer, is silently discarded.
    """
    word2count = {}
    for line in lines:
        # remove leading and trailing whitespace
        line = line.strip()
        try:
            # Both the unpacking and int() raise ValueError on malformed
            # input; keeping them in the same try means a blank or tab-less
            # line is skipped instead of aborting the whole reduce task.
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        word2count[word] = word2count.get(word, 0) + count
    return word2count


def main():
    """Reduce standard input to sorted word totals on standard output."""
    word2count = count_words(sys.stdin)
    # Sorting is not required by Hadoop; it only makes the final output look
    # like the official Hadoop word count examples.
    for word, count in sorted(word2count.items(), key=itemgetter(0)):
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3; the print statement is a SyntaxError on 3.
        print('%s\t%s' % (word, count))


# Only consume stdin when executed as a script, so the module can be imported.
if __name__ == '__main__':
    main()

并行计算

#hadoop jar /usr/lib/hadoop-0.20/contrib/streaming/hadoop-0.20.1+152-streaming.jar -file ./mapper.py \ 
-mapper ./mapper.py  -file ./reducer.py  -reducer ./reducer.py -input gutenberg/* \ 
-output getenberg-output 
###输出结果 
#hadoop dfs -ls getenberg-output 
Found 6 items 
drwxr-xr-x   - atlantis supergroup          0 2010-02-02 15:41 /user/atlantis/getenberg-output/_logs 
-rw-r--r--   1 atlantis supergroup      66618 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00000 
-rw-r--r--   1 atlantis supergroup      68868 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00001 
-rw-r--r--   1 atlantis supergroup      66862 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00002 
-rw-r--r--   1 atlantis supergroup      68264 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00003 
-rw-r--r--   1 atlantis supergroup      66828 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00004 
 
###查看计算内容 
#hadoop dfs -cat getenberg-output/part-00000  
###显示结果略
/home/yepnnet/public_html/wiki/data/pages/hadoop.txt · 最后更改: 2010/02/09 00:24 由 admin
到顶部
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0