# Install the CDH3 repository RPM from the current directory
# (-i install, -v verbose, -h print progress hash marks).
# Presumably this adds the Cloudera CDH3 yum repository definition.
rpm -ivh cdh3-repository-1.0-1.noarch.rpm
# Install the Hadoop 0.20 pseudo-distributed configuration package and the
# native libraries. No -y is given, so yum prompts for confirmation.
yum install hadoop-0.20-conf-pseudo hadoop-0.20-native
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Hadoop core settings (presumably conf/core-site.xml; the file name is not shown here). -->
<configuration>
<!-- Default filesystem URI: HDFS served by the NameNode at this host and port. -->
<property>
<name>fs.default.name</name>
<value>hdfs://namenode.mobcon.inside:8020</value>
</property>
<!-- Base directory for Hadoop's temporary/working data; ${user.name} gives each user a separate subdirectory. -->
<property>
<name>hadoop.tmp.dir</name>
<value>/var/lib/hadoop-0.20/cache/${user.name}</value>
</property>
<!-- OOZIE proxy user setting
<property>
<name>hadoop.proxyuser.oozie.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.oozie.groups</name>
<value>*</value>
</property>
-->
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- HDFS settings (presumably conf/hdfs-site.xml; the file name is not shown here). -->
<configuration>
<!-- Default number of replicas for each HDFS block. -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Turns HDFS permission checking off.
     NOTE(review): with this set to false any user can read or modify any path;
     confirm this is intended outside a trusted network. -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<!-- specify this so that running 'hadoop namenode -format' formats the right dir
<property>
<name>dfs.name.dir</name>
<value>/var/lib/hadoop-0.20/cache/hadoop/dfs/name</value>
</property>
-->
<!-- Enable Hue Plugins
<property>
<name>dfs.namenode.plugins</name>
<value>org.apache.hadoop.thriftfs.NamenodePlugin</value>
<description>Comma-separated list of namenode plug-ins to be activated.
</description>
</property>
<property>
<name>dfs.datanode.plugins</name>
<value>org.apache.hadoop.thriftfs.DatanodePlugin</value>
<description>Comma-separated list of datanode plug-ins to be activated.
</description>
</property>
<property>
<name>dfs.thrift.address</name>
<value>0.0.0.0:10090</value>
</property>
-->
</configuration>
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- MapReduce settings (presumably conf/mapred-site.xml; the file name is not shown here). -->
<configuration>
<!-- Host and port of the JobTracker; here it is the same host that fs.default.name-style
     naming calls "namenode". -->
<property>
<name>mapred.job.tracker</name>
<value>namenode.mobcon.inside:8021</value>
</property>
<!-- Enable Hue plugins
<property>
<name>mapred.jobtracker.plugins</name>
<value>org.apache.hadoop.thriftfs.ThriftJobTrackerPlugin</value>
<description>Comma-separated list of jobtracker plug-ins to be activated.
</description>
</property>
<property>
<name>jobtracker.thrift.address</name>
<value>0.0.0.0:9290</value>
</property>
-->
</configuration>
dark94025.mobcon.inside
dark94026.mobcon.inside
dark94027.mobcon.inside
dark94028.mobcon.inside
dark94029.mobcon.inside
dark94030.mobcon.inside
dark94031.mobcon.inside
dark94032.mobcon.inside
dark94033.mobcon.inside
dark94034.mobcon.inside
dark94035.mobcon.inside
# Start the HDFS NameNode and DataNode init services on this host.
service hadoop-0.20-namenode start
service hadoop-0.20-datanode start
# Open a login shell as the hdfs user; the next command is meant to be typed
# inside that shell (in a non-interactive script it would only run after the
# su shell exits, and then as the original user).
su - hdfs
# Smoke test: run the "pi" example from the bundled examples jar with
# arguments 4 and 2000 (presumably number of maps and samples per map — confirm).
# NOTE(review): only HDFS daemons are started above; the job presumably also
# needs a running JobTracker/TaskTracker — verify.
hadoop jar /usr/lib/hadoop/hadoop-examples.jar pi 4 2000
快速安装步骤。如果你刚接触 Hadoop,请不要参考这篇文章:这只是我工作中使用的命令步骤,对新手没有任何帮助。
# Install the release RPMs of two third-party repositories (EPEL 5 and
# RPMforge/DAG for EL5 x86_64), one after the other.
for repo_rpm in \
  http://download.fedora.redhat.com/pub/epel/5/x86_64/epel-release-5-3.noarch.rpm \
  http://apt.sw.be/redhat/el5/en/x86_64/RPMS.dag/rpmforge-release-0.3.6-1.el5.rf.x86_64.rpm
do
  rpm -Uvh "$repo_rpm"
done

# Remove the munin plugin entries that are not wanted on this node.
# Brace expansion happens before globbing, so sendmail_mail* is still matched
# against /etc/munin/plugins.
rm -rf /etc/munin/plugins/{sendmail_mail*,netstat,interrupts,irqstats,entropy,nfs_client,ntp_offset}

# Replace line 37 of munin-node.conf with an allow rule; the sed replacement
# writes the regex ^10\.40\.0\.2$ (each \\\. becomes \. in the file).
sed -i '37s/.*/allow ^10\\\.40\\\.0\\\.2$/' /etc/munin/munin-node.conf
service munin-node restart
# Install the NRPE daemon and the Nagios plugins.
yum -y install nagios-plugins-nrpe.x86_64 nagios-nrpe.x86_64
# The pattern is quoted so that yum, not the shell, expands it; unquoted, a
# file named nagios-plugins-* in the current directory would be passed instead.
yum -y install 'nagios-plugins-*'
yum -y install nrpe.x86_64
# Replace line 79 of nrpe.cfg with the list of hosts allowed to query NRPE.
# Dots need no escaping in a sed replacement; the written text is unchanged.
sed -i '79s/.*/allowed_hosts=127.0.0.1,10.40.0.2/' /etc/nagios/nrpe.cfg
# Replace line 204 with a check_ssh command definition. "|" is used as the sed
# delimiter so the plugin path does not need escaped slashes.
sed -i '204s|.*|command[check_ssh]=/usr/lib64/nagios/plugins/check_ssh -H 127.0.0.1|' /etc/nagios/nrpe.cfg
service nrpe start
# Install the ncftp client.
yum -y install ncftp
# Overwrite line 77 of /etc/sudoers with a rule giving user yepn full sudo rights.
# NOTE(review): this replaces whatever is on line 77 without looking at it and
# bypasses visudo's syntax check; verify the line number before running.
sed -i '77s/.*/yepn ALL=(ALL) ALL/' /etc/sudoers
# Install (or upgrade to) the JDK 6u17 RPM that must already be in /tmp.
rpm -Uvh /tmp/jdk-6u17-linux-amd64.rpm
# Install the 32-bit compat-libstdc++ packages.
yum -y install compat-libstdc++-33.i386 compat-libstdc++-296.i386
# Download Apache Ant 1.8.0RC1 into the current directory, unpack it, move it
# under /usr/local and expose it through the /usr/local/ant symlink.
wget http://ftp.riken.jp/net/apache/ant/binaries/apache-ant-1.8.0RC1-bin.tar.gz
tar xvzf apache-ant-1.8.0RC1-bin.tar.gz
mv apache-ant-1.8.0RC1 /usr/local/
ln -s /usr/local/apache-ant-1.8.0RC1 /usr/local/ant
# Download LZO 2.03, then unpack, configure (shared library enabled), build
# and install it; the && chain stops at the first failing step.
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.03.tar.gz
tar xvzf lzo-2.03.tar.gz && cd lzo-2.03 && ./configure --enable-shared && make && make install
# The working directory is now lzo-2.03 (if the cd above succeeded).
# Keep the existing ant launcher as ant.org and point /usr/bin/ant at the new Ant.
mv /usr/bin/ant /usr/bin/ant.org
ln -s /usr/local/ant/bin/ant /usr/bin/ant
# Go back to the home directory.
cd ~/
# Fetch the Cloudera "testing" repository definition and register it with yum.
wget http://archive.cloudera.com/redhat/cdh/cloudera-testing.repo
mv cloudera-testing.repo /etc/yum.repos.d/
# Install the Hadoop 0.20 packages, including every daemon's init script.
yum -y install hadoop-0.20 hadoop-0.20-native hadoop-0.20-namenode hadoop-0.20-secondarynamenode \
hadoop-0.20-datanode hadoop-0.20-jobtracker hadoop-0.20-tasktracker hadoop-0.20-docs
# Do not start the JobTracker, NameNode or SecondaryNameNode at boot on this host.
chkconfig hadoop-0.20-jobtracker off
chkconfig hadoop-0.20-namenode off
chkconfig hadoop-0.20-secondarynamenode off
# Turn off every service whose "chkconfig --list" line mentions hbase.
# awk does the filtering and field extraction in one step; the service name
# is quoted when passed back to chkconfig.
for svc in $(chkconfig --list | awk '/hbase/ {print $1}'); do
  chkconfig "$svc" off
done
# Write yepn's login profile in one step.
# The here-document delimiter is quoted ('EOF') so that $PATH, $HOME and
# $ANT_HOME are written literally and expanded when yepn logs in. With
# double-quoted echo they were expanded by the shell running this command,
# freezing that shell's PATH and HOME into the file and turning $ANT_HOME/bin
# into "/bin" when ANT_HOME was unset.
cat > /home/yepn/.bash_profile <<'EOF'
# /home/yepn/.bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
 . ~/.bashrc
fi

# User specific environment and startup programs
export ANT_HOME=/usr/local/ant
PATH=/usr/java/bin:$PATH:$HOME/bin:$ANT_HOME/bin
LD_LIBRARY_PATH=/usr/local/lib
JAVA_HOME=/usr/java/latest

export LD_LIBRARY_PATH
export JAVA_HOME
export PATH
EOF
# Build hadoop-lzo as the unprivileged user yepn.
# "su - yepn -c" runs the build in yepn's login environment (so the profile's
# ANT_HOME/JAVA_HOME apply) and returns afterwards; the former
# "su - yepn ... exit" sequence only worked when typed interactively.
# GitHub no longer serves the unauthenticated git:// protocol, so clone over https.
su - yepn -c 'git clone https://github.com/kevinweil/hadoop-lzo.git && cd hadoop-lzo && ant compile-native tar'
# Back in the original (root) shell: install the build results into Hadoop.
cd /home/yepn/hadoop-lzo
cp build/hadoop-lzo-0.3.0/hadoop-lzo-0.3.0.jar /usr/lib/hadoop-0.20/lib
# Copy the native library tree through a tar pipe into Hadoop's native lib directory.
tar -cBf - -C build/hadoop-lzo-0.3.0/lib/native . | tar -xBvf - -C /usr/lib/hadoop-0.20/lib/native
# Start the worker daemons and restart munin-node.
/etc/init.d/hadoop-0.20-datanode start
/etc/init.d/hadoop-0.20-tasktracker start
service munin-node restart
# Run this part last.
# Create the working directories under /hdfs (the parent must already exist)
# and make the whole tree world-writable.
mkdir /hdfs/{data,local,logs,name,pids,system,tmp}
chmod -R 777 /hdfs
# Unpack the prepared configuration archive inside /etc/hadoop-0.20/.
cd /etc/hadoop-0.20/
tar -zxvf /tmp/conf.yepn.tgz
# Replace the hadoop-0.20-conf alternatives entry with a symlink to conf.yepn.
rm -rf /etc/alternatives/hadoop-0.20-conf
ln -s /etc/hadoop-0.20/conf.yepn /etc/alternatives/hadoop-0.20-conf
# Restart the worker daemons, then list the HDFS home directory as a quick check.
for daemon in datanode tasktracker; do
  /etc/init.d/hadoop-0.20-$daemon restart
done
hadoop dfs -ls
# hadoop-lzo build and install, started from an unprivileged shell
# (this appears to be a second, standalone version of the same procedure).
# GitHub no longer serves the unauthenticated git:// protocol, so clone over https.
git clone https://github.com/kevinweil/hadoop-lzo.git
cd hadoop-lzo
ant compile-native tar
# Become root interactively; plain "su" keeps the current directory, so the
# relative build/ paths below still resolve. The following commands are meant
# to be typed inside that root shell.
su
cp build/hadoop-lzo-0.3.0/hadoop-lzo-0.3.0.jar /usr/lib/hadoop-0.20/lib
# Copy the native library tree through a tar pipe into Hadoop's native lib directory.
tar -cBf - -C build/hadoop-lzo-0.3.0/lib/native . | tar -xBvf - -C /usr/lib/hadoop-0.20/lib/native
/etc/init.d/hadoop-0.20-datanode start
/etc/init.d/hadoop-0.20-tasktracker start
#mapper.py
#!/usr/bin/env python
"""Hadoop Streaming mapper for word count.

Reads text lines from standard input and writes one ``word<TAB>1`` record
per whitespace-separated word to standard output. Runs unchanged on
Python 2 and Python 3.
"""
import sys


def map_words(lines, out):
    """Write ``word<TAB>1`` for every word found in ``lines``.

    lines -- iterable of text lines (e.g. sys.stdin)
    out   -- writable text stream (e.g. sys.stdout)
    """
    for line in lines:
        # strip() removes leading/trailing whitespace and split() breaks the
        # line on any run of whitespace, so blank lines emit nothing.
        for word in line.strip().split():
            # Tab-delimited; the trivial count for one occurrence is 1.
            # These records are the input of the reduce step (reducer.py).
            out.write('%s\t%s\n' % (word, 1))


if __name__ == '__main__':
    map_words(sys.stdin, sys.stdout)
#reducer.py
#!/usr/bin/env python
"""Hadoop Streaming reducer for word count.

Reads ``word<TAB>count`` records from standard input, sums the counts per
word and writes ``word<TAB>total`` records to standard output, sorted by
word. Runs unchanged on Python 2 and Python 3.
"""
from operator import itemgetter
import sys


def reduce_counts(lines, out):
    """Sum the counts per word in ``lines`` and write the totals to ``out``.

    lines -- iterable of ``word<TAB>count`` text lines (e.g. sys.stdin)
    out   -- writable text stream (e.g. sys.stdout)

    Lines without a tab or with a non-numeric count are silently skipped.
    """
    # Maps each word to its accumulated count.
    word2count = {}
    for line in lines:
        line = line.strip()
        try:
            # Both the unpacking (no tab in the line) and int() (count is not
            # a number) raise ValueError; either way the line is discarded
            # instead of aborting the whole reduce task.
            word, count = line.split('\t', 1)
            count = int(count)
        except ValueError:
            continue
        word2count[word] = word2count.get(word, 0) + count

    # Sorting the words lexicographically is NOT required; it only makes the
    # final output look like the official Hadoop word count examples.
    for word, count in sorted(word2count.items(), key=itemgetter(0)):
        out.write('%s\t%s\n' % (word, count))


if __name__ == '__main__':
    reduce_counts(sys.stdin, sys.stdout)
#hadoop jar /usr/lib/hadoop-0.20/contrib/streaming/hadoop-0.20.1+152-streaming.jar -file ./mapper.py \
-mapper ./mapper.py -file ./reducer.py -reducer ./reducer.py -input gutenberg/* \
-output getenberg-output
###输出结果
#hadoop dfs -ls getenberg-output
Found 6 items
drwxr-xr-x - atlantis supergroup 0 2010-02-02 15:41 /user/atlantis/getenberg-output/_logs
-rw-r--r-- 1 atlantis supergroup 66618 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00000
-rw-r--r-- 1 atlantis supergroup 68868 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00001
-rw-r--r-- 1 atlantis supergroup 66862 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00002
-rw-r--r-- 1 atlantis supergroup 68264 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00003
-rw-r--r-- 1 atlantis supergroup 66828 2010-02-02 15:41 /user/atlantis/getenberg-output/part-00004
###查看计算内容
#hadoop dfs -cat getenberg-output/part-00000
###显示结果略