hadoopMR统计分析日志脚本一例

#! /bin/sh

公司主营业务:成都网站设计、网站建设、移动网站开发等业务。帮助企业客户真正实现互联网宣传,提高企业的竞争能力。创新互联公司是一支青春激扬、勤奋敬业、活力澎湃、和谐高效的团队。公司秉承以“开放、自由、严谨、自律”为核心的企业文化,感谢客户对我们的高要求,感谢他们从不同领域给我们带来的挑战,让我们充满激情的团队有机会用头脑与智慧不断地给客户带来惊喜。创新互联公司推出乌鲁木齐免费做网站回馈大家。

############################

#split today and yesterday

# Append ten single-space lines to the run log so that consecutive runs are
# visually separated, then stamp the start of this run with yesterday's date.
for i in 1 2 3 4 5 6 7 8 9 10; do
  echo " "
done >> /u1/hadoop-stat/stat.log

echo "begin[$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log

############################

#remove file

#######################################
# Remove every entry of a directory whose name is not the current month.
#
# "Current month" is the YYYY-MM of yesterday's date (GNU date -d), so a run
# on the 1st still keeps the month that yesterday belongs to.
#
# Arguments: $1 - directory whose entries are named by month (YYYY-MM)
# Returns:   0 on success; 1 if $1 is empty or not a directory, or if the
#            month cannot be determined (nothing is deleted in those cases)
#######################################
removeFilepathNotCurrentMonth() {
  # 'local' is not strictly POSIX, but dash, ash and bash all support it.
  local dir month entry

  dir=$1
  # Refuse an empty or missing directory: with an empty path the removal
  # below would degrade into "rm -rf /<name>".
  if [ -z "$dir" ] || [ ! -d "$dir" ]; then
    echo "removeFilepathNotCurrentMonth: not a directory: '$dir'" >&2
    return 1
  fi

  # An empty month would match nothing and therefore delete everything.
  month=$(date "+%Y-%m" -d "-1 days") || return 1
  [ -n "$month" ] || return 1

  # Glob instead of parsing ls so names containing whitespace stay intact.
  for entry in "$dir"/*; do
    # An unmatched glob is left as the literal pattern; skip it.
    [ -e "$entry" ] || [ -L "$entry" ] || continue
    if [ "${entry##*/}" != "$month" ]; then
      rm -rf -- "$entry"
    fi
  done

  return 0
}

# Local staging directories for the application (gy) and nginx logs.
GYLOG_PATH="/u1/hadoop-stat/gylog"
NGINXLOG_PATH="/u1/hadoop-stat/nginxlog"

# Purge everything in the staging directories that does not belong to the
# current month, recording each step in the run log.
printf '%s\n' "begin remove gylogpath's files not in current month" \
  >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "$GYLOG_PATH"

printf '%s\n' "begin remove nginxlogpath's files not in current month" \
  >> /u1/hadoop-stat/stat.log
removeFilepathNotCurrentMonth "$NGINXLOG_PATH"

############################

#scp file between hosts

# Fetch yesterday's application log (host gy02) and nginx access log (host
# gy01) into the local per-month staging directories. A file that is already
# present locally is not fetched again.
day=$(date "+%Y-%m-%d" -d "-1 days")
# Derive the month from the same value so day and month can never disagree
# (two separate date calls could straddle midnight).
month=${day%-*}

gyfilename="gylog-${day}.log"
gyfilepath="${GYLOG_PATH}/${month}"

# -p: succeeds when the directory already exists and creates missing parents.
mkdir -p "$gyfilepath"

if [ ! -f "$gyfilepath/$gyfilename" ]; then
  echo "begin scp gylog" >> /u1/hadoop-stat/stat.log
  if ! scp "gy02:/u1/logs/gylog/$gyfilename" "$gyfilepath/"; then
    # Drop a partial download so the next run retries instead of treating
    # the truncated file as complete.
    rm -f -- "$gyfilepath/$gyfilename"
    echo "scp gylog failed" >> /u1/hadoop-stat/stat.log
  fi
fi

# NOTE(review): the "ngxinlog-" spelling looks like a typo for "nginxlog-",
# but it is the name the file is uploaded under, so it is kept unchanged.
nginxfilename="ngxinlog-${day}.log"
nginxfilepath="${NGINXLOG_PATH}/${month}"

mkdir -p "$nginxfilepath"

if [ ! -f "$nginxfilepath/$nginxfilename" ]; then
  echo "begin scp nginxlog" >> /u1/hadoop-stat/stat.log
  # gy_access.log.1 is presumably the most recently rotated access log;
  # it is renamed to the dated name only after a successful copy.
  if scp gy01:/u1/logs/lbnginx/gy_access.log.1 "$nginxfilepath/"; then
    mv -- "$nginxfilepath/gy_access.log.1" "$nginxfilepath/$nginxfilename"
  else
    rm -f -- "$nginxfilepath/gy_access.log.1"
    echo "scp nginxlog failed" >> /u1/hadoop-stat/stat.log
  fi
fi

###########################

#copy file to hadoop

# Upload the staged log files to HDFS.
# Layout per log type: <base>/<YYYY-MM-DD>/input and <base>/<YYYY-MM-DD>/output.
GYLOG_HADOOP_PATH="/logs/gylog"
NGINXLOG_HADOOP_PATH="/logs/nginxlog"

# Despite its name, this holds yesterday's full date (YYYY-MM-DD).
monthhadoop=$(date "+%Y-%m-%d" -d "-1 days")

# --- application (gy) log ---
gyhadoopfilepath="${GYLOG_HADOOP_PATH}/${monthhadoop}"
gyhadoopfilepathinput="${gyhadoopfilepath}/input"
gyhadoopfilepathoutput="${gyhadoopfilepath}/output"

# Create the day directory with its input/output children only when the day
# directory itself is missing.
if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$gyhadoopfilepath"; then
  echo "begin mkdir gyhadoopfilepath in hadoop because of not exist:$gyhadoopfilepath" \
    >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepath"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepathinput"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$gyhadoopfilepathoutput"
fi

# Upload the file unless it is already in the input directory.
if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$gyhadoopfilepathinput/$gyfilename"; then
  echo "begin copy gyhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal \
    "$gyfilepath/$gyfilename" "$gyhadoopfilepathinput/"
fi

# --- nginx log ---
nginxhadoopfilepath="${NGINXLOG_HADOOP_PATH}/${monthhadoop}"
nginxhadoopfilepathinput="${nginxhadoopfilepath}/input"
nginxhadoopfilepathoutput="${nginxhadoopfilepath}/output"

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$nginxhadoopfilepath"; then
  echo "begin mkdir nginxhadoopfilepath in hadoop because of not exist:$nginxhadoopfilepath" \
    >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepath"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepathinput"
  /u1/hadoop-1.0.1/bin/hadoop dfs -mkdir "$nginxhadoopfilepathoutput"
fi

if ! /u1/hadoop-1.0.1/bin/hadoop dfs -test -e "$nginxhadoopfilepathinput/$nginxfilename"; then
  echo "begin copy nginxhadoopfile to hadoop" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop dfs -copyFromLocal \
    "$nginxfilepath/$nginxfilename" "$nginxhadoopfilepathinput/"
fi

##########################

#begin hadoop stat

# Run the statistics jobs from stat.jar one after another, passing $day as
# the only argument and logging the start of each job.
#
# Disabled jobs, kept for reference:
#echo "begin hadoop stat RequestTimeCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.requestTime.RequestTimeCount $day
#echo "begin hadoop stat RequestCount" >> /u1/hadoop-stat/stat.log
#/u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount $day

# Each entry is "<package>.<MainClass>" below gy.log.mr; the class name alone
# is what appears in the log line.
for job in nginx.NginxCount gylog.GylogCount; do
  echo "begin hadoop stat ${job#*.}" >> /u1/hadoop-stat/stat.log
  /u1/hadoop-1.0.1/bin/hadoop jar /u1/hadoop-stat/stat.jar "gy.log.mr.${job}" "$day"
done

##########################

#end for all

# Stamp the end of this run in the log, tagged with yesterday's date.
echo "end[$(date "+%Y-%m-%d" -d "-1 days")]" >> /u1/hadoop-stat/stat.log

注:

/u1/hadoop-stat/stat.jar gy.log.mr.request.RequestCount

/u1/hadoop-stat/stat.jar gy.log.mr.nginx.NginxCount

/u1/hadoop-stat/stat.jar gy.log.mr.gylog.GylogCount

上面的MR(MapReduce)作业实现的是自定义的统计规则,可根据自己的需求开发。

脚本的其余部分主要使用了hadoop的基本命令,相信了解hadoop的读者很容易就能看懂。


网页标题:hadoopMR统计分析日志脚本一例
文章网址:http://ybzwz.com/article/ghojhi.html