cat CARGO3.db
ThinkPad:USA:14000:2009:X301
ThinkPad:USA:14000:2009:X301
ThinkPad:USA:14000:2009:X301
HP:China:5600:2010:DM3
SumSung:korea:5400:2009:Q308
ThinkPad:USA:14000:2009:X301
LdeaPad:China:8000:2007:U450
Acer:*:8000:2010:PT210
Acer:*:8000:2010:PT210
uniq CARGO3.db #用uniq命令去除相邻的重复行(不相邻的重复行仍会保留)
ThinkPad:USA:14000:2009:X301
HP:China:5600:2010:DM3
SumSung:korea:5400:2009:Q308
ThinkPad:USA:14000:2009:X301
LdeaPad:China:8000:2007:U450
Acer:*:8000:2010:PT210
sort -u CARGO3.db #用sort -u命令去除重复行,并进行排序
Acer:*:8000:2010:PT210
HP:China:5600:2010:DM3
LdeaPad:China:8000:2007:U450
SumSung:korea:5400:2009:Q308
ThinkPad:USA:14000:2009:X301
uniq -c CARGO3.db #去除重复行,并打印重复行出现的次数
3 ThinkPad:USA:14000:2009:X301
1 HP:China:5600:2010:DM3
1 SumSung:korea:5400:2009:Q308
1 ThinkPad:USA:14000:2009:X301
1 LdeaPad:China:8000:2007:U450
2 Acer:*:8000:2010:PT210
uniq -d CARGO3.db #只显示重复的记录,每个重复的记录只显示一次
ThinkPad:USA:14000:2009:X301
Acer:*:8000:2010:PT210
uniq -u CARGO3.db #只显示没有相邻重复的记录,重复的记录一次也不显示
HP:China:5600:2010:DM3
SumSung:korea:5400:2009:Q308
ThinkPad:USA:14000:2009:X301
LdeaPad:China:8000:2007:U450
cat WORDLIST
hello, caicai. world: watch, world caicai hello message
message world watch hello into the he she last into.
last save hello caicai, world: message
vi count_word.sh
#!/bin/bash
#
# count_word.sh - print how often each word occurs in a text file,
# most frequent word first.
#
# Usage: count_word.sh filename
#
# Exit status:
#   55  wrong number of arguments
#   56  the named file does not exist (or is not a regular file)
#   0   otherwise; the status of the counting pipeline is not propagated

readonly ARGS=1        # number of arguments the script expects
readonly E_BADARGS=55  # exit status for a wrong argument count
readonly E_NOFILE=56   # exit status for a missing input file

# Require exactly one argument: the name of the file to analyse.
if (( $# != ARGS )); then
  # Diagnostics go to stderr so they never mix with the word counts.
  echo "Usage:$(basename "$0") filename" >&2
  exit "$E_BADARGS"
fi

# The argument must name an existing regular file.
if [[ ! -f "$1" ]]; then
  echo "File \"$1\" does not exist." >&2
  exit "$E_NOFILE"
fi

# Core word-count pipeline:
#   1. sed strips periods, colons and commas; add more characters to the
#      bracket list to strip further punctuation.
#   2. tr puts every word on its own line. -s squeezes the resulting
#      newlines, so runs of spaces and blank lines yield no empty "words".
#   3. sed drops the single empty line that can remain when the input
#      starts with a space or a blank line.
#   4. sort brings identical words together, as uniq only merges
#      adjacent lines.
#   5. uniq -c prefixes each distinct word with its number of occurrences.
#   6. sort -nr orders the result numerically, highest count first.
sed -e 's/[.:,]//g' -- "$1" \
  | tr -s ' ' '\n' \
  | sed -e '/^$/d' \
  | sort \
  | uniq -c \
  | sort -nr
exit 0