DAY8 whereis，which，sort，uniq，cut，wc，awk基本用法

which whereis

# which ls #查找ls命令的绝对路径

# whereis ls #查找命令的路径、帮助手册等

# whereis -b ls #仅显示命令所在的路径

# type -a ls #查看命令的绝对路径(包括别名)

sort [OPTION]... [FILE]...

命令：sort

选项：-r：倒序

-n：按数字排序

-t：指定分隔符(默认空格)

-k：指定第几列, 指定几列几字符（指定1,1 3.1,3.3）

cat >> file.txt <<EOF

b:3

c:2

a:4

e:5

d:1

f:11

EOF

cat >> file2.txt <<EOF

oldxu:20

oldguo:10

oldli:30

oldboy:0

EOF

[root@oldboy ~]# sort -t ":" -k2 -nr file2.txt | head -3

oldli:30

oldxu:20

oldguo:10

[root@oldboy ~]# wget http://fj.xuliangwei.com/public/ip.txt

[root@oldboy ~]# sort -t "." -k3.1,3.1 -k4.1,4.3 -n ip.txt

uniq [OPTION]... [INPUT [OUTPUT]]

#选项：-c 计算重复的行

cat >>file3.txt <<EOF

abc

123

abc

123

EOF

实现思路：先排序，然后去重，最后统计

[root@oldboy ~]# sort file3.txt #必须先排序（uniq只能去除相邻的重复行，不排序则相同的内容不在一起，无法去重）

123

123

abc

abc

[root@oldboy ~]#

[root@oldboy ~]# sort file3.txt | uniq #去重

123

abc

[root@oldboy ~]# sort file3.txt | uniq -c #统计出现的次数

2 123

2 abc

#面试题: 请统计分析如下日志，打印出访问次数最多的前3个IP

[root@oldboy ~]# cat >>ip2.txt<<EOF

192.168.3.1

192.168.3.2

192.168.3.3

192.168.2.20

192.168.2.21

192.168.2.22

192.168.0.151

192.168.0.152

192.168.0.153

192.168.0.151

192.168.2.22

192.168.1.10

192.168.1.11

192.168.1.12

192.168.0.151

192.168.1.1

192.168.2.2

192.168.0.151

192.168.3.3

192.168.2.20

192.168.1.21

192.168.0.151

192.168.2.22

192.168.0.151

192.168.2.22

192.168.1.152

192.168.0.153

192.168.3.10

192.168.1.11

192.168.2.22

192.168.3.12

EOF

[root@oldboy ~]# sort ip2.txt | uniq -c | sort -nr | head -3

6 192.168.0.151

5 192.168.2.22

2 192.168.3.3

cut OPTION... [FILE]...

#选项：

-d 指定分隔符

-f 数字,取第几列 -f3,6 表示取第3列和第6列

-c 按字符取(空格也算)

1.产生文件

[root@oldboy ~]# echo "Im oldxu, is QQ 552408925" > oldboy.txt

2.需求：过滤出oldboy.txt文件里 oldxu以及552408925

3.如何实现：

实现方法1：

[root@oldboy ~]# cut -d " " -f 2,5 oldboy.txt

oldxu, 552408925

[root@oldboy ~]# cut -d " " -f 2,5 oldboy.txt | sed 's#,##g'

oldxu 552408925

实现方法2：

[root@oldboy ~]# awk '{print $2,$5}' oldboy.txt

oldxu, 552408925

[root@oldboy ~]# awk '{print $2,$5}' oldboy.txt | sed 's#,##g'

oldxu 552408925

实现方法3： awk处理

[root@oldboy ~]# awk -F "," '{print $1,$2}' oldboy.txt | awk '{print $2,$5}'

oldxu 552408925

[root@oldboy ~]# awk -F "[ ,]" '{print $2,$6}' oldboy.txt

oldxu 552408925

高级用法

[ ,]+ 其中 + 表示前面的字符（这里是方括号中的空格或逗号）重复一次或多次

空格算一个分隔符

逗号算一个分隔符

空格和逗号挨在一起，也算一个分隔符

空格逗号空格，全算一个分隔符

[root@oldboy ~]# awk -F "[ ,]+" '{print $2,$5}' oldboy.txt

oldxu 552408925

wc [OPTION]... [FILE]...

#选项：

-l 显示文件行数

-c 显示文件字节数

-w 显示文件单词数

方法1：统计一个文件有多少行，使用wc -l

[root@oldboy ~]# wc -l /etc/services

11176 /etc/services

方法2：

[root@oldboy ~]# cat -n /etc/services | tail -1 | awk '{print $1}'

11176

方法3：

[root@oldboy ~]# grep -n ".*" /etc/services | tail -1 | awk -F ":" '{print $1}'

11176

方法4：仅供参考！！！！！！！不纠结

NR: 行号

$0: awk是逐行处理文件的，每读入一行，就将该行的完整内容赋值给$0变量。

[root@oldboy ~]# awk '{print NR,$0}' /etc/services | tail -1 | awk '{print $1}'

11176

#练习题: 过滤出/etc/passwd以nologin结尾的内容，并统计有多少行

过滤： grep /etc/passwd

条件： nologin结尾的

并且：

统计出现的内容总共有多少行： wc -l

[root@oldboy ~]# grep "nologin$" /etc/passwd | wc -l

习题: 分析如下日志，统计每个域名被访问的次数。

[root@student tmp]# cat >>web.log<<EOF

http://www.xuliangwei.com/index.html

http://www.xuliangwei.com/1.html

http://post.xuliangwei.com/index.html

http://mp3.xuliangwei.com/index.html

http://www.xuliangwei.com/3.html

http://post.xuliangwei.com/2.html

EOF

实现：

[root@oldboy ~]# awk -F '/' '{print $3}' web.log | sort | uniq -c

1 mp3.xuliangwei.com

2 post.xuliangwei.com

3 www.xuliangwei.com

将访问次数最多的排在上面

[root@oldboy ~]# awk -F '/' '{print $3}' web.log | sort | uniq -c | sort -nr

3 www.xuliangwei.com

2 post.xuliangwei.com

1 mp3.xuliangwei.com

DAY8whereis ，which，sort，uniq，cut，wc，awk基本用法

推荐阅读更多精彩内容