# awk 练习
# 计算成绩平均值
# 需求
自动计算并生成成绩报表。要求只需少量修改即可适配学科类目的任意增减,并计算出每个人的成绩总和与平均值,同时展示各个学科的平均分。
student.txt
成绩表
Allen 80 90 96 98
Mike 93 98 92 91
Zhang 78 76 87 92
Jerry 86 89 68 92
Han 85 95 75 90
Li 78 88 98 100
2012级3班
student.sh
# Run the awk program student.awk over the score sheet student.txt.
awk -f student.awk student.txt
# 超参数:(受学科长度影响)
# BEGIN的subjectStr(标题)
# BEGIN的subjectIndexArr(根据学科前面有几个字段变化,目前只有一个name)
# 匹配模式中的NF(按字段数过滤出成绩行)
# {command}中的printf(根据逻辑是否匹配总分、平均分或者其他)
# {command}中的subLen(根据txt文档变化)
# Print the report title and the column header row, and build the lookup
# tables the per-record rule relies on.
#   subjectStr      : space-separated subject names, listed in the same order
#                     as the score columns of the input file.
#   subjectArr      : subjectStr split into a 1-based array.
#   subjectIndexArr : maps a subject name to the field number ($n) that holds
#                     its score.
BEGIN {
    printf "%50s\n\n", "2012级调考成绩"
    subjectStr = "Yuwen Math English Pysical"
    # split() returns the number of elements. Using that count avoids
    # length(array), which some awk implementations do not support, and keeps
    # the loop bound from being recomputed on every iteration.
    subjectCount = split(subjectStr, subjectArr)
    printf "%-12s", "name"
    for (i = 1; i <= subjectCount; i++) {
        printf "%-12s", subjectArr[i]
        # Scores start at $2 because $1 holds the student's name.
        subjectIndexArr[subjectArr[i]] = i + 1
    }
    printf "%-12s%-12s\n", "Sum", "Average"
}
# Per-student rule: runs for every non-empty line with at least three fields
# (a name plus two or more scores). It counts the row, adds each score to its
# subject's running total, and prints the row followed by the student's sum
# and average.
NF >= 3 && $0 !~ /^$/ {
    totalRowCount++

    # Add this student's score to each subject's running total.
    for (key in subjectArr) {
        subject = subjectArr[key]
        subTotalArr[subject] += $(subjectIndexArr[subject])
    }

    # Echo the name, then every score, summing the scores on the way.
    printf "%-12s", $1
    total = 0
    for (col = 2; col <= NF; col++) {
        total += $col
        printf "%-12d", $col
    }

    # Every field after the name is a score.
    subLen = NF - 1
    printf "%-12.1f%.2f\n", total, total / subLen
}
# Print each subject's average over all score rows, then the sign-off line.
# Reads subjectArr, subTotalArr and totalRowCount filled in by the earlier rules.
END {
    print "\n各科平均分计算:"
    # Walk subjectArr by its numeric index (split() numbers elements from 1)
    # so subjects appear in header order; the order of "for (k in arr)" is
    # unspecified and differs between awk implementations.
    for (i = 1; i in subjectArr; i++) {
        fieldStr = subjectArr[i]
        # With no score rows there is nothing to average: report 0 rather
        # than aborting on a division by zero.
        avg = (totalRowCount > 0) ? subTotalArr[fieldStr] / totalRowCount : 0
        printf "%-12s%.2f\n", fieldStr ":", avg
    }
    printf "%100s\n", "-- MADE IN Ya an ljc LOVE cw"
}
2012级调考成绩
name Yuwen Math English Pysical Sum Average
Allen 80 90 96 98 364.0 91.00
Mike 93 98 92 91 374.0 93.50
Zhang 78 76 87 92 333.0 83.25
Jerry 86 89 68 92 335.0 83.75
Han 85 95 75 90 345.0 86.25
Li 78 88 98 100 364.0 91.00
各科平均分计算:
Math: 89.33
English: 86.00
Pysical: 93.83
Yuwen: 83.33
-- MADE IN Ya an ljc LOVE cw
# 查看 tcp 中不同类型连接个数
netstat -an | grep "^tcp" | awk -f array.awk
# Tally input lines by the value of their sixth field, then print one
# "value count" pair per distinct value once all input has been read.
{ tally[$6] += 1 }

END {
    for (key in tally)
        print key, tally[key]
}
LISTEN 19
ESTABLISHED 17
# 统计日志中的数据示例
# 需求
统计出日志数据中每个用户的总插入数、成功数和失败数,以及所有用户合计的总数、成功数和失败数;并列出存在数据丢失(总数 ≠ 成功数 + 失败数)的条目。输出格式如下:
User Total Success Failed
han 3239949 1654359 1585590
tracy 3189298 1536173 1653125
有数据丢失的条目:
2020-07-20 11:23:00 7 Batches: user allen insert 26184 records into database:product table:detail, insert 16106 records successfully,failed 10077 records
# 生成模拟数据
模拟数据格式
2020-07-20 11:23:00 1 Batches: user allen insert 21906 records into database:product table:detail, insert 5661 records successfully,failed 16244 records
2020-07-20 11:23:00 2 Batches: user mike insert 2060 records into database:product table:detail, insert 821 records successfully,failed 1239 records
2020-07-20 11:23:00 3 Batches: user jerry insert 11310 records into database:product table:detail, insert 7741 records successfully,failed 3569 records
执行模拟数据生成脚本
sh insert.sh
# 实现
sh db.sh 20200720
db.sh
# Summarise the log whose date suffix is given as $1
# (e.g. 20200720 -> db.log.20200720).
# The quotes keep the file name a single word, and ${1:?...} stops with a
# usage hint when the argument is missing instead of opening "db.log.".
awk -f db.awk "db.log.${1:?usage: sh db.sh YYYYMMDD}"
db.awk
# Summarise a batch-insert log: per-user and overall counts of attempted,
# successful and failed records, followed by every log line whose counts do
# not add up.
# Whitespace-separated fields read from each log line:
#   $6  user name
#   $8  records the batch tried to insert
#   $14 records inserted successfully
#   $17 records that failed
BEGIN {
    printf "%-12s%-12s%-12s%-12s\n", "User","Total","Success","Failed"
}

# Lines with fewer than 17 fields (e.g. blank lines) cannot carry all three
# counters; skipping them avoids a bogus row for an empty user name.
NF >= 17 {
    user = $6
    # Remember users in order of first appearance so the report order is
    # deterministic; "for (k in arr)" order is unspecified.
    if (!(user in Total)) {
        users[++userCount] = user
    }

    Total[user]   += $8
    Success[user] += $14
    Failed[user]  += $17
    TotalAll   += $8
    SuccessAll += $14
    FailedAll  += $17

    # A batch has lost data when success + failed does not equal the total.
    # Keep such lines under a sequential index so they print in log order.
    if ($8 != $14 + $17) {
        missData[++missCount] = $0
    }
}

END {
    for (u = 1; u <= userCount; u++) {
        user = users[u]
        printf "%-12s%-12s%-12s%-12s\n", user, Total[user], Success[user], Failed[user]
    }
    # Grand totals across all users; the user column is left blank.
    printf "%-12s%-12s%-12s%-12s\n\n", "", TotalAll , SuccessAll, FailedAll
    print "有数据丢失的条目:"
    for (m = 1; m <= missCount; m++) {
        print missData[m]
    }
}
输出
User Total Success Failed
han 3239949 1654359 1585590
tracy 3189298 1536173 1653125
jerry 3138809 1580795 1558014
mike 3022417 1485509 1536916
allen 3281105 1585843 1695259
lilei 2853588 1547752 1305836
18725166 9390431 9334740
有数据丢失的条目:
2020-07-20 11:23:00 7 Batches: user allen insert 26184 records into database:product table:detail, insert 16106 records successfully,failed 10077 records
2020-07-20 11:23:01 31 Batches: user allen insert 3188 records into database:product table:detail, insert 776 records successfully,failed 2411 records
2020-07-20 11:23:00 20 Batches: user mike insert 30701 records into database:product table:detail, insert 5667 records successfully,failed 25042 records
2020-07-20 11:23:00 1 Batches: user allen insert 21906 records into database:product table:detail, insert 5661 records successfully,failed 16244 records