# awk 练习

# 计算成绩平均值

# 需求

自动计算并生成成绩报表:只需少量修改即可适配学科类目的增减;计算每个人的成绩总和与平均值,并展示各学科的平均分。

student.txt

成绩表

Allen	80	90	96	98
Mike	93	98	92	91
Zhang	78	76	87	92
Jerry	86	89	68	92
Han	85	95	75	90
Li	78	88	98	100

     2012级3班

student.sh

# Build the grade report by running the student.awk program over student.txt.
awk -f student.awk student.txt
# student.awk -- print a grade report: one row per student with the sum and
# average of their scores, followed by the average of every subject.
#
# Tunables (they depend on how many subjects the input contains):
#   subjectStr   space-separated subject titles in column order; defaults to
#                the list below and can be overridden without editing this
#                file:  awk -v subjectStr="A B C" -f student.awk student.txt
#   $(i + 1)     field holding subject i; the +1 skips the single leading
#                name column, adjust it if more columns precede the scores
#   NF >= 3      row filter that tells score rows from title/footer lines
#   printf       formats of the score, Sum and Average columns
#   NF - 1       number of scores on a row, used as the average divisor

BEGIN {
    printf "%50s\n\n", "2012级调考成绩"
    if (subjectStr == "") {
        subjectStr = "Yuwen Math English Pysical"
    }
    # split() returns the element count; relying on it avoids length(array),
    # which not every awk implementation accepts.
    subjectCount = split(subjectStr, subjectArr)
    printf "%-12s", "name"
    for (i = 1; i <= subjectCount; i++) {
        printf "%-12s", subjectArr[i]
    }
    printf "%-12s%-12s\n", "Sum", "Average"
}
# Score rows only: empty lines have NF == 0 and single-word title/footer
# lines have NF == 1, so both are rejected by this test.
NF >= 3 {
    totalRowCount++ # number of score rows seen so far

    # Echo the row while summing every score on it.
    printf "%-12s", $1
    total = 0
    for (i = 2; i <= NF; i++) {
        total += $i
        printf "%-12d", $i
    }

    # Running per-subject totals, indexed by subject position.
    for (i = 1; i <= subjectCount; i++) {
        subTotalArr[i] += $(i + 1)
    }

    printf "%-12.1f%.2f\n", total, total / (NF - 1)
}
END {
    print "\n各科平均分计算:"
    # Index loop keeps the subjects in header order; "for (k in array)"
    # visits elements in an unspecified order.
    for (i = 1; i <= subjectCount; i++) {
        if (totalRowCount > 0) {
            printf "%-12s%.2f\n", subjectArr[i] ":", subTotalArr[i] / totalRowCount
        } else {
            # No score row matched: avoid a fatal division by zero.
            printf "%-12s%s\n", subjectArr[i] ":", "N/A"
        }
    }
    printf "%100s\n", "-- MADE IN Ya an ljc LOVE cw"
}
                               2012级调考成绩

name        Yuwen       Math        English     Pysical     Sum         Average
Allen       80          90          96          98          364.0       91.00
Mike        93          98          92          91          374.0       93.50
Zhang       78          76          87          92          333.0       83.25
Jerry       86          89          68          92          335.0       83.75
Han         85          95          75          90          345.0       86.25
Li          78          88          98          100         364.0       91.00

各科平均分计算:
Math:       89.33
English:    86.00
Pysical:    93.83
Yuwen:      83.33
                                                    -- MADE IN Ya an ljc LOVE cw

# 查看 tcp 中不同类型连接个数

# Count TCP sockets per connection state: keep only the tcp rows of the
# netstat listing and let array.awk tally them.
netstat -an \
  | grep '^tcp' \
  | awk -f array.awk
# Tally input lines by the value of their sixth field (the connection state
# in the tcp rows this script is fed), then print one "state count" pair per
# distinct value.
{
    stateCount[$6] += 1 # the field value itself is the array subscript
}
END {
    for (state in stateCount)
        print state, stateCount[state]
}

LISTEN 19
ESTABLISHED 17

# 统计日志中的数据示例

# 需求

统计日志中每个用户的成功插入、失败和总计记录数,以及所有用户合计的成功、失败和总计记录数;同时列出有数据丢失的条目。输出格式如下:

User        Total       Success     Failed
han         3239949     1654359     1585590
tracy       3189298     1536173     1653125

有数据丢失的条目:
2020-07-20 11:23:00 7 Batches: user allen insert 26184 records into database:product table:detail, insert 16106 records successfully,failed 10077 records

# 生成模拟数据

模拟数据格式

2020-07-20 11:23:00 1 Batches: user allen insert 21906 records into database:product table:detail, insert 5661 records successfully,failed 16244 records
2020-07-20 11:23:00 2 Batches: user mike insert 2060 records into database:product table:detail, insert 821 records successfully,failed 1239 records
2020-07-20 11:23:00 3 Batches: user jerry insert 11310 records into database:product table:detail, insert 7741 records successfully,failed 3569 records

执行模拟数据生成脚本

sh insert.sh

# 实现

sh db.sh 20200720

db.sh

# $1 is the date suffix of the log to analyse (e.g. 20200720). It is quoted so
# that a value containing spaces or glob characters stays one file name.
awk -f db.awk "db.log.$1"

db.awk

# db.awk -- summarise batch-insert results per user and list the records
# whose numbers do not add up.
#
# Fields read from each whitespace-separated log record:
#   $6   user name
#   $8   number of records the batch tried to insert
#   $14  number of records inserted successfully
#   $17  number of records that failed
# A record is reported as lost data when $8 differs from $14 + $17.

BEGIN {
    printf "%-12s%-12s%-12s%-12s\n", "User","Total","Success","Failed"
}
# Lines with fewer than 17 fields (blank or truncated) cannot carry the failed
# count; skipping them keeps an empty user name out of the report.
NF >= 17 {
    user = $6
    # Remember users in first-seen order so the report is deterministic.
    if (!(user in Total)) {
        userOrder[++userCount] = user
    }

    Total[user]   += $8
    Success[user] += $14
    Failed[user]  += $17

    TotalAll   += $8
    SuccessAll += $14
    FailedAll  += $17

    if ($8 != $14 + $17) {
        # Sequential index keeps lost-data records in log order.
        missData[++missCount] = $0
    }
}
END {
    for (i = 1; i <= userCount; i++) {
        user = userOrder[i]
        printf "%-12s%-12s%-12s%-12s\n", user, Total[user], Success[user], Failed[user]
    }
    # "+ 0" forces a numeric value so an empty log prints 0 rather than blanks.
    printf "%-12s%-12s%-12s%-12s\n\n", "", TotalAll + 0, SuccessAll + 0, FailedAll + 0

    print "有数据丢失的条目:"
    for (i = 1; i <= missCount; i++) {
        print missData[i]
    }
}

输出

User        Total       Success     Failed
han         3239949     1654359     1585590
tracy       3189298     1536173     1653125
jerry       3138809     1580795     1558014
mike        3022417     1485509     1536916
allen       3281105     1585843     1695259
lilei       2853588     1547752     1305836
            18725166    9390431     9334740

有数据丢失的条目:
2020-07-20 11:23:00 7 Batches: user allen insert 26184 records into database:product table:detail, insert 16106 records successfully,failed 10077 records
2020-07-20 11:23:01 31 Batches: user allen insert 3188 records into database:product table:detail, insert 776 records successfully,failed 2411 records
2020-07-20 11:23:00 20 Batches: user mike insert 30701 records into database:product table:detail, insert 5667 records successfully,failed 25042 records
2020-07-20 11:23:00 1 Batches: user allen insert 21906 records into database:product table:detail, insert 5661 records successfully,failed 16244 records