开始
首先把我们要采集的日志拿出来,大概这个样子
123.131.xx.xxx 307 0.012 [2018-01-16T10:42:50+08:00] POST /login HTTP/1.1 - 0 .......
121.19.xx.xx 200 0.010 [2018-01-16T10:42:51+08:00] GET / HTTP/1.1 - 4228 ........
120.221.xxx.xx 200 0.007 [2018-01-16T10:42:56+08:00] GET / HTTP/1.1 - 4227 .........
而我所接触的服务中一个服务大概每天产生90万条访问日志,而类似的服务有6个左右。其他一些零散的服务每个每天大概产生日志30-40万条左右。再来看看我的机器性能,4核8G带宽1M的一台机器,上面运行了zabbix,jenkins,mysql等程序,白天有日志查看需求的时候,带宽占用也比较大。
所以一次性运行完,而且还要让统计后的结果尽量的小,就成了需要思考的问题。不是收集所有日志而只是把相同的统计到一起,所以时间粒度也就需要放大一点,这里我统计的每个小时不同URL的访问时间,IP,状态码等。当然如果需要更加精确的统计数据比如说秒,分也是可以做的,这个放到后面再说。
为什么用go?
1.我刚好开始学go语言,才把语法弄清楚了一些。
2.看到一篇讲词频统计的代码片段 地址https://studygolang.com/articles/3393 觉得这个刚好能解决我的问题就照着写了。
3.一次编译到处运行,这一点是我觉得最爽的地方
首先是引入需要用到的包
package main
import (
"bufio"
"fmt"
"os"
"strings"
"flag"
"github.com/astaxie/beego/orm"
_ "github.com/go-sql-driver/mysql"
"strconv"
"time"
)
之后定义数据的格式
//用于Nginx响应时间
// ngx_res is the row registered with the ORM for the ngx_res table. One row
// holds, for a project, URL and hour, how many requests fell into each
// response-time bucket.
type ngx_res struct {
	Id      int64
	Date    time.Time
	Url     string
	Project string
	// Request counts per bucket. read fills them using the thresholds 0.01,
	// 0.05, 0.1 and 0.5 on the logged response time; Dayu500 counts everything
	// above the last threshold. (The names suggest milliseconds.)
	Xiaoyu10, Xiaoyu50, Xiaoyu100, Xiaoyu500, Dayu500 int
}
//用于IP访问次数
// ngx_ip is the row registered with the ORM for the ngx_ip table. One row
// records how many requests a single IP made to a project within one hour.
type ngx_ip struct {
	Id      int64
	Date    time.Time
	Project string
	Ip      string
	// Times is the request count. It is an int, like ngx_access.Times and the
	// value Add_ip inserts; a string column would sort counts lexically.
	Times int
}
//用于状态码,Url,次数
// ngx_access is the row registered with the ORM (see RegisterDb) for the
// ngx_access table. Add_access fills one row per project, hour, URL and
// status code.
type ngx_access struct {
// Id is the row identifier; it is never set by the INSERT in Add_access.
Id int64
// Date is the hour the counted requests belong to.
Date time.Time
// Project is the project name main derives from the log file name.
Project string
// Code is the HTTP status code, parsed from text by Add_access.
Code int64
// Url is the request URL with everything from the first "?" or "=" removed.
Url string
// Times is the number of requests counted for this hour/URL/status.
Times int
}
// time_res holds the per-bucket request counters that read keeps for one
// "hour:url" key. The buckets are split at 0.01, 0.05, 0.1 and 0.5 of the
// logged response time; times_dayu_500 counts everything above 0.5.
type time_res struct {
	times_10, times_50, times_100, times_500, times_dayu_500 int
}
然后定义三个map 之后会把统计的东西放进去
// Aggregation tables filled by read and written to the database by main.
var (
	// hourmap counts requests per "hour:url:status" key.
	hourmap = make(map[string]int)
	// resmap keeps the response-time buckets per "hour:url" key.
	resmap = make(map[string]time_res)
	// ipmap counts requests per "hour:ip" key.
	ipmap = make(map[string]int)
)
定义一个读取及统计文件的函数
// read aggregates the nginx access log at filename into the package-level
// maps:
//
//   - hourmap counts requests per "hour:url:status"
//   - ipmap counts requests per "hour:ip"
//   - resmap counts requests per "hour:url", split into response-time buckets
//
// Each line is expected to be space separated in the form
// "<ip> <status> <response time> [<timestamp>] <method> <url> ...". Lines
// that do not match are skipped and their number is printed once at the end.
// If the file cannot be opened the error is printed and nothing is recorded.
func read(filename string) {
	fi, err := os.Open(filename)
	if err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}
	defer fi.Close()

	br := bufio.NewReader(fi)
	skipped := 0
	for done := false; !done; {
		// ReadString can return data together with an error when the last
		// line has no trailing newline, so the data is processed before the
		// loop stops.
		raw, readErr := br.ReadString('\n')
		done = readErr != nil

		line := strings.TrimRight(raw, "\r\n")
		if line == "" {
			continue
		}

		fields := strings.Split(line, " ")
		if len(fields) < 6 {
			skipped++
			continue
		}
		ip, code := fields[0], fields[1]

		// The timestamp field looks like "[2018-01-16T10:42:50+08:00]"; only
		// the part up to the hour ("2018-01-16T10") is kept.
		stamp := strings.Split(fields[3], "[")
		if len(stamp) < 2 {
			skipped++
			continue
		}
		hour := strings.Split(stamp[1], ":")[0]

		// Drop the query string and anything after an "=" so that requests
		// differing only in parameters are counted together.
		url := strings.Split(strings.Split(fields[5], "?")[0], "=")[0]

		// One unparsable response time must not discard the rest of the file.
		resp, parseErr := strconv.ParseFloat(fields[2], 64)
		if parseErr != nil {
			skipped++
			continue
		}

		hourmap[hour+":"+url+":"+code]++
		ipmap[hour+":"+ip]++

		// A missing key yields the zero time_res, so new and existing keys
		// share one bucket selection.
		key := hour + ":" + url
		bucket := resmap[key]
		switch {
		case resp <= 0.01:
			bucket.times_10++
		case resp <= 0.05:
			bucket.times_50++
		case resp <= 0.1:
			bucket.times_100++
		case resp <= 0.5:
			bucket.times_500++
		default:
			bucket.times_dayu_500++
		}
		resmap[key] = bucket
	}

	if skipped > 0 {
		fmt.Printf("Skipped %d malformed lines in %s\n", skipped, filename)
	}
}
写入数据库
需要在这里说下的是,如果你统计的是完全不相干的项目的日志,我认为不放在一个表里面是比较好的,也就是修改一下上面的数据格式名称,在下面初始化数据库的时候再修改new()中的东西,再在后面改下sql中的表名。
这里写入数据库我使用beego提供的orm,事实上我只会这一种方式。选用的数据库是mariadb。这里有个坑,mariadb的timezone CST 是美国中部时间……
初始化数据库
// RegisterDb registers the MySQL driver, the "default" database connection and
// the three row models with the beego ORM.
//
// uname, passwd, ipaddr, port and databasename are joined into the DSN
// "uname:passwd@tcp(ipaddr:port)/databasename?charset=utf8". Registration
// errors are printed instead of being silently dropped; the remaining steps
// still run, as they did before.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
	if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
	if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}
定义插入数据的函数
// Add_access inserts one aggregated row into the ngx_access table.
//
// project is the project name, date the hour as a formatted string, code the
// HTTP status code as decimal text, url the request URL and times the request
// count. It returns an error when code is not a base-10 integer or when the
// INSERT fails.
func Add_access(project string, date string, code string, url string, times int) error {
	o := orm.NewOrm()
	status, err := strconv.ParseInt(code, 10, 64)
	if err != nil {
		return err
	}
	// Only the error of Exec matters here; the result is not used.
	_, err = o.Raw("INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);", date, project, status, url, times).Exec()
	return err
}
// Add_ip inserts one aggregated row into the ngx_ip table: the number of
// requests (times) that ip made to project during the hour given by date.
// It returns the error of the INSERT, if any.
func Add_ip(project string, date string, ip string, times int) error {
	o := orm.NewOrm()
	// Only the error of Exec matters here; the result is not used.
	_, err := o.Raw("INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);", date, project, ip, times).Exec()
	return err
}
// Add_res inserts one aggregated row into the ngx_res table: for project, url
// and the hour given by date, the number of requests in each response-time
// bucket (xiaoyu10 … dayu500). It returns the error of the INSERT, if any.
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
	o := orm.NewOrm()
	// Only the error of Exec matters here; the result is not used.
	_, err := o.Raw("INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);", date, project, url, xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500).Exec()
	return err
}
再定义一个时间替换函数,作用是把字符串转换为时间类型
// time_tihuan converts a timestamp string of the form
// "2006-01-02T15:04:05 -0700" into a time.Time.
//
// When date_hour does not match that layout, the parse error is printed to
// standard output and the value returned by time.Parse (the zero time) is
// handed back.
func time_tihuan(date_hour string) time.Time {
	const layout = "2006-01-02T15:04:05 -0700"

	parsed, parseErr := time.Parse(layout, date_hour)
	if parseErr == nil {
		return parsed
	}
	fmt.Println(parseErr)
	return parsed
}
初始化数据库填入数据库的连接信息
// init registers the database connection before main runs. The arguments are
// placeholders: replace them with the real user name, password, host, port
// and database name.
func init() {
	RegisterDb("username", "password", "xxx.xxx.xxx.xxx", "xxxx", "databasename")
}
主函数定义
// main reads the nginx access log named by the -filename flag, aggregates it
// with read, and stores the hourly statistics in the database.
//
// The file name must look like "<date>_<project>.log": the text between the
// first "_" and the first "." of the base name is used as the project name.
func main() {
	// The log file name is passed on the command line.
	var filename string
	flag.StringVar(&filename, "filename", "2017-12-35_xxxxx.log", "nginx access log filename!")
	flag.Parse()

	// Derive the project name from the file name. The directory part is
	// stripped first so dots or underscores in the path cannot confuse the
	// split, and a name without "_" is rejected instead of causing an index
	// panic.
	base := filename[strings.LastIndex(filename, "/")+1:]
	nameParts := strings.Split(strings.Split(base, ".")[0], "_")
	if len(nameParts) < 2 {
		fmt.Printf("Error: cannot derive project name from %q, expected <date>_<project>.log\n", filename)
		return
	}
	project := nameParts[1]

	// read fills hourmap, ipmap and resmap.
	read(filename)

	orm.Debug = true
	if err := orm.RunSyncdb("default", false, true); err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}

	// hourStamp turns an hour key such as "2018-01-16T10" into the string
	// written to the date column. The +0800 offset is hard-coded.
	hourStamp := func(hour string) string {
		return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
	}
	// report prints a failed insert and lets the remaining rows proceed.
	report := func(err error) {
		if err != nil {
			fmt.Printf("Error: %s\n", err)
		}
	}

	// URLs requested only once in an hour (scanners and the like) are not
	// stored individually; they are summed per hour under the pseudo URL
	// "oneurl" with status 200.
	singleHits := make(map[string]int)
	for k, v := range hourmap {
		// Keys are "hour:url:status". The URL itself may contain ":", so the
		// key is cut at the first and the last colon only.
		first := strings.Index(k, ":")
		last := strings.LastIndex(k, ":")
		if first < 0 || first == last {
			continue
		}
		hour := k[:first]
		if v == 1 {
			singleHits[hour]++
			continue
		}
		report(Add_access(project, hourStamp(hour), k[last+1:], k[first+1:last], v))
	}
	for hour, v := range singleHits {
		report(Add_access(project, hourStamp(hour), "200", "oneurl", v))
	}

	// Only IPs with more than 5 requests in an hour are stored. Keys are
	// "hour:ip"; splitting once keeps IPv6 addresses intact.
	for k, v := range ipmap {
		if v <= 5 {
			continue
		}
		parts := strings.SplitN(k, ":", 2)
		if len(parts) < 2 {
			continue
		}
		report(Add_ip(project, hourStamp(parts[0]), parts[1], v))
	}

	// Response-time buckets, keyed by "hour:url".
	for k, v := range resmap {
		parts := strings.SplitN(k, ":", 2)
		if len(parts) < 2 {
			continue
		}
		report(Add_res(project, hourStamp(parts[0]), parts[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500))
	}
}
完整的代码
package main
import (
"bufio"
"fmt"
"os"
"strings"
//"sort"
"flag"
"github.com/astaxie/beego/orm"
_ "github.com/go-sql-driver/mysql"
"strconv"
"time"
)
//写入数据库
// ngx_res is the row registered with the ORM for the ngx_res table. One row
// holds, for a project, URL and hour, how many requests fell into each
// response-time bucket.
type ngx_res struct {
	Id      int64
	Date    time.Time
	Url     string
	Project string
	// Request counts per bucket. read fills them using the thresholds 0.01,
	// 0.05, 0.1 and 0.5 on the logged response time; Dayu500 counts everything
	// above the last threshold. (The names suggest milliseconds.)
	Xiaoyu10, Xiaoyu50, Xiaoyu100, Xiaoyu500, Dayu500 int
}
// ngx_ip is the row registered with the ORM for the ngx_ip table. One row
// records how many requests a single IP made to a project within one hour.
type ngx_ip struct {
	Id      int64
	Date    time.Time
	Project string
	Ip      string
	// Times is the request count. It is an int, like ngx_access.Times and the
	// value Add_ip inserts; a string column would sort counts lexically.
	Times int
}
// ngx_access is the row registered with the ORM (see RegisterDb) for the
// ngx_access table. Add_access fills one row per project, hour, URL and
// status code.
type ngx_access struct {
// Id is the row identifier; it is never set by the INSERT in Add_access.
Id int64
// Date is the hour the counted requests belong to.
Date time.Time
// Project is the project name main derives from the log file name.
Project string
// Code is the HTTP status code, parsed from text by Add_access.
Code int64
// Url is the request URL with everything from the first "?" or "=" removed.
Url string
// Times is the number of requests counted for this hour/URL/status.
Times int
}
// time_res holds the per-bucket request counters that read keeps for one
// "hour:url" key. The buckets are split at 0.01, 0.05, 0.1 and 0.5 of the
// logged response time; times_dayu_500 counts everything above 0.5.
type time_res struct {
	times_10, times_50, times_100, times_500, times_dayu_500 int
}
// Add_access inserts one aggregated row into the ngx_access table.
//
// project is the project name, date the hour as a formatted string, code the
// HTTP status code as decimal text, url the request URL and times the request
// count. It returns an error when code is not a base-10 integer or when the
// INSERT fails.
func Add_access(project string, date string, code string, url string, times int) error {
	o := orm.NewOrm()
	status, err := strconv.ParseInt(code, 10, 64)
	if err != nil {
		return err
	}
	// Only the error of Exec matters here; the result is not used.
	_, err = o.Raw("INSERT INTO `ngx_access` (`date`, `project`, `code`, `url`, `times`) VALUES (?, ?, ?, ?, ?);", date, project, status, url, times).Exec()
	return err
}
// Add_ip inserts one aggregated row into the ngx_ip table: the number of
// requests (times) that ip made to project during the hour given by date.
// It returns the error of the INSERT, if any.
func Add_ip(project string, date string, ip string, times int) error {
	o := orm.NewOrm()
	// Only the error of Exec matters here; the result is not used.
	_, err := o.Raw("INSERT INTO `ngx_ip` (`date`, `project`, `ip`,`times`) VALUES (?, ?, ?, ?);", date, project, ip, times).Exec()
	return err
}
// Add_res inserts one aggregated row into the ngx_res table: for project, url
// and the hour given by date, the number of requests in each response-time
// bucket (xiaoyu10 … dayu500). It returns the error of the INSERT, if any.
func Add_res(project string, date string, url string, xiaoyu10 int, xiaoyu50 int, xiaoyu100 int, xiaoyu500 int, dayu500 int) error {
	o := orm.NewOrm()
	// Only the error of Exec matters here; the result is not used.
	_, err := o.Raw("INSERT INTO `ngx_res` (`date`, `project`,`url`,`xiaoyu10`,`xiaoyu50`,`xiaoyu100`,`xiaoyu500`,`dayu500`) VALUES (?, ?, ?, ?, ?, ?, ?, ?);", date, project, url, xiaoyu10, xiaoyu50, xiaoyu100, xiaoyu500, dayu500).Exec()
	return err
}
//初始化数据库
// RegisterDb registers the MySQL driver, the "default" database connection and
// the three row models with the beego ORM.
//
// uname, passwd, ipaddr, port and databasename are joined into the DSN
// "uname:passwd@tcp(ipaddr:port)/databasename?charset=utf8". Registration
// errors are printed instead of being silently dropped; the remaining steps
// still run, as they did before.
func RegisterDb(uname string, passwd string, ipaddr string, port string, databasename string) {
	if err := orm.RegisterDriver("mysql", orm.DRMySQL); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	dsn := uname + ":" + passwd + "@tcp(" + ipaddr + ":" + port + ")/" + databasename + "?charset=utf8"
	if err := orm.RegisterDataBase("default", "mysql", dsn, 10); err != nil {
		fmt.Printf("Error: %s\n", err)
	}

	orm.RegisterModel(new(ngx_access), new(ngx_ip), new(ngx_res))
}
// Aggregation tables filled by read and written to the database by main.
var (
	// hourmap counts requests per "hour:url:status" key.
	hourmap = make(map[string]int)
	// resmap keeps the response-time buckets per "hour:url" key.
	resmap = make(map[string]time_res)
	// ipmap counts requests per "hour:ip" key.
	ipmap = make(map[string]int)
)
//读取文件
// read aggregates the nginx access log at filename into the package-level
// maps:
//
//   - hourmap counts requests per "hour:url:status"
//   - ipmap counts requests per "hour:ip"
//   - resmap counts requests per "hour:url", split into response-time buckets
//
// Each line is expected to be space separated in the form
// "<ip> <status> <response time> [<timestamp>] <method> <url> ...". Lines
// that do not match are skipped and their number is printed once at the end.
// If the file cannot be opened the error is printed and nothing is recorded.
func read(filename string) {
	fi, err := os.Open(filename)
	if err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}
	defer fi.Close()

	br := bufio.NewReader(fi)
	skipped := 0
	for done := false; !done; {
		// ReadString can return data together with an error when the last
		// line has no trailing newline, so the data is processed before the
		// loop stops.
		raw, readErr := br.ReadString('\n')
		done = readErr != nil

		line := strings.TrimRight(raw, "\r\n")
		if line == "" {
			continue
		}

		fields := strings.Split(line, " ")
		if len(fields) < 6 {
			skipped++
			continue
		}
		ip, code := fields[0], fields[1]

		// The timestamp field looks like "[2018-01-16T10:42:50+08:00]"; only
		// the part up to the hour ("2018-01-16T10") is kept.
		stamp := strings.Split(fields[3], "[")
		if len(stamp) < 2 {
			skipped++
			continue
		}
		hour := strings.Split(stamp[1], ":")[0]

		// Drop the query string and anything after an "=" so that requests
		// differing only in parameters are counted together.
		url := strings.Split(strings.Split(fields[5], "?")[0], "=")[0]

		// One unparsable response time must not discard the rest of the file.
		resp, parseErr := strconv.ParseFloat(fields[2], 64)
		if parseErr != nil {
			skipped++
			continue
		}

		hourmap[hour+":"+url+":"+code]++
		ipmap[hour+":"+ip]++

		// A missing key yields the zero time_res, so new and existing keys
		// share one bucket selection.
		key := hour + ":" + url
		bucket := resmap[key]
		switch {
		case resp <= 0.01:
			bucket.times_10++
		case resp <= 0.05:
			bucket.times_50++
		case resp <= 0.1:
			bucket.times_100++
		case resp <= 0.5:
			bucket.times_500++
		default:
			bucket.times_dayu_500++
		}
		resmap[key] = bucket
	}

	if skipped > 0 {
		fmt.Printf("Skipped %d malformed lines in %s\n", skipped, filename)
	}
}
//时间转换函数
// time_tihuan converts a timestamp string of the form
// "2006-01-02T15:04:05 -0700" into a time.Time.
//
// When date_hour does not match that layout, the parse error is printed to
// standard output and the value returned by time.Parse (the zero time) is
// handed back.
func time_tihuan(date_hour string) time.Time {
	const layout = "2006-01-02T15:04:05 -0700"

	parsed, parseErr := time.Parse(layout, date_hour)
	if parseErr == nil {
		return parsed
	}
	fmt.Println(parseErr)
	return parsed
}
// init registers the database connection before main runs. The values below
// are placeholders for the real connection settings.
func init() {
	const (
		user     = "username"
		password = "password"
		host     = "ipaddr"
		port     = "port"
		database = "databasename"
	)
	RegisterDb(user, password, host, port, database)
}
// main reads the nginx access log named by the -filename flag, aggregates it
// with read, and stores the hourly statistics in the database.
//
// The file name must look like "<date>_<project>.log": the text between the
// first "_" and the first "." of the base name is used as the project name.
func main() {
	// The log file name is passed on the command line.
	var filename string
	flag.StringVar(&filename, "filename", "2017-12-35_mobile.log", "nginx access log filename!")
	flag.Parse()

	// Derive the project name from the file name. The directory part is
	// stripped first so dots or underscores in the path cannot confuse the
	// split, and a name without "_" is rejected instead of causing an index
	// panic.
	base := filename[strings.LastIndex(filename, "/")+1:]
	nameParts := strings.Split(strings.Split(base, ".")[0], "_")
	if len(nameParts) < 2 {
		fmt.Printf("Error: cannot derive project name from %q, expected <date>_<project>.log\n", filename)
		return
	}
	project := nameParts[1]

	// read fills hourmap, ipmap and resmap.
	read(filename)

	orm.Debug = true
	if err := orm.RunSyncdb("default", false, true); err != nil {
		fmt.Printf("Error: %s\n", err)
		return
	}

	// hourStamp turns an hour key such as "2018-01-16T10" into the string
	// written to the date column. The +0800 offset is hard-coded.
	hourStamp := func(hour string) string {
		return time_tihuan(hour + ":00:00 +0800").Format("2006-01-02 15:04:05 -0700")
	}
	// report prints a failed insert and lets the remaining rows proceed.
	report := func(err error) {
		if err != nil {
			fmt.Printf("Error: %s\n", err)
		}
	}

	// URLs requested only once in an hour (scanners and the like) are not
	// stored individually; they are summed per hour under the pseudo URL
	// "oneurl" with status 200.
	singleHits := make(map[string]int)
	for k, v := range hourmap {
		// Keys are "hour:url:status". The URL itself may contain ":", so the
		// key is cut at the first and the last colon only.
		first := strings.Index(k, ":")
		last := strings.LastIndex(k, ":")
		if first < 0 || first == last {
			continue
		}
		hour := k[:first]
		if v == 1 {
			singleHits[hour]++
			continue
		}
		report(Add_access(project, hourStamp(hour), k[last+1:], k[first+1:last], v))
	}
	for hour, v := range singleHits {
		report(Add_access(project, hourStamp(hour), "200", "oneurl", v))
	}

	// Only IPs with more than 5 requests in an hour are stored. Keys are
	// "hour:ip"; splitting once keeps IPv6 addresses intact.
	for k, v := range ipmap {
		if v <= 5 {
			continue
		}
		parts := strings.SplitN(k, ":", 2)
		if len(parts) < 2 {
			continue
		}
		report(Add_ip(project, hourStamp(parts[0]), parts[1], v))
	}

	// Response-time buckets, keyed by "hour:url".
	for k, v := range resmap {
		parts := strings.SplitN(k, ":", 2)
		if len(parts) < 2 {
			continue
		}
		report(Add_res(project, hourStamp(parts[0]), parts[1], v.times_10, v.times_50, v.times_100, v.times_500, v.times_dayu_500))
	}
}