第一个故事:趋势
小明每十秒钟向es存入一条当前网卡的信息。该如何统计该网卡某天每小时接收字节数的趋势图?
注:网卡信息中的接收字节数是一直累加的。
{
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "@timestamp": {
              "gte": "2019-01-13T00:00:00.000+08:00",
              "lt": "2019-01-14T00:00:00.000+08:00"
            }
          }
        }
      ]
    }
  },
  "size": 0,
  "aggs": {
    "groupByInterval": {
      "date_histogram": {
        "field": "@timestamp",
        "interval": "1h",
        "format": "yyyy-MM-dd HH:mm:ss",
        "time_zone": "+08:00",
        "min_doc_count": 0
      },
      "aggs": {
        "maxin": {
          "max": {
            "field": "system.network.in.bytes"
          }
        },
        "in_deriv": {
          "derivative": {
            "buckets_path": "maxin",
            "unit": "1s"
          }
        }
      }
    }
  }
}
上面的查询语句将返回:
{
  "took": 260,
  "timed_out": false,
  "_shards": {
    "total": 1211,
    "successful": 1211,
    "skipped": 1205,
    "failed": 0
  },
  "hits": {
    "total": 8640,
    "max_score": 0.0,
    "hits": []
  },
  "aggregations": {
    "groupByInterval": {
      "buckets": [
        {
          "key_as_string": "2019-01-13 00:00:00",
          "key": 1547308800000,
          "doc_count": 360,
          "maxin": {
            "value": 1.5438929488E10
          }
        },
        ...
        ...
        {
          "key_as_string": "2019-01-13 23:00:00",
          "key": 1547391600000,
          "doc_count": 360,
          "maxin": {
            "value": 1.5990460333E10
          },
          "in_deriv": {
            "value": 2883272.0,
            "normalized_value": 800.9088888888889
          }
        }
      ]
    }
  }
}
知识点:
derivative:用于histogram(或date_histogram)的子聚合,可以对histogram聚合中的指标类聚合进行求导(简单来说就是每个时间段的值减去上一个时间段的值)。其中“buckets_path”指定需要求导的聚合名。因为“unit”设置为1s,所以返回结果中的“normalized_value”是平均每秒的变化量(例如最后一个桶:2883272 ÷ 3600 ≈ 800.9)。注意第一个桶没有上一个时间段可减,所以返回结果中它没有“in_deriv”字段;如果需要第一个小时的值,可以把查询的起始时间提前一小时。
第二个故事:听说你要每组的最后一条?
小明每十秒钟向es存入一条当前cpu使用百分比的信息。现有10台主机,该如何获取每台主机最新的一条cpu使用信息?
{
  "aggs": {
    "groupByHostName": {
      "terms": {
        "field": "host.name",
        "size": 10
      },
      "aggs": {
        "lastOne": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                "@timestamp": {
                  "order": "desc"
                }
              }
            ],
            "_source": {
              "includes": ["system.cpu.total.pct"]
            }
          }
        }
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "metricset.name": "cpu"
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": "2019-01-13T00:00:00.000+08:00",
              "lt": "2019-01-14T00:00:00.000+08:00"
            }
          }
        }
      ]
    }
  },
  "size": 0
}
上面的查询语句将返回:
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 14,
    "successful": 14,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 164136,
    "max_score": 0.0,
    "hits": []
  },
  "aggregations": {
    "groupByHostName": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 77736,
      "buckets": [
        {
          "key": "RedHat6.4-02",
          "doc_count": 8640,
          "lastOne": {
            "hits": {
              "total": 8640,
              "max_score": null,
              "hits": [
                {
                  "_index": "metricbeat-6.5.1-2019.01.13",
                  "_type": "doc",
                  "_id": "c1zwR2gB7bWvjZhWp3RJ",
                  "_score": null,
                  "_source": {
                    "system": {
                      "cpu": {
                        "total": {
                          "pct": 0.0655
                        }
                      }
                    }
                  },
                  "sort": [
                    1547395192576
                  ]
                }
              ]
            }
          }
        },
        ...
        ...
        {
          "key": "docker185",
          "doc_count": 8640,
          "lastOne": {
            "hits": {
              "total": 8640,
              "max_score": null,
              "hits": [
                {
                  "_index": "metricbeat-6.5.1-2019.01.13",
                  "_type": "doc",
                  "_id": "xVzwR2gB7bWvjZhWqHSz",
                  "_score": null,
                  "_source": {
                    "system": {
                      "cpu": {
                        "total": {
                          "pct": 0.0509
                        }
                      }
                    }
                  },
                  "sort": [
                    1547395192917
                  ]
                }
              ]
            }
          }
        }
      ]
    }
  }
}
知识点:
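top_hits聚合实现了在相同的hostname组中取得最新一条上报的文档。其中“sort”指定了按照上传时间倒序,“size”指定了每组只取一条(倒序后的第一条,即最新的一条),而“_source”中的“includes”则指定了只获取“system.cpu.total.pct”的值,不关心该条文档的其他字段。另外,terms聚合在不指定“size”时默认只返回文档数最多的10个桶;返回结果中的“sum_other_doc_count”不为0,说明还有主机没有被返回,主机数超过10台时需要调大terms的“size”。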
第三个故事:一骑红尘妃子笑
家住长安的小杨经常在网上购买岭南的荔枝。从岭南到长安的路上有许多个驿站,小杨的快递每经过一个驿站,该驿站的工作人员就会向es中记录一条包含快递单号和当前时间的信息。那么如何计算出每次从发货到收货的平均运输时间?
{
  "size": 0,
  "aggs": {
    "groupById": {
      "terms": {
        "field": "id",
        "size": 10000
      },
      "aggs": {
        "maxCreateTime": {
          "max": {
            "field": "createTime"
          }
        },
        "minCreateTime": {
          "min": {
            "field": "createTime"
          }
        },
        "resultValue": {
          "bucket_script": {
            "buckets_path": {
              "min": "minCreateTime",
              "max": "maxCreateTime"
            },
            "script": {
              "source": "params.max - params.min"
            }
          }
        }
      }
    },
    "avgValue": {
      "avg_bucket": {
        "buckets_path": "groupById>resultValue"
      }
    }
  }
}
上面的查询语句将返回:
{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 4,
    "max_score": 0.0,
    "hits": []
  },
  "aggregations": {
    "groupById": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "1",
          "doc_count": 2,
          "minCreateTime": {
            "value": 1.547366698E12,
            "value_as_string": "2019-01-13 08:04:58"
          },
          "maxCreateTime": {
            "value": 1.547539498E12,
            "value_as_string": "2019-01-15 08:04:58"
          },
          "resultValue": {
            "value": 1.728E8
          }
        },
        {
          "key": "2",
          "doc_count": 2,
          "minCreateTime": {
            "value": 1.547193898E12,
            "value_as_string": "2019-01-11 08:04:58"
          },
          "maxCreateTime": {
            "value": 1.547371938E12,
            "value_as_string": "2019-01-13 09:32:18"
          },
          "resultValue": {
            "value": 1.7804E8
          }
        }
      ]
    },
    "avgValue": {
      "value": 1.7542E8
    }
  }
}
知识点:
bucket_script聚合会执行一个脚本,对每个桶分别进行计算。其中buckets_path将minCreateTime和maxCreateTime的结果作为参数传给脚本,参数名分别是min和max。script中的source则指定了具体的计算内容。由于createTime是以毫秒时间戳参与计算的,resultValue的单位是毫秒(例如1.728E8毫秒即48小时)。
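外层的avg_bucket聚合将计算出所有桶的平均耗时,其中buckets_path指定了对groupById聚合的resultValue子聚合做取平均值计算。需要注意,terms聚合在不指定size时默认只返回文档数最多的10个桶,avg_bucket也只会对这些返回的桶求平均;如果快递单号多于10个,需要在terms中设置足够大的size,否则得到的平均值只覆盖了部分单号。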
除avg_bucket外,es还提供了max_bucket、min_bucket、sum_bucket、stats_bucket、derivative等其他管道聚合(pipeline aggregation)。