Elasticsearch 入门教程 – 实战分析_9

Elasticsearch 统计代码例子

 aggs

avg 平均数

最近15分钟的平均访问时间,upstream_time_ms是每次访问时间,单位毫秒

{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }}//当然你也可以直接将过滤器写在aggs里面{
  "size": 0,
  "aggs": {
    "bool": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      },
      "aggs": {
        "execute_time": {
          "avg": {
            "field": "upstream_time_ms"
          }
        }
      }
    }
  }}

cartinality metric :

对每个bucket中的指定的field进行去重,取去重后的count,类似于count(distcint)

每月销售品牌数量统计:

GET /tvs/sales/_search
{
  "size" : 0,
  "aggs" : {
      "months" : {
        "date_histogram": {
          "field": "sold_date",
          "interval": "month"
        },
        "aggs": {
          "distinct_colors" : {
              "cardinality" : {
                "field" : "brand"
              }
          }
        }
      }
  }
}

你可能注意到了size:0,如果你只需要统计数据,不要数据本身,就设置它,这不是我投机取巧

{
  "size": 0,
  "aggs": {
    "bool": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      },
      "aggs": {
        "ipv": {
          "cardinality": {
            "field": "ip"
          }
        }
      }
    }
  }}


cardinality,count(distinct),5%的错误率,性能在100ms左右

GET /tvs/sales/_search
{
    "size" : 0,
    "aggs" : {
        "distinct_brand" : {
            "cardinality" : {
              "field" : "brand",
              "precision_threshold" : 100 
            }
        }
    }
}

percentiles 基于百分比统计

最近15分钟,99.9的请求的执行时间不超过多少

{
  "size": 0,
  "query": {
    "bool": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "percentiles": {
        "field": "upstream_time_ms",
        "percents": [
          90,
          95,
          99.9
        ]
      }
    }
  }}//返回值,0.1%的请求超过了159ms{
  "took": 620,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 679400,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "execute_time": {
      "values": {
        "90.0": 24.727003484320534,
        "95.0": 72.6200981699678,
        "99.9": 159.01065773524886 //99.9的数据落在159以内,是系统计算出来159
      }
    }
  }}



需求:比如有一个网站,记录下了每次请求的访问的耗时,需要统计tp50,tp90,tp99

tp50:50%的请求的耗时最长在多长时间

tp90:90%的请求的耗时最长在多长时间

tp99:99%的请求的耗时最长在多长时间

PUT /website
{
    "mappings": {
        "logs": {
            "properties": {
                "latency": {
                    "type": "long"
                },
                "province": {
                    "type": "keyword"
                },
                "timestamp": {
                    "type": "date"
                }
            }
        }
    }
}

POST /website/logs/_bulk
{ "index": {}}
{ "latency" : 105, "province" : "江苏", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 83, "province" : "江苏", "timestamp" : "2016-10-29" }
{ "index": {}}
{ "latency" : 92, "province" : "江苏", "timestamp" : "2016-10-29" }
{ "index": {}}
{ "latency" : 112, "province" : "江苏", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 68, "province" : "江苏", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 76, "province" : "江苏", "timestamp" : "2016-10-29" }
{ "index": {}}
{ "latency" : 101, "province" : "新疆", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 275, "province" : "新疆", "timestamp" : "2016-10-29" }
{ "index": {}}
{ "latency" : 166, "province" : "新疆", "timestamp" : "2016-10-29" }
{ "index": {}}
{ "latency" : 654, "province" : "新疆", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 389, "province" : "新疆", "timestamp" : "2016-10-28" }
{ "index": {}}
{ "latency" : 302, "province" : "新疆", "timestamp" : "2016-10-29" }

pencentiles

GET /website/logs/_search 
{
  "size": 0,
  "aggs": {
    "latency_percentiles": {
      "percentiles": {
        "field": "latency",
        "percents": [
          50,
          95,
          99
        ]
      }
    },
    "latency_avg": {
      "avg": {
        "field": "latency"
      }
    }
  }
}

{
  "took": 31,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 12,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "latency_avg": {
      "value": 201.91666666666666
    },
    "latency_percentiles": {
      "values": {
        "50.0": 108.5,
        "95.0": 508.24999999999983,
        "99.0": 624.8500000000001
      }
    }
  }
}

50%的请求,数值的最大的值是多少,不是完全准确的

GET /website/logs/_search 
{
  "size": 0,
  "aggs": {
    "group_by_province": {
      "terms": {
        "field": "province"
      },
      "aggs": {
        "latency_percentiles": {
          "percentiles": {
            "field": "latency",
            "percents": [
              50,
              95,
              99
            ]
          }
        },
        "latency_avg": {
          "avg": {
            "field": "latency"
          }
        }
      }
    }
  }
}

{
  "took": 33,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 12,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_province": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "新疆",
          "doc_count": 6,
          "latency_avg": {
            "value": 314.5
          },
          "latency_percentiles": {
            "values": {
              "50.0": 288.5,
              "95.0": 587.75,
              "99.0": 640.75
            }
          }
        },
        {
          "key": "江苏",
          "doc_count": 6,
          "latency_avg": {
            "value": 89.33333333333333
          },
          "latency_percentiles": {
            "values": {
              "50.0": 87.5,
              "95.0": 110.25,
              "99.0": 111.65
            }
          }
        }
      ]
    }
  }
}


percentile_ranks 指定一个范围,有多少数据落在这里

需求:在200ms以内的,有百分之多少,在1000毫秒以内的有百分之多少,percentile ranks metric

这个percentile ranks,其实比pencentile还要常用

按照品牌分组,计算,电视机,售价在1000占比,2000占比,3000占比

GET /website/logs/_search 
{
  "size": 0,
  "aggs": {
    "group_by_province": {
      "terms": {
        "field": "province"
      },
      "aggs": {
        "latency_percentile_ranks": {
          "percentile_ranks": {
            "field": "latency",
            "values": [
              200,
              1000
            ]
          }
        }
      }
    }
  }
}

{
  "took": 38,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 12,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "group_by_province": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "新疆",
          "doc_count": 6,
          "latency_percentile_ranks": {
            "values": {
              "200.0": 29.40613026819923,
              "1000.0": 100
            }
          }
        },
        {
          "key": "江苏",
          "doc_count": 6,
          "latency_percentile_ranks": {
            "values": {
              "200.0": 100,
              "1000.0": 100
            }
          }
        }
      ]
    }
  }
}

percentile的优化

TDigest算法,用很多节点来执行百分比的计算,近似估计,有误差,节点越多,越精准

compression

限制节点数量最多 compression * 20 = 2000个node去计算

默认100

越大,占用内存越多,越精准,性能越差

一个节点占用32字节,100 * 20 * 32 = 64KB

如果你想要percentile算法越精准,compression可以设置的越大

{
  "size": 0,
  "query": {
    "bool": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "percentile_ranks": {
        "field": "upstream_time_ms",
        "values": [
          50,
          160
        ]
      }
    }
  }}//返回值{
  "took": 666,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 681014,
    "max_score": 0,
    "hits": []
  },
  "aggregations": {
    "execute_time": {
      "values": {
        "50.0": 94.14716385885366,
        "160.0": 99.91130872493076 //99.9的数据落在了160以内,这次,160是我指定的,系统计算出99.9
      }
    }
  }}


参考链接:

 https://www.cnblogs.com/didda/p/5485681.html