聚合分析
1
聚合类型概述
Elasticsearch提供强大的聚合分析功能
聚合分类
- Bucket Aggregations(桶聚合) - 将文档分组到不同的桶中,类似SQL的GROUP BY
- Metric Aggregations(指标聚合) - 对文档进行数值计算,如sum、avg、max、min
- Pipeline Aggregations(管道聚合) - 对其他聚合结果进行二次聚合
- Matrix Aggregations(矩阵聚合) - 对多个字段进行统计分析
2
Metric Aggregations(指标聚合)
# 统计平均值
{
  "aggs": {
    "avg_views": {
      "avg": { "field": "views" }
    }
  }
}
# 多指标统计
{
  "aggs": {
    "stats_views": {
      "stats": { "field": "views" }
    }
  }
}
# 扩展统计
{
  "aggs": {
    "extended_stats": {
      "extended_stats": { "field": "price" }
    }
  }
}
# 百分位数
{
  "aggs": {
    "load_time_percentiles": {
      "percentiles": {
        "field": "load_time",
        "percents": [
          50,
          95,
          99
        ]
      }
    }
  }
}
# 去重计数
{
  "aggs": {
    "unique_authors": {
      "cardinality": { "field": "author" }
    }
  }
}
3
Bucket Aggregations(桶聚合)
# Terms聚合(分组统计)
{
  "aggs": {
    "by_category": {
      "terms": { "field": "category", "size": 10 }
    }
  }
}
# 范围聚合
{
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price",
        "ranges": [
          {
            "to": 50,
            "key": "cheap"
          },
          {
            "from": 50,
            "to": 100,
            "key": "medium"
          },
          {
            "from": 100,
            "key": "expensive"
          }
        ]
      }
    }
  }
}
# 日期直方图
{
  "aggs": {
    "articles_over_time": {
      "date_histogram": {
        "field": "publish_date",
        "calendar_interval": "month",
        "format": "yyyy-MM"
      }
    }
  }
}
# 嵌套聚合(子聚合:在桶内继续做指标聚合,不同于针对 nested 字段的 nested 聚合)
{
  "aggs": {
    "by_category": {
      "terms": { "field": "category" },
      "aggs": {
        "avg_views": {
          "avg": { "field": "views" }
        },
        "max_views": {
          "max": { "field": "views" }
        }
      }
    }
  }
}
4
复杂聚合示例
# 完整聚合示例:按分类统计,计算平均值,并排序
{
  "size": 0,
  "aggs": {
    "categories": {
      "terms": {
        "field": "category",
        "size": 10,
        "order": { "avg_views": "desc" }
      },
      "aggs": {
        "avg_views": {
          "avg": { "field": "views" }
        },
        "sum_views": {
          "sum": { "field": "views" }
        },
        "top_articles": {
          "top_hits": {
            "size": 3,
            "sort": [
              {
                "views": "desc"
              }
            ],
            "_source": [
              "title",
              "views"
            ]
          }
        }
      }
    }
  }
}
# 时间序列分析
{
  "size": 0,
  "aggs": {
    "sales_over_time": {
      "date_histogram": {
        "field": "order_date",
        "calendar_interval": "day"
      },
      "aggs": {
        "total_sales": {
          "sum": {
            "field": "amount"
          }
        },
        "moving_avg": {
          "moving_fn": {
            "buckets_path": "total_sales",
            "window": 7,
            "script": "MovingFunctions.linearWeightedAvg(values)"
          }
        }
      }
    }
  }
}
5
Pipeline Aggregations(管道聚合)
# 移动平均
{
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "timestamp",
        "calendar_interval": "day"
      },
      "aggs": {
        "the_sum": {
          "sum": {
            "field": "value"
          }
        },
        "the_movavg": {
          "moving_fn": {
            "buckets_path": "the_sum",
            "window": 30,
            "script": "MovingFunctions.unweightedAvg(values)"
          }
        }
      }
    }
  }
}
# 导数计算(相邻桶之间的增长量)
{
  "aggs": {
    "sales_per_month": {
      "date_histogram": { "field": "date", "calendar_interval": "month" },
      "aggs": {
        "sales": {
          "sum": { "field": "price" }
        },
        "sales_deriv": {
          "derivative": { "buckets_path": "sales" }
        }
      }
    }
  }
}
# 累计和
{
  "aggs": {
    "sales_per_day": {
      "date_histogram": { "field": "date", "calendar_interval": "day" },
      "aggs": {
        "sales": {
          "sum": { "field": "price" }
        },
        "cumulative_sales": {
          "cumulative_sum": { "buckets_path": "sales" }
        }
      }
    }
  }
}