# 聚合函数

``````array_agg(x ORDER BY y DESC)
array_agg(x ORDER BY x, y, z)
``````

## 一般聚合函数

arbitrary(x) -> [与输入相同]

array_agg(x) -> array<[与输入相同]>

avg(x) -> double

avg(时间间隔类型) -> 时间间隔类型

bool_and(boolean) -> boolean

bool_or(boolean) -> boolean

checksum(x) -> varbinary

count(*) -> bigint

count(x) -> bigint

count_if(x) -> bigint

every(boolean) -> boolean

geometric_mean(x) -> double

max_by(x, y) -> [与 x 相同]

max_by(x, y, n) -> array<[与 x 相同]>

min_by(x, y) -> [与 x 相同]

min_by(x, y, n) -> array<[与 x 相同]>

max(x) -> [与输入相同]

max(x, n) -> array<[与 x 相同]>

min(x) -> [与输入相同]

min(x, n) -> array<[与 x 相同]>

sum(x) -> [与输入相同]

## 按位聚合函数

bitwise_and_agg(x) -> bigint

bitwise_or_agg(x) -> bigint

## 映射聚合函数

histogram(x) -> map(K,bigint)

map_agg(key, value) -> map(K,V)

map_union(x(K,V)) -> map(K,V)

multimap_agg(key, value) -> map(K,array(V))

## 近似聚合函数

approx_distinct(x) -> bigint

approx_distinct(x, e) -> bigint

approx_percentile(x, percentage) -> [与 x 相同]

approx_percentile(x, percentages) -> array<[与 x 相同]>

approx_percentile(x, w, percentage) -> [与 x 相同]

approx_percentile(x, w, percentage, accuracy) -> [与 x 相同]

approx_percentile(x, w, percentages) -> array<[与 x 相同]>

approx_set(x) -> HyperLogLog

merge(x) -> HyperLogLog

merge(qdigest(T)) -> qdigest(T)

qdigest_agg(x) -> qdigest<[与 x 相同]>

qdigest_agg(x, w) -> qdigest<[与 x 相同]>

qdigest_agg(x, w, accuracy) -> qdigest<[与 x 相同]>

numeric_histogram(buckets, value, weight) -> map<double, double>

``````Yael Ben-Haim and Elad Tom-Tov, "A streaming parallel decision tree algorithm",
J. Machine Learning Research 11 (2010), pp. 849--872.
``````

`buckets` 必须是 `bigint``value``weight` 必须是数字。

numeric_histogram(buckets, value) -> map<double, double>

## 统计聚合函数

corr(y, x) -> double

covar_pop(y, x) -> double

covar_samp(y, x) -> double

kurtosis(x) -> double

``````kurtosis(x) = n(n+1)/((n-1)(n-2)(n-3))sum[(x_i-mean)^4]/stddev(x)^4-3(n-1)^2/((n-2)(n-3))
``````

regr_intercept(y, x)-> double

regr_slope(y, x) -> double

skewness(x) -> double

stddev(x) -> double

stddev_pop(x)-> double

stddev_samp(x) -> double

variance(x)-> double

var_pop(x)-> double

var_samp(x)-> double

## lambda 聚合函数

reduce_agg(inputValue T, initialState S, inputFunction(S, T, S), combineFunction(S, S, S)) -> S

``````SELECT id, reduce_agg(value, 0, (a, b) -> a + b, (a, b) -> a + b)
FROM (
VALUES
(1, 3),
(1, 4),
(1, 5),
(2, 6),
(2, 7)
) AS t(id, value)
GROUP BY id;
-- (1, 12)
-- (2, 13)

SELECT id, reduce_agg(value, 1, (a, b) -> a * b, (a, b) -> a * b)
FROM (
VALUES
(1, 3),
(1, 4),
(1, 5),
(2, 6),
(2, 7)
) AS t(id, value)
GROUP BY id;
-- (1, 60)
-- (2, 42)
``````