This repository was archived by the owner on Sep 9, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 162
Expand file tree
/
Copy pathstats.rs
More file actions
106 lines (92 loc) · 2.89 KB
/
stats.rs
File metadata and controls
106 lines (92 loc) · 2.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// -*- coding: utf-8 -*-
// ------------------------------------------------------------------------------------------------
// Copyright © 2023, stack-graphs authors.
// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
// ------------------------------------------------------------------------------------------------
use std::collections::HashMap;
use std::hash::Hash;
use itertools::Itertools;
/// Frequency distribution maintains the frequency of T values.
#[derive(Clone, Debug, Default)]
pub struct FrequencyDistribution<T>
where
T: Eq + Hash,
{
values: HashMap<T, usize>,
total: usize,
}
impl<T: Eq + Hash> FrequencyDistribution<T> {
pub fn record(&mut self, value: T) {
*self.values.entry(value).or_default() += 1;
self.total += 1;
}
// The number of recorded values.
pub fn count(&self) -> usize {
return self.total;
}
// The number of unique recorded values.
pub fn unique(&self) -> usize {
return self.values.len();
}
pub fn frequencies(&self) -> FrequencyDistribution<usize> {
let mut fs = FrequencyDistribution::default();
for count in self.values.values() {
fs.record(*count);
}
fs
}
}
impl<T: Eq + Hash + Ord> FrequencyDistribution<T> {
pub fn quantiles(&self, q: usize) -> Vec<&T> {
if q == 0 || self.total == 0 {
return vec![];
}
let mut it = self.values.iter().sorted_by_key(|e| e.0);
let mut total_count = 0;
let mut last_value;
let mut result = Vec::new();
if let Some((value, count)) = it.next() {
total_count += count;
last_value = value;
} else {
return vec![];
}
result.push(last_value);
for k in 1..=q {
let limit = ((self.total as f64 * k as f64) / q as f64).round() as usize;
while total_count < limit {
if let Some((value, count)) = it.next() {
total_count += count;
last_value = value;
} else {
break;
}
}
result.push(last_value);
}
result
}
}
impl<T> std::ops::AddAssign<Self> for FrequencyDistribution<T>
where
T: Eq + Hash,
{
fn add_assign(&mut self, rhs: Self) {
for (value, count) in rhs.values {
*self.values.entry(value).or_default() += count;
}
self.total += rhs.total;
}
}
impl<T> std::ops::AddAssign<&Self> for FrequencyDistribution<T>
where
T: Eq + Hash + Clone,
{
fn add_assign(&mut self, rhs: &Self) {
for (value, count) in &rhs.values {
*self.values.entry(value.clone()).or_default() += count;
}
self.total += rhs.total;
}
}