forked from glamp/bashplotlib
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhistogram.py
More file actions
123 lines (112 loc) · 3.95 KB
/
histogram.py
File metadata and controls
123 lines (112 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""histogram.py
"""
import math
import sys
import os
from bashplotlib.core import helpers
def calc_bins(n, min_val, max_val, h=None):
    """Yield the bin edges for a histogram covering min_val..max_val.

    Arguments:
        n: Number of observations; only used to choose a bin count when h is
           not supplied
        min_val: Smallest observed value, passed as the start to helpers.drange
        max_val: Largest observed value, passed as the stop to helpers.drange
        h: Desired number of bins; when falsy, max(10, log2(n + 1)) is used

    The yielded values are exactly what
    helpers.drange(min_val, max_val, bin_width) generates.
    """
    if not h:
        h = max(10, math.log(n + 1, 2))
    # Force true division: under Python 2, integer extremes combined with an
    # integer bin count would floor the width (possibly all the way to 0).
    bin_width = float(max_val - min_val) / h
    for b in helpers.drange(min_val, max_val, bin_width):
        yield b
def read_numbers(numbers):
    """Yield the numeric values found in numbers, skipping unparseable entries.

    Arguments:
        numbers: Either an iterable of values, or a string naming a file to
                 read line by line (blank lines are ignored). A falsy
                 argument (None, empty list, empty string) yields nothing.

    Each entry is converted with helpers.try_cast_str_to_number; entries for
    which that helper returns None are skipped.
    """
    if not numbers:
        numbers = []
    if isinstance(numbers, basestring):
        try:
            # Load every non-blank row before yielding anything, so the file
            # handle is closed again before the first value is produced.
            with open(numbers, 'r') as handle:
                numbers = [line for line in handle if line.strip()]
        except (IOError, OSError):
            # Best effort: an unreadable file is treated as empty input.
            # Previously the path string itself was left in `numbers` and
            # carried on to the conversion loop below.
            numbers = []
    if helpers.isiterable(numbers):
        for number in numbers:
            number = helpers.try_cast_str_to_number(number)
            # Compare against None rather than truthiness so that a genuine
            # value of 0 is kept.
            # NOTE(review): assumes the helper signals a failed cast by
            # returning None -- confirm against helpers.
            if number is not None:
                yield number
def plot_hist(data, height=20.0, bincount=None, pch="o", colour="white", title="", xlab=None, showSummary=False):
"""make a histogram for continuous variable.
Arguments:
data: List of numbers or file with numbers
height: The height of the histogram in # of lines
bincount: Number of bins in the histogram
pch: Shape of the bars in the plot
colour: Colour of the bars in the terminal
title: Title at the top of the plot
xlab: Boolen value for whether or not to display x-axis labels
showSummary: Boolean value for whether or not to display a summary
"""
if pch is None:
pch = "o"
colour = helpers.get_colour(colour)
min_val, max_val = None, None
n, mean = 0., 0.
for number in read_numbers(data):
n += 1
if not min_val or number < min_val:
min_val = number
if not max_val or number > max_val:
max_val = number
mean += number
mean /= n
bins = list(calc_bins(n, min_val, max_val, bincount))
hist = {}
for i in range(len(bins)):
hist[i] = 0
for number in read_numbers(data):
for i, b in enumerate(bins):
if number < b:
hist[i] += 1
break
min_y, max_y = min(hist.values()), max(hist.values())
ys = list(helpers.drange(min_y, max_y, (max_y-min_y)/height))
ys.reverse()
nlen = max(len(str(min_y)), len(str(max_y))) + 1
if title:
print helpers.box_text(title, len(hist)*2, nlen)
print
used_labs = set()
for y in ys:
ylab = str(int(y))
if ylab in used_labs:
ylab = ""
else:
used_labs.add(ylab)
ylab = " "*(nlen - len(ylab)) + ylab + "|"
print ylab,
for i in range(len(hist)):
if y < hist[i]:
helpers.printcolor(pch, True, colour)
else:
helpers.printcolor(" ", True, colour)
print
xs = hist.keys() * 2
print " "*(nlen+1) + "-"*len(xs)
if xlab:
for i in range(0, nlen):
helpers.printcolor(" "*(nlen+1), True, colour)
for x in range(0, len(hist)):
num = str(bins[x])
if x%2==0:
print " ",
elif i < len(num):
print num[i],
print
center = max(map(len, map(str, [n, min_val, mean, max_val])))
center += 15
if showSummary:
print
print "-"*(2 + center)
print "|" + "Summary".center(center) + "|"
print "-"*(2 + center)
summary = "|" + ("observations: %d" % n).center(center) + "|\n"
summary += "|" + ("min value: %f" % min_val).center(center) + "|\n"
summary += "|" + ("mean : %f" % mean).center(center) + "|\n"
summary += "|" + ("max value: %f" % max_val).center(center) + "|\n"
summary += "-"*(2 + center)
print summary