|
| 1 | +# Copyright 2009-2013 Ram Rachum. |
| 2 | +# This program is distributed under the MIT license. |
| 3 | + |
import operator
import _heapq
import heapq
import itertools
import collections

try:
    from collections.abc import Mapping
except ImportError:  # Python 2
    from collections import Mapping

from .frozen_dict import FrozenDict
| 10 | + |
| 11 | + |
class FrozenCounter(FrozenDict):
    '''
    An immutable counter.

    A counter that can't be changed. The advantage of this over
    `collections.Counter` is mainly that it's hashable, and thus can be used as
    a key in dicts and sets.

    In other words, `FrozenCounter` is to `Counter` what `frozenset` is to
    `set`.
    '''

    def __init__(self, iterable=None, **kwargs):
        '''
        Build the counter from a mapping or an iterable, plus keyword counts.

        A mapping contributes its values as the counts; any other iterable is
        tallied element by element. Keyword arguments are then applied as
        `element=count` pairs. Entries whose count equals zero are dropped.
        '''
        super(FrozenCounter, self).__init__()

        if iterable is not None:
            if isinstance(iterable, Mapping):
                self._dict.update(iterable)
            else:
                self_get = self._dict.get
                for element in iterable:
                    self._dict[element] = self_get(element, 0) + 1
        if kwargs:
            self._dict.update(kwargs)

        # Iterate over a snapshot: deleting entries while walking a live
        # items view raises `RuntimeError` on Python 3.
        for key, value in list(self.items()):
            if value == 0:
                del self._dict[key]

    def __getitem__(self, key):
        '''Get the count of `key`, which is 0 for a missing element.'''
        return self._dict.get(key, 0)

    def most_common(self, n=None):
        '''
        List the `n` most common elements and their counts, sorted.

        Results are sorted from the most common to the least. If `n is None`,
        then list all element counts.

            >>> FrozenCounter('abcdeabcdabcaba').most_common(3)
            [('a', 5), ('b', 4), ('c', 3)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.items(), key=operator.itemgetter(1),
                          reverse=True)
        # Use the public `heapq` module: the private `_heapq` accelerator
        # does not offer a `key`-aware `nlargest`.
        return heapq.nlargest(n, self.items(), key=operator.itemgetter(1))

    def elements(self):
        '''
        Iterate over elements repeating each as many times as its count.

            >>> c = FrozenCounter('ABCABC')
            >>> sorted(c.elements())
            ['A', 'A', 'B', 'B', 'C', 'C']

            # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
            >>> prime_factors = FrozenCounter({2: 2, 3: 3, 17: 1})
            >>> product = 1
            >>> for factor in prime_factors.elements():  # loop over factors
            ...     product *= factor                    # and multiply them
            >>> product
            1836

        Note, if an element's count has been set to zero or is a negative
        number, `.elements()` will ignore it.
        '''
        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
        return itertools.chain.from_iterable(
            itertools.starmap(itertools.repeat, self.items())
        )

    @classmethod
    def fromkeys(cls, iterable, v=None):
        '''Always raise `NotImplementedError`; counters have no `fromkeys`.'''
        # There is no equivalent method for counters because setting v=1
        # means that no element can have a count greater than one.
        raise NotImplementedError(
            'FrozenCounter.fromkeys() is undefined. Use '
            'FrozenCounter(iterable) instead.'
        )

    def __repr__(self):
        '''
        Show the class name and the counts, most common element first.

        If the counts can't be sorted (`TypeError`), the items are shown in
        plain `dict` form instead.
        '''
        if not self:
            return '%s()' % self.__class__.__name__
        try:
            items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
            return '%s({%s})' % (self.__class__.__name__, items)
        except TypeError:
            # handle case where values are not orderable
            return '{0}({1!r})'.format(self.__class__.__name__, dict(self))

    def __pos__(self):
        '''Return the counter itself, unchanged.'''
        return self

    def __neg__(self):
        '''Get a counter of the same type with every count negated.'''
        return type(self)({key: -value for key, value in self.items()})

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # Outputs guaranteed to only include positive counts.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += FrozenCounter()

    def __add__(self, other):
        '''
        Add counts from two counters.

            >>> FrozenCounter('abbb') + FrozenCounter('bcc')
            FrozenCounter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, FrozenCounter):
            return NotImplemented
        result = collections.Counter()
        for element, count in self.items():
            new_count = count + other[element]
            if new_count > 0:
                result[element] = new_count
        # Elements that appear only in `other` keep their positive counts.
        for element, count in other.items():
            if element not in self and count > 0:
                result[element] = count
        return FrozenCounter(result)

    def __sub__(self, other):
        '''
        Subtract count, but keep only results with positive counts.

            >>> FrozenCounter('abbbc') - FrozenCounter('bccd')
            FrozenCounter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, FrozenCounter):
            return NotImplemented
        result = collections.Counter()
        for element, count in self.items():
            new_count = count - other[element]
            if new_count > 0:
                result[element] = new_count
        # Subtracting a negative count that exists only in `other` yields a
        # positive count in the result.
        for element, count in other.items():
            if element not in self and count < 0:
                result[element] = 0 - count
        return FrozenCounter(result)

    def __or__(self, other):
        '''
        Get the maximum of value in either of the input counters.

            >>> FrozenCounter('abbb') | FrozenCounter('bcc')
            FrozenCounter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, FrozenCounter):
            return NotImplemented
        result = collections.Counter()
        for element, count in self.items():
            other_count = other[element]
            new_count = other_count if count < other_count else count
            if new_count > 0:
                result[element] = new_count
        # Elements that appear only in `other` keep their positive counts.
        for element, count in other.items():
            if element not in self and count > 0:
                result[element] = count
        return FrozenCounter(result)

    def __and__(self, other):
        '''
        Get the minimum of corresponding counts.

            >>> FrozenCounter('abbb') & FrozenCounter('bcc')
            FrozenCounter({'b': 1})

        '''
        if not isinstance(other, FrozenCounter):
            return NotImplemented
        result = collections.Counter()
        for element, count in self.items():
            other_count = other[element]
            new_count = count if count < other_count else other_count
            if new_count > 0:
                result[element] = new_count
        return FrozenCounter(result)
0 commit comments