
I need to calculate the running sum of an np.array. Let's say a is a given np.array of length l and n is an integer window size; then I need an array b of length l - n + 1, where

b = [a[0] + a[1] + ... + a[n-1], a[1] + a[2] + ... + a[n], ..., a[l-n] + a[l-n+1] + ... + a[l-1]]

I tried the following:

import numpy as np

def run_sum(a, n):
    # prepend 0 so that differences of the cumulative sum give the window sums
    a_cumsum = np.r_[0, np.cumsum(a)]
    b = a_cumsum[n:] - a_cumsum[:-n]
    return b

It works fine, BUT if array a has elements of very different magnitudes (for example, some of them are 1e8 and some are 1e-8), as in my case, then this method gives too much error, which accumulates in np.cumsum. Do you have any ideas on how to do this directly, without using np.cumsum?

Since a has length int(1e8) I don't want to use a for loop. I would like to have something like:

ind = np.arange(a.size - n + 1)
b[ind] = np.sum(a[ind:ind+n])

but it doesn't work.
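For reference, a working vectorized form of that idea is the following sketch; note that it materializes an (l - n + 1) × n index array, so it needs O(l*n) memory and is impractical for l = 1e8:

ind = np.arange(a.size - n + 1)
b = a[ind[:, None] + np.arange(n)].sum(axis=1)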

Update: an example of the loss of precision:

a = np.array([-9999.999966666666, -5.7847991744735696e-08, -2.983181169098924e-08, -2.093995193995538e-08, -1.6894298480290635e-08, -1.4869270858640853e-08, -1.3961300790320832e-08, -1.3834651535607362e-08, -1.4395718046705324e-08, -1.570696320519037e-08])

run_sum(a, 3) = 
np.array([-9999.999966754345, -1.0861913324333727e-07, -6.766640581190586e-08, -5.2703398978337646e-08, -4.5723936636932194e-08, -4.2664396460168064e-08, -4.219145921524614e-08, -4.393768904265016e-08])

a[:-2] + a[1:-1] + a[2:] = 
np.array([-9999.999966754345, -1.0861975537568031e-07, -6.766606211123524e-08, -5.2703521278886865e-08, -4.572487012925232e-08, -4.266522318456905e-08, -4.2191670372633516e-08, -4.393733278750305e-08])

The difference in the 5th significant figure is important for me!
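To quantify the error, one can compare against an exact reference computed with math.fsum (a sketch; math.fsum is exact for float inputs, but it loops in Python, so it is only practical on small samples like this one):

import math
ref = np.array([math.fsum(a[i:i+3]) for i in range(len(a) - 2)])
print(np.abs(run_sum(a, 3) - ref) / np.abs(ref))  # relative error of the cumsum version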

Update 2: I think this will work. At least this toy example is processed well. The idea is to split the initial array into arrays with large and small elements and to compute the cumsum of each independently.

def run_sum_new(a, n, max_exp_sep=3, exp_sep_lim=4):
    """
    Finds the running sum of n consecutive elements of array a. If exponents of array elements
    vary greatly, divides them into different arrays to minimize the final error.

    Receives:
    a - the original array
    n - number of elements to sum
    max_exp_sep - the maximum number of separation thresholds for each sign (positive and negative elements are handled separately).
                  In total, the original array is divided into at most 2*max_exp_sep+1 arrays.
    exp_sep_lim - the minimum difference between the exponents of two elements at which they must be separated into different arrays.

    Returns:
    run_sum - array of running sum of the original array
    """

    #finds maximal exponents of pos and neg elements and minimal exponent of elements of a
    a_pos = a[a>0]
    a_neg = a[a<0]
    if a_pos.size == 0:
        a_pos_max = np.nan
    else:
        a_pos_max = np.max(a_pos)
    if a_neg.size == 0:
        a_neg_max = np.nan
    else:
        a_neg_max = np.max(np.abs(a_neg))
    a_min = np.min(np.abs(a))
    exp_pos_max = np.ceil(np.log10(a_pos_max))
    exp_neg_max = np.ceil(np.log10(a_neg_max))
    exp_min = np.floor(np.log10(a_min))
    del a_pos, a_neg

    #finds separation limits list
    d_exp_pos = exp_pos_max - exp_min
    if np.isnan(d_exp_pos):
        exp_pos_sep_list = []
    elif d_exp_pos <= exp_sep_lim * (max_exp_sep + 1):
        exp_pos_sep_list = [10**(exp_min + exp_sep_lim * i) for i in range(1, max_exp_sep+1) if 10**(exp_min + exp_sep_lim * i) < a_pos_max]
    else:
        new_exp_sep_lim = np.ceil(d_exp_pos / (max_exp_sep + 1))
        print(f"Warning: positive elements of array have too large exponent spread {d_exp_pos} for the given max_exp_sep={max_exp_sep} and exp_sep_lim={exp_sep_lim}.",
              f"New exp_sep_lim={new_exp_sep_lim} has been taken.", sep='\n')
        exp_pos_sep_list = [10**(exp_min + new_exp_sep_lim * i) for i in range(1, max_exp_sep+1)]
    d_exp_neg = exp_neg_max - exp_min
    if np.isnan(d_exp_neg):
        exp_neg_sep_list = []
    elif d_exp_neg <= exp_sep_lim * (max_exp_sep + 1):
        exp_neg_sep_list = [-10**(exp_min + exp_sep_lim * i) for i in range(1, max_exp_sep+1) if 10**(exp_min + exp_sep_lim * i) < a_neg_max]
    else:
        new_exp_sep_lim = np.ceil(d_exp_neg / (max_exp_sep + 1))
        print(f"Warning: negative elements of array have too large exponent spread {d_exp_neg} for the given max_exp_sep={max_exp_sep} and exp_sep_lim={exp_sep_lim}.",
              f"New exp_sep_lim={new_exp_sep_lim} has been taken.", sep='\n')
        exp_neg_sep_list = [-10**(exp_min + new_exp_sep_lim * i) for i in range(1, max_exp_sep+1)]  # note the minus sign: separators for negative elements must themselves be negative
    exp_neg_sep_list.reverse()
    exp_sep_list = [-np.inf, ] + exp_neg_sep_list + exp_pos_sep_list + [np.inf,]

    #separates a
    a_sep_list = [np.where((i <= a) & (a < j), a, 0) for (i, j) in zip(exp_sep_list[:-1], exp_sep_list[1:])]
    a_sep_arr = np.array(a_sep_list)

    #finds run sum
    a_sep_cumsum = np.cumsum(a_sep_arr, axis=-1)
    a_sep_cumsum = np.hstack(([[0]]*a_sep_cumsum.shape[0], a_sep_cumsum))
    run_sum = np.sum(a_sep_cumsum[:,n:] - a_sep_cumsum[:,:-n], axis=0)

    return run_sum
run_sum_new(a, 3) = np.array([-9999.999966754345, -1.0861975537568031e-07, -6.766606211123526e-08, -5.270352127888686e-08, -4.5724870129252317e-08, -4.266522318456904e-08, -4.2191670372633516e-08, -4.393733278750304e-08])

a[:-2] + a[1:-1] + a[2:] = 
np.array([-9999.999966754345, -1.0861975537568031e-07, -6.766606211123524e-08, -5.2703521278886865e-08, -4.572487012925232e-08, -4.266522318456905e-08, -4.2191670372633516e-08, -4.393733278750305e-08])

timeit.timeit('run_sum_new(a, 3)', number=1000, globals=globals()) = 0.12470354699999997
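A quick consistency check of run_sum_new against the direct three-term sum on the toy example (a sketch; atol=0 forces a purely relative comparison):

direct = a[:-2] + a[1:-1] + a[2:]
print(np.allclose(run_sum_new(a, 3), direct, rtol=1e-12, atol=0))  # True for this example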
  • Maybe you can try to increase precision by specifying the dtype as longdouble: np.cumsum(a, dtype=np.longdouble)? (See the sketch after these comments.) Commented Jun 4, 2024 at 17:23
  • Also, are you sure that the data lost precision? I mean, how do you check that the precision is not enough? Could you give an example? For example, if you use print(b) and see only very large values, it doesn't mean that individual items are rounded; it only means that the print function truncates decimals. Commented Jun 4, 2024 at 17:26
  • Can you provide a minimal reproducible example that demonstrates this lack of precision? Commented Jun 4, 2024 at 17:30
  • I meant a short, well-crafted example that shows the issue of precision Commented Jun 4, 2024 at 18:02
  • Could you explain what your actual application is? If some of your elements are O(1e8) and some are O(1e-8) and you are looking for 5 significant figures of precision in the cumulative sum, then you are probably asking for trouble. If you explain your application then maybe someone can suggest "error-avoidance tactics". Commented Jun 5, 2024 at 9:02
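A minimal sketch of the longdouble suggestion from the first comment (the helper name run_sum_ld is just for illustration; a caveat: np.longdouble is 80-bit extended precision on most Linux builds but is the same as float64 on Windows, so this is not a guaranteed fix):

def run_sum_ld(a, n):
    # accumulate in extended precision, round back to the input dtype once at the end
    a_cumsum = np.r_[0, np.cumsum(a, dtype=np.longdouble)]
    return (a_cumsum[n:] - a_cumsum[:-n]).astype(a.dtype)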

3 Answers


If you want to do something similar to a for loop, you could use sliding_window_view:

from numpy.lib.stride_tricks import sliding_window_view as swv

b = swv(a, n).sum(axis=1)

This gives a very similar result to your function, which doesn't seem to lack precision:

# generate an input with a large variability
np.random.seed(0)
a = 1/np.random.random(size=100_000_000)*1e-8

a.min(), a.max()
# (1.0000000074212765e-08, 0.9650719974971247)

b1 = run_sum(a, n)
b2 = swv(a, n).sum(axis=1)

# are the two outputs almost equal?
np.allclose(b1, b2)
# True

# what is the greatest difference?
np.abs(b2-b1).max()
# 7.059628683189656e-15

but it is quite a bit slower:

# run_sum(a, n)
895 ms ± 11.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

# swv(a, n).sum(axis=1)
2.29 s ± 93.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
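Another direct (non-cumsum) formulation, as a sketch for comparison: convolve with a window of ones. Each output element is computed as an independent dot product, so errors don't accumulate across windows, but the cost grows as O(l*n):

b3 = np.convolve(a, np.ones(n), mode='valid')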

2 Comments

Thank you. I didn't know about this trick. I will try it. But the speed is also crucial
Thank you for the help. I think I finally found a quicker solution. The idea is to split the array into large and small elements and independently calculate the cumsum. See the update to my question.

Here's the idea I would suggest: have a fast path and an accurate path, and choose between them based on whether the fast path will produce the right answer.

A simple and fast way that you can implement a sum like this is to take the value that you used at the previous step, and subtract the element which will be removed from the window, and add the element which is entering the window.

However, when the elements in the array vary by many orders of magnitude, this results in a loss of precision, because subtracting a large element from the running sum will not completely remove its contribution.

Therefore, I would suggest tracking how much error the result could have relative to summing the numbers in the window directly, and recomputing the sum whenever that error grows too large. (I call this "restarting" the running sum, because it removes the error accumulated from previous steps.)

I also used Numba so that I could write an efficient loop.

Example:

import numpy as np
import numba as nb


a = np.array([-9999.999966666666, -5.7847991744735696e-08, -2.983181169098924e-08, -2.093995193995538e-08, -1.6894298480290635e-08, -1.4869270858640853e-08, -1.3961300790320832e-08, -1.3834651535607362e-08, -1.4395718046705324e-08, -1.570696320519037e-08])

@nb.njit()
def run_sum_restart(a, n, rtol):
    eps = np.finfo(a.dtype).eps
    sum_ = a[:n].sum()
    out = np.zeros(len(a) - n + 1)
    out[0] = sum_
    addptr = n
    subptr = 0
    err = 0
    for i in range(1, len(out)):
        sum_ += a[addptr] - a[subptr]
        # Calculate max error for a[addptr] - a[subptr]
        err += (max(abs(a[addptr]), abs(a[subptr]))) * eps
        # Calculate max error for adding that to sum
        err += abs(sum_) * eps
        addptr += 1
        subptr += 1
        if err > rtol * abs(sum_):
            # Restart sum
            new_sum = a[subptr:addptr].sum()
            # print("Max relative error at ", err / sum_)
            # print("Restarted, found error of", (new_sum - sum_) / sum_)
            sum_ = new_sum
            err = 0
        out[i] = sum_
    return out


print(run_sum_restart(a, 3, rtol=1e-5))

Output:

[-9.99999997e+03 -1.08619755e-07 -6.76660621e-08 -5.27035213e-08
 -4.57248701e-08 -4.26652232e-08 -4.21916704e-08 -4.39373328e-08]

Timings for @mozway's benchmark:

Original
634 ms ± 5.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
Restart sum version
582 ms ± 9.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

So this is both more accurate and faster.

2 Comments

Thank you! That's a good idea. I've never used Numba before
But I think I've found an even better way to do it. The idea is to split the array into large and small elements and independently calculate the cumsum. See update to my question.
0
import numpy as np
import pandas as pd
# Large dataset
data = {'values': np.random.randint(1, 100000, size=1000000)}  
df = pd.DataFrame(data)
print(df)
"""
        values
0        63699
1        43508
2        42814
3        41022
4        21850
...        ...
999995    8363
999996   48538
999997   81965
999998   16214
999999    4172
"""
n = 5  # Window size
b = np.lib.stride_tricks.sliding_window_view(df['values'].to_numpy(),n)
print('b :',b)
"""
b :  [[61846 93026 10783 15297  2129]
 [93026 10783 15297  2129 19732]
 [10783 15297  2129 19732 24161]
 ...
 [22289 12440 46910 58152 24534]
 [12440 46910 58152 24534 73633]
 [46910 58152 24534 73633 85820]]
"""
cumsum = np.cumsum(b, axis=1)

# Calculate the running sum by selecting the last element of each window's cumulative sum
Output = cumsum_last_elements = cumsum[:, -1]
# this is the Output
print(cumsum_last_elements)
# Check: 1,000,000 - 5 + 1 = 999996, so the output shape should be (len(a) - n + 1,), which is correct
print(cumsum_last_elements.shape)
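Note that the last element of each row's cumulative sum is just the row sum, so the same result can be obtained more directly (a sketch):

Output_direct = b.sum(axis=1)
print(np.array_equal(cumsum_last_elements, Output_direct))  # True; exact, since the data are integers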

