Proportionately redistributing values

Question

(The last) follow up question: Adjusting values in a table based on another table, Splitting multiple values among multiple values

I have these tables in R:

myt_with_counts <- data.frame(
 name = c("a", "a", "a", "b", "b", "c", "c", "c", "d", "e", "e"),
 var1 = c(120, 80, 100, 20, 30, 15, 25, 35, 40, 50, 60), 
 var2 = c(90, 60, 70, 25, 35, 20, 30, 40, 45, 55, 65),
 count = c(10, 5, 15, 8, 12, 6, 9, 3, 7, 11, 4)
)

  rel_table <- data.frame(
      name = c("a", "b", "c"),
      proportion = c(0.5, 0.4, 0.1)
    )

My Question: I want to redistribute var1 and var2 values from group "a" to other groups based on specified proportions (50% stays, 40% to "b", 10% to "C"), where the amount removed from each "a" row and added to each receiving row is weighted by a count variable. The total sum of each value column remains unchanged after redistribution.

I really struggled to do this:

redistribute <- function(x, val.cols, name.col, count.col, prop, from) {
   val.col <- match(val.cols, names(x))
   x.names <- x[[name.col]]
   prop.names <- prop[[1]]
   rows <- match(prop.names, x.names)
   
   from_totals <- colSums(x[x.names == from, val.col, drop=FALSE])
   from_counts <- x[x.names == from, count.col]
   total_from_counts <- sum(from_counts)
   
   for(i in seq_along(prop.names)) {
       group_name <- prop.names[i]
       group_prop <- prop[[2]][i]
       group_rows <- x.names == group_name
       
       if(group_name == from) {
           proportion_to_remove <- 1 - group_prop
           amount_to_remove <- from_totals * proportion_to_remove
           for(j in seq_along(val.col)) {
               removal_per_count <- amount_to_remove[j] / total_from_counts
               x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] - from_counts * removal_per_count
           }
       } else {
           group_counts <- x[group_rows, count.col]
           total_group_counts <- sum(group_counts)
           amount_to_add <- from_totals * group_prop
           for(j in seq_along(val.col)) {
               addition_per_count <- amount_to_add[j] / total_group_counts
               x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] + group_counts * addition_per_count
           }
       }
   }
   x
}

result <- redistribute(myt_with_counts, val.cols=c("var1", "var2"), name.col="name", count.col="count", prop=rel_table, from="a")

M-- · Accepted Answer · 2025-07-25 23:33:15Z

Again, same concept as the previous question. Only here, instead of dividing by number of rows, we multiply by current count divided by total count.

library(dplyr)
library(rlang)

distribute_fn_weighted <- \(dat, idCol, fromRow, toRows, 
                            colVars, props, countCol) {
  all_targets <- c(fromRow, toRows)
  prop_vec <- setNames(props, all_targets)
  source_totals <- dat %>% 
    filter({{ idCol }} == fromRow) %>% 
    summarise(across({{ colVars }}, sum)) %>%
    as.list()
  group_count_totals <- dat %>%
    filter({{ idCol }} %in% all_targets) %>%
    group_by({{ idCol }}) %>%
    summarise(total_count = sum({{ countCol }}), .groups = "drop")
  
  dat %>%
    left_join(group_count_totals, by = as_label(enquo(idCol))) %>%
    mutate(
      across({{ colVars }}, ~ {
        current_id <- {{ idCol }}
        current_count <- {{ countCol }}
        case_when(
          current_id == fromRow ~ 
            .x - (source_totals[[cur_column()]] *
                    (1 - prop_vec[as.character(fromRow)]) * 
                    current_count / total_count),
          current_id %in% toRows ~ .x + 
            (source_totals[[cur_column()]] * 
               prop_vec[as.character(current_id)] * 
               current_count / total_count),
          TRUE ~ .x
        )
      })
    ) %>%
    select(-total_count)
}

distribute_fn_weighted(
  dat = myt_with_counts, 
  idCol = name, 
  fromRow = "a", 
  toRows = c("b", "c"), 
  colVars = c(var1, var2), 
  props = c(0.5, 0.4, 0.1),
  countCol = count
)
#>    name var1     var2 count
#> 1     a   70 53.33333    10
#> 2     a   55 41.66667     5
#> 3     a   25 15.00000    15
#> 4     b   68 60.20000     8
#> 5     b  102 87.80000    12
#> 6     c   25 27.33333     6
#> 7     c   40 41.00000     9
#> 8     c   40 43.66667     3
#> 9     d   40 45.00000     7
#> 10    e   50 55.00000    11
#> 11    e   60 65.00000     4

^{Created on 2025-07-25 with reprex v2.1.1}

Collectives™ on Stack Overflow

Proportionately redistributing values

1 Answer 1

Comments

Your Answer

Linked

Hot Network Questions

Collectives™ on Stack Overflow

1 Answer 1

Comments

Your Answer

Sign up or log in

Post as a guest

Linked

Related