0

(The last) follow up question: Adjusting values in a table based on another table, Splitting multiple values among multiple values

I have these tables in R:

myt_with_counts <- data.frame(
 name = c("a", "a", "a", "b", "b", "c", "c", "c", "d", "e", "e"),
 var1 = c(120, 80, 100, 20, 30, 15, 25, 35, 40, 50, 60), 
 var2 = c(90, 60, 70, 25, 35, 20, 30, 40, 45, 55, 65),
 count = c(10, 5, 15, 8, 12, 6, 9, 3, 7, 11, 4)
)

  rel_table <- data.frame(
      name = c("a", "b", "c"),
      proportion = c(0.5, 0.4, 0.1)
    )

My Question: I want to redistribute var1 and var2 values from group "a" to other groups based on specified proportions (50% stays, 40% to "b", 10% to "C"), where the amount removed from each "a" row and added to each receiving row is weighted by a count variable. The total sum of each value column remains unchanged after redistribution.

I really struggled to do this:

redistribute <- function(x, val.cols, name.col, count.col, prop, from) {
   val.col <- match(val.cols, names(x))
   x.names <- x[[name.col]]
   prop.names <- prop[[1]]
   rows <- match(prop.names, x.names)
   
   from_totals <- colSums(x[x.names == from, val.col, drop=FALSE])
   from_counts <- x[x.names == from, count.col]
   total_from_counts <- sum(from_counts)
   
   for(i in seq_along(prop.names)) {
       group_name <- prop.names[i]
       group_prop <- prop[[2]][i]
       group_rows <- x.names == group_name
       
       if(group_name == from) {
           proportion_to_remove <- 1 - group_prop
           amount_to_remove <- from_totals * proportion_to_remove
           for(j in seq_along(val.col)) {
               removal_per_count <- amount_to_remove[j] / total_from_counts
               x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] - from_counts * removal_per_count
           }
       } else {
           group_counts <- x[group_rows, count.col]
           total_group_counts <- sum(group_counts)
           amount_to_add <- from_totals * group_prop
           for(j in seq_along(val.col)) {
               addition_per_count <- amount_to_add[j] / total_group_counts
               x[group_rows, val.col[j]] <- x[group_rows, val.col[j]] + group_counts * addition_per_count
           }
       }
   }
   x
}

result <- redistribute(myt_with_counts, val.cols=c("var1", "var2"), name.col="name", count.col="count", prop=rel_table, from="a")
0

1 Answer 1

1

Again, same concept as the previous question. Only here, instead of dividing by number of rows, we multiply by current count divided by total count.

library(dplyr)
library(rlang)

distribute_fn_weighted <- \(dat, idCol, fromRow, toRows, 
                            colVars, props, countCol) {
  all_targets <- c(fromRow, toRows)
  prop_vec <- setNames(props, all_targets)
  source_totals <- dat %>% 
    filter({{ idCol }} == fromRow) %>% 
    summarise(across({{ colVars }}, sum)) %>%
    as.list()
  group_count_totals <- dat %>%
    filter({{ idCol }} %in% all_targets) %>%
    group_by({{ idCol }}) %>%
    summarise(total_count = sum({{ countCol }}), .groups = "drop")
  
  dat %>%
    left_join(group_count_totals, by = as_label(enquo(idCol))) %>%
    mutate(
      across({{ colVars }}, ~ {
        current_id <- {{ idCol }}
        current_count <- {{ countCol }}
        case_when(
          current_id == fromRow ~ 
            .x - (source_totals[[cur_column()]] *
                    (1 - prop_vec[as.character(fromRow)]) * 
                    current_count / total_count),
          current_id %in% toRows ~ .x + 
            (source_totals[[cur_column()]] * 
               prop_vec[as.character(current_id)] * 
               current_count / total_count),
          TRUE ~ .x
        )
      })
    ) %>%
    select(-total_count)
}
distribute_fn_weighted(
  dat = myt_with_counts, 
  idCol = name, 
  fromRow = "a", 
  toRows = c("b", "c"), 
  colVars = c(var1, var2), 
  props = c(0.5, 0.4, 0.1),
  countCol = count
)
#>    name var1     var2 count
#> 1     a   70 53.33333    10
#> 2     a   55 41.66667     5
#> 3     a   25 15.00000    15
#> 4     b   68 60.20000     8
#> 5     b  102 87.80000    12
#> 6     c   25 27.33333     6
#> 7     c   40 41.00000     9
#> 8     c   40 43.66667     3
#> 9     d   40 45.00000     7
#> 10    e   50 55.00000    11
#> 11    e   60 65.00000     4

Created on 2025-07-25 with reprex v2.1.1

Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.