I'm doing matrix calculations, and in order to improve performance, I'm programming this process using Rcpp. However, I found that the results of the two calculations could not be consistent because of the accuracy problem.
In order to reduce the memory footprint, I use big.memory to save data to the hard disk. To speed up the computation, use openmpi to speed up the for loop. The intermediate calculation involves only two steps, one is the Hadamard product operation and the other is the normal matrix multiplication. The data type I used was double, and according to the IEEE 754 standard, the number of decimal places after the decimal point should be the same in R and Rcpp. I don't know what went wrong.
I found that this inconsistency was not due to R and Rcpp passing, when I read TRAN (where it is decimal) matrices using R in Linux, the data was already inconsistent, executing the code on Windows would be consistent. Even though I tried to calculate with R in Linux from the very raw data, the resulting data was still not what I expected.
Code in Rcpp:
// [[Rcpp::depends(RcppArmadillo)]]
// [[Rcpp::depends(bigmemory)]]
#include <RcppArmadillo.h>
// #include <Rcpp.h>
#include <bigmemory/MatrixAccessor.hpp>
#include <omp.h>
using namespace std;
using namespace Rcpp;
// using namespace arma;
// using namespace Eigen;
// function for convert R matrix class to arma::mat
arma::mat rMatrixToArmaMat(const Rcpp::NumericMatrix& rMat) {
return as<arma::mat>(rMat);
}
// [[Rcpp::export]]
arma::mat convertRMatrixToArmaMat(Rcpp::NumericMatrix rMat) {
return rMatrixToArmaMat(rMat);
}
// Logic for ca_epi_data
template <typename T>
void ca_epi_data(Rcpp::XPtr<BigMatrix> epi_data,
const Rcpp::XPtr<BigMatrix> combos,
const arma::mat& epi1,
const arma::mat& epi2,
const arma::mat& TRAN,
const NumericMatrix& Y,
const NumericMatrix& trdata1,
const NumericMatrix& trdata2,
int threads=0) {
omp_set_num_threads(threads);
MatrixAccessor<T> epi_col = MatrixAccessor<T>(*epi_data);
MatrixAccessor<int> combos_col = MatrixAccessor<int>(*combos);
// Check dimensions
if (epi_data->nrow() != TRAN.n_rows) {
Rcpp::stop("Mismatch between TRAN rows and epi_data rows.");
}
if (epi_data->ncol() != combos->ncol()+1+2*trdata1.ncol()) {
Rcpp::stop("Mismatch between combos columns and epi_data columns.");
}
// copy Y, trdata1 and trdata2 to epi_data
#pragma omp parallel for
for (size_t j = 0; j < Y.ncol(); j++) {
if (j >= Y.ncol()) {
Rcpp::stop("Col index exceeds Y cols.");
}
for (size_t k = 0; k < Y.nrow(); k++){
if (k >= Y.nrow()) {
Rcpp::stop("Row index exceeds trdata1 rows.");
}
// epi_col[i] is pointer,epi_col[i][j] is value.
epi_col[j][k] = Y(k,j);
}
}
#pragma omp parallel for
for (size_t j = 0; j < trdata1.ncol(); j++) {
if (j >= trdata1.ncol()) {
Rcpp::stop("Col index exceeds trdata1 cols.");
}
for (size_t k = 0; k < trdata1.nrow(); k++){
if (k >= trdata1.nrow()) {
Rcpp::stop("Row index exceeds trdata1 rows.");
}
// epi_col[i] is pointer,epi_col[i][j] is value.
epi_col[j+1][k] = trdata1(k,j);
}
}
#pragma omp parallel for
for (size_t j = 0; j < trdata2.ncol(); j++) {
if (j >= trdata2.ncol()) {
Rcpp::stop("Col index exceeds trdata1 cols.");
}
for (size_t k = 0; k < trdata2.nrow(); k++){
if (k >= trdata2.nrow()) {
Rcpp::stop("Row index exceeds trdata2 rows.");
}
// epi_col[i] is pointer,epi_col[i][j] is value.
epi_col[j+1+trdata1.ncol()][k] = trdata2(k,j);
}
}
#pragma omp parallel for
for(size_t i=0; i < combos->ncol();i++) {
arma::colvec col_epi1 = epi1.col(combos_col[i][0]-1);
arma::colvec col_epi2 = epi2.col(combos_col[i][1]-1);
// calculate hadamard product
arma::colvec hadamard_product = col_epi1 % col_epi2;
// matrix *
arma::colvec result = TRAN * hadamard_product;
// apply the result to big.matrix
if (result.n_elem != epi_data->nrow()) {
Rcpp::stop("Result vector length does not match epi_data row count.");
}
for (size_t j = 0; j < result.n_elem; j++) {
if (j >= epi_data->nrow()) {
Rcpp::stop("Row index exceeds epi_data rows.");
}
// epi_col[i] is pointer,epi_col[i][j] is value.
epi_col[i+Y.ncol()+trdata1.ncol()+trdata2.ncol()][j] = result[j];
}
// #pragma omp critical
// {
// Rcpp::Rcout << "Processed column " << i << " of " << m << std::endl;
// }
}
// return epi_data;
}
// [[Rcpp::export]]
void ca_epi_data(SEXP pEpi_data,
SEXP pCombos,
arma::mat& epi1,
arma::mat& epi2,
arma::mat& TRAN,
NumericMatrix& Y,
NumericMatrix& trdata1,
NumericMatrix& trdata2,
int threads=0){
XPtr<BigMatrix> xpEpi_data(pEpi_data);
XPtr<BigMatrix> xpCombos(pCombos);
if(4 != xpCombos->matrix_type()){
Rcpp::stop("big.matrix object of pCombos should be int type");
}
switch(xpEpi_data->matrix_type()) {
case 1:
return ca_epi_data<char>(xpEpi_data, xpCombos, epi1, epi2, TRAN,
Y, trdata1, trdata2, threads);
case 2:
return ca_epi_data<short>(xpEpi_data, xpCombos, epi1, epi2, TRAN,
Y, trdata1, trdata2, threads);
case 4:
return ca_epi_data<int>(xpEpi_data, xpCombos, epi1, epi2, TRAN,
Y, trdata1, trdata2, threads);
case 8:
return ca_epi_data<double>(xpEpi_data, xpCombos, epi1, epi2, TRAN,
Y, trdata1, trdata2, threads);
default:
throw Rcpp::exception("unknown type detected for big.matrix object!");
}
}
Call in R:
library(Rcpp)
library(bigmemory)
# load data
trait = "PH"
load(file = "Adata.RData")
load(file = "Ddata.RData")
load(paste0("trAdata_",trait,".RData"))
load(paste0("trDdata_",trait,".RData"))
load(paste0("TRAN_",trait,".RData"))
load(paste0("Y_",trait,".RData"))
load(file = "./total_ma_names.rdata")
nmar=500
TRAN = TRAN
eff_type = "ad"
epi1 = Adata
epi2 = Ddata
trdata1 = trAdata
trdata2 = trDdata
epi_data <- filebacked.big.matrix(
nrow = nrow(TRAN),
ncol = ncol(combos)+2*nmar+ncol(Y),
type = "double", # char是c++的单个字符
backingfile = "epi_data.bin",
backingpath = dirname("epi_data"),
descriptorfile = "epi_data.des",
dimnames = c(NULL, NULL)
)
# source Cpp file
sourceCpp(file = "/mnt/f/07-CAUS/01-Linux-service/project_18DH-heterosis/Analysis/31-gwas_mph/test/code_for_epi/source/epi.cpp")
# convert R matrix to arma::mat
epi1 <- convertRMatrixToArmaMat(epi1)
epi2 <- convertRMatrixToArmaMat(epi2)
TRAN <- convertRMatrixToArmaMat(TRAN)
# generate a new matrix
tryCatch({
ca_epi_data(epi_data@address, combos@address, epi1, epi2, TRAN,
Y, trdata1,trdata2, threads = 1)
}, error = function(e) {
print(e)
})
The results of R and Rcpp are compared.
The resulting matrix should consist of two columns that are negative to each other. However, in the Rcpp, although the results appear to be the same, the operation + can show that they are slightly different. This affects my subsequent matrix qr decomposition, and I can't get a real rank.
Result of r:
>epidesign <- TRAN%*%(epi1[,m]*epi2[,n])
>subdata <- data.frame(y=Y,trdata1[,m],trdata2[,n],epi=epidesign)
> head(subdata[,c(3,4)])
S3_229468875 epi
1 -0.19933658 0.19933658
2 0.15319562 -0.15319562
3 0.11634120 -0.11634120
4 0.14881466 -0.14881466
5 0.07223461 -0.07223461
6 0.19493439 -0.19493439
> subdata[,4]+subdata[,3]
[1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[58] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[115] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[172] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[229] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[286] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[343] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[400] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[457] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
Result of Rcpp matrix operation:
> epi_data[,c(682,3661)]
[,1] [,2]
[1,] -0.1993365796 0.1993365796
[2,] 0.1531956224 -0.1531956224
[3,] 0.1163411954 -0.1163411954
[4,] 0.1488146626 -0.1488146626
[5,] 0.0722346106 -0.0722346106
[6,] 0.1949343899 -0.1949343899
[7,] 0.2105010152 -0.2105010152
[8,] -0.2105474946 0.2105474946
[9,] -0.1859160503 0.1859160503
[10,] -0.0950061868 0.0950061868
> epi_data[,682]+epi_data[,3661]
[1] -2.775558e-17 2.775558e-17 5.551115e-17 2.775558e-17 0.000000e+00 5.551115e-17
[7] 2.775558e-17 0.000000e+00 -5.551115e-17 2.775558e-17 0.000000e+00 0.000000e+00
[13] 1.387779e-17 -6.938894e-17 2.775558e-17 1.110223e-16 -1.387779e-17 -2.775558e-17
[19] 3.469447e-18 -1.387779e-17 2.775558e-17 1.110223e-16 -2.220446e-16 -2.775558e-17
[25] 2.775558e-17 6.938894e-18 -3.469447e-17 8.326673e-17 0.000000e+00 -2.775558e-17
[31] 1.110223e-16 2.775558e-17 1.110223e-16 0.000000e+00 2.775558e-17 6.938894e-18
[37] -2.775558e-16 2.775558e-17 5.551115e-17 0.000000e+00 -2.775558e-17 0.000000e+00