patternpythonMinor
Nonlinear Scaling Normalization
Viewed 0 times
normalization, scaling, nonlinear
Problem
I have a procedure for normalizing variables, the details of which can be viewed in this white paper.
I would like to be able to make the routine scalable to handle any number of variables. Below is the R code for a 4 variable example with a regular correlation coefficient substituted for the preferred nonlinear coefficient (for simplicity and demonstration purposes). Any insights or comments are appreciated.
```
VN_Normalization <- function(A1, A2, A3, A4){
#Array1 Scaling Factor
RG_Factor_A1_A2<- mean(A1)/mean(A2)
RG_Factor_A1_A3<- mean(A1)/mean(A3)
RG_Factor_A1_A4<- mean(A1)/mean(A4)
#Array2 Scaling Factor
RG_Factor_A2_A1<- mean(A2)/mean(A1)
RG_Factor_A2_A3<- mean(A2)/mean(A3)
RG_Factor_A2_A4<- mean(A2)/mean(A4)
#Array3 Scaling Factor
RG_Factor_A3_A1<- mean(A3)/mean(A1)
RG_Factor_A3_A2<- mean(A3)/mean(A2)
RG_Factor_A3_A4<- mean(A3)/mean(A4)
#Array4 Scaling Factor
RG_Factor_A4_A1<- mean(A4)/mean(A1)
RG_Factor_A4_A2<- mean(A4)/mean(A2)
RG_Factor_A4_A3<- mean(A4)/mean(A3)
#A1 as Reference Gene
A1_1 <- A1
A2_1 <- A2RG_Factor_A1_A2abs((cor(A1,A2)))
A3_1 <- A3RG_Factor_A1_A3abs((cor(A1,A3)))
A4_1 <- A4RG_Factor_A1_A4abs((cor(A1,A4)))
#A2 as Reference Gene
A1_2 <- A1RG_Factor_A2_A1abs((cor(A1,A2)))
A2_2 <- A2
A3_2 <- A3RG_Factor_A2_A3abs((cor(A2,A3)))
A4_2 <- A4RG_Factor_A2_A4abs((cor(A2,A4)))
#A3 as Reference Gene
A1_3 <- A1RG_Factor_A3_A1abs((cor(A1,A3)))
A2_3 <- A2RG_Factor_A3_A2abs((cor(A3,A2)))
A3_3 <- A3
A4_3 <- A4RG_Factor_A3_A4abs((cor(A3,A4)))
#A4 as Reference Gene
A1_4 <- A1RG_Factor_A4_A1abs((cor(A1,A4)))
A2_4 <- A2RG_Factor_A4_A2abs((cor(A4,A2)))
A3_4 <- A3RG_Factor_A4_A3abs((cor(A4,A3)))
A4_4 <- A4
A1_Normalized <- (A1_1+A1_2+A1_3+A1_4)/4
A2_Normalized <- (A2_1+A2_2+A2_3+A2_4)/4
A3_Normalized <- (A3_1+A3_2+A3_3+A3_4)/4
A4_Normalized <- (A4_1+A4_2+A4_3+A4_4)/4
p = sample(rainbow(10))
boxplot(list(A1,A2,A3,A4,A1_Normalized,A2_Normalized,A3_Normalized,A4_Normalized),
las=2, names=c("Array1","Array2","Array3","Array4",
I would like to be able to make the routine scalable to handle any number of variables. Below is the R code for a 4 variable example with a regular correlation coefficient substituted for the preferred nonlinear coefficient (for simplicity and demonstration purposes). Any insights or comments are appreciated.
```
VN_Normalization <- function(A1, A2, A3, A4){
#Array1 Scaling Factor
RG_Factor_A1_A2<- mean(A1)/mean(A2)
RG_Factor_A1_A3<- mean(A1)/mean(A3)
RG_Factor_A1_A4<- mean(A1)/mean(A4)
#Array2 Scaling Factor
RG_Factor_A2_A1<- mean(A2)/mean(A1)
RG_Factor_A2_A3<- mean(A2)/mean(A3)
RG_Factor_A2_A4<- mean(A2)/mean(A4)
#Array3 Scaling Factor
RG_Factor_A3_A1<- mean(A3)/mean(A1)
RG_Factor_A3_A2<- mean(A3)/mean(A2)
RG_Factor_A3_A4<- mean(A3)/mean(A4)
#Array4 Scaling Factor
RG_Factor_A4_A1<- mean(A4)/mean(A1)
RG_Factor_A4_A2<- mean(A4)/mean(A2)
RG_Factor_A4_A3<- mean(A4)/mean(A3)
#A1 as Reference Gene
A1_1 <- A1
A2_1 <- A2RG_Factor_A1_A2abs((cor(A1,A2)))
A3_1 <- A3RG_Factor_A1_A3abs((cor(A1,A3)))
A4_1 <- A4RG_Factor_A1_A4abs((cor(A1,A4)))
#A2 as Reference Gene
A1_2 <- A1RG_Factor_A2_A1abs((cor(A1,A2)))
A2_2 <- A2
A3_2 <- A3RG_Factor_A2_A3abs((cor(A2,A3)))
A4_2 <- A4RG_Factor_A2_A4abs((cor(A2,A4)))
#A3 as Reference Gene
A1_3 <- A1RG_Factor_A3_A1abs((cor(A1,A3)))
A2_3 <- A2RG_Factor_A3_A2abs((cor(A3,A2)))
A3_3 <- A3
A4_3 <- A4RG_Factor_A3_A4abs((cor(A3,A4)))
#A4 as Reference Gene
A1_4 <- A1RG_Factor_A4_A1abs((cor(A1,A4)))
A2_4 <- A2RG_Factor_A4_A2abs((cor(A4,A2)))
A3_4 <- A3RG_Factor_A4_A3abs((cor(A4,A3)))
A4_4 <- A4
A1_Normalized <- (A1_1+A1_2+A1_3+A1_4)/4
A2_Normalized <- (A2_1+A2_2+A2_3+A2_4)/4
A3_Normalized <- (A3_1+A3_2+A3_3+A3_4)/4
A4_Normalized <- (A4_1+A4_2+A4_3+A4_4)/4
p = sample(rainbow(10))
boxplot(list(A1,A2,A3,A4,A1_Normalized,A2_Normalized,A3_Normalized,A4_Normalized),
las=2, names=c("Array1","Array2","Array3","Array4",
Solution
I feel the key to generalizing your code is to store your variables in a matrix. Then let vectorized functions (`colMeans`, `cor`, `*`, etc.) do their magic:
```
A <- cbind(A1, A2, A3, A4)
VN_Normalization <- function(A) {
m <- colMeans(A)
RG <- m %o% (1/m)
scales <- colMeans(RG * abs(cor(A)))
A_Normalized <- t(t(A) * scales)
n <- ncol(A)
i <- seq_len(n)
labels <- c(sprintf("Array%i", i),
sprintf("Array%i_Normalized", i))
boxplot(cbind(A, A_Normalized),
las = 2, names = labels,
col = c(rep("white", n), rainbow(n)))
}
```
Please let me know if I missed anything (I assumed `A1`, `A2`, etc. were numeric vectors of equal lengths.)
Code Snippets
# Bind the input vectors column-wise into one matrix (one column per variable).
A <- cbind(A1, A2, A3, A4)
#' Normalize variables by mean ratio and absolute correlation
#'
#' Each column j of `A` is multiplied by a single scale factor: the average,
#' over every reference column i, of
#' `mean(A[, i]) / mean(A[, j]) * abs(cor(A[, i], A[, j]))`.
#' The reference column i == j contributes a factor of exactly 1.
#'
#' @param A Numeric matrix with one column per variable. Anything that
#'   `as.matrix()` turns into a numeric matrix (a numeric vector, an
#'   all-numeric data frame) is accepted as well.
#' @param plot If `TRUE` (the default) draw side-by-side boxplots of the raw
#'   and normalized columns. If `FALSE` nothing is drawn and only the values
#'   are computed.
#' @return Invisibly, the list produced by `boxplot()` (`stats`, `n`, `conf`,
#'   `out`, `group`, `names`) extended with two elements: `normalized`, the
#'   rescaled matrix with the same dimensions as `A`, and `scales`, the
#'   per-column factors that were applied.
VN_Normalization <- function(A, plot = TRUE) {
  # Accept vectors and data frames too; colMeans() rejects a bare vector.
  A <- as.matrix(A)
  if (!is.numeric(A) || ncol(A) < 1L) {
    stop("`A` must be a numeric matrix with at least one column.",
         call. = FALSE)
  }

  col_means <- colMeans(A)
  # An exactly-zero mean is the division-by-zero case: that column's scale
  # factor (and therefore the whole normalized column) becomes NaN.
  if (any(col_means == 0, na.rm = TRUE)) {
    warning("At least one column of `A` has a zero mean; ",
            "its normalized values will be NaN.", call. = FALSE)
  }

  # ratio[i, j] = mean of column i / mean of column j
  ratio <- col_means %o% (1 / col_means)
  # Averaging down each column j averages over all reference columns i.
  scales <- colMeans(ratio * abs(cor(A)))
  # Multiply every column by its own scale factor.
  A_Normalized <- sweep(A, 2L, scales, "*")

  n <- ncol(A)
  idx <- seq_len(n)
  labels <- c(sprintf("Array%i", idx),
              sprintf("Array%i_Normalized", idx))

  bp <- boxplot(cbind(A, A_Normalized),
                las = 2, names = labels,
                col = c(rep("white", n), rainbow(n)),
                plot = plot)

  # Keep every element boxplot() returned and add the computed results,
  # which were previously thrown away.
  invisible(c(bp, list(normalized = A_Normalized, scales = scales)))
}
# Context
StackExchange Code Review Q#88466, answer score: 4
Revisions (0)
No revisions yet.