# code for
# C. Kooperberg, T. G. Fazzio, J. J. Delrow, and T. Tskuiyama (2002)
# "Improved background correction for spotted DNA microarrays"
# Journal of Computational Biology, 9, 55-66.
# Copyright (C) 2001--2002 Charles Kooperberg
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# The text of the GNU General Public License, version 2, is available
# as http://www.gnu.org/copyleft or by writing to the Free Software
# Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# 1) rewrite organize.data to fit your own data structure
# 2) run
# > orgdata <- organize.data(mydata)
# > bcorrecteddata <- bground(orgdata)
# 3) column 1 of bcorrecteddata has the background corrected data
# on a log(2) scale (if log2=TRUE) or a regular scale (if log2=FALSE)
# for the 635nm channel; column 2 has the same data for the 532nm channel
# column 3 contains the quantity in equation (5) of Kooperberg et al. (2002)
# for whichever channel this quantity was largest
# 4) note that the log2 option doesn't just take the log-base 2 of the data
# from when log2=FALSE, as log2(E(X)) does not equal E(log2(X))
#
# Charles Kooperberg, 12/11/2002, clk@fhcrc.org
#
bground <- function(x,log2=TRUE)
{
dx <- mtopfunct(x,mvarfactor(x),log2)[,c(4,11,18)]
if(log2)dx[,1:2] <- dx[,1:2]/log(2)
dx <- data.frame(dx)
names(dx) <- c("x635","x532","warning")
dx
}
organize.data <- function(datafile)
{
xx <- datafile[,c(1,2,3,9,11,12,14,18,20,21,23,34,35)]
#1#column 1 - block
#2#column 2 - column nr
#3#column 3 - row nr
#4#column 9 - foreground 635nm - median
#5#column 11 - foreground 635nm - SD
#6#column 12 - background 635nm - median
#7#column 14 - background 635nm - SD
#8#column 18 - foreground 532nm - median
#9#column 20 - foreground 532nm - SD
#10#column 21 - background 532nm - median
#11#column 23 - background 532nm - SD
#12#column 34 - forground pixels
#13#column 35 - background pixels
yy <- is.na(xx[,1])
for(i in 2:13) yy <- yy+is.na(xx[,i])
xx <- xx[yy==0,]
xx
}
mvarfactor <- function(x)
{
xmm <- max(x[,1])
b1 <- varaux1(x,6,xmm)
b2 <- varaux1(x,10,xmm)
c1 <- x[, 7]/sqrt(x[, 13])
c2 <- x[, 11]/sqrt(x[, 13])
m1 <- lm(b1 ~ c1 - 1, weights = 1/(c1 + 1))
m2 <- lm(b2 ~ c2 - 1, weights = 1/(c2 + 1))
c(m1$coef,m2$coef)
}
varaux1 <- function(x, j,xmm)
{
uu <- varaux2(x, 1, j)
for(i in 2:xmm) uu <- c(uu, varaux2(x, i, j))
uu
}
varaux2 <- function(x, i, j)
{
v1 <- x[x[, 1] == i, c(2, 3, j)]
mm <- max(c(x[,2],x[,3]))+5
v2 <- rep(-100, mm*mm)
v3 <- 1 + v1[, 1] + v1[, 2] * mm
v2[v3] <- v1[, 3]
mm1 <- mm-1
v4 <- matrix(v2, ncol = mm)
v4a <- v4[c(-1, -mm), c(-1, -mm)]
v4b <- v4[c(-1, -2), c(-1, -mm)]
v4c <- v4[c(-1, -mm), c(-1, -2)]
v4d <- v4[c(-mm1, -mm), c(-1, -mm)]
v4e <- v4[c(-1, -mm), c(-mm1, -mm)]
v4x <- cbind(as.vector(v4a), as.vector(v4b), as.vector(v4c), as.vector(v4d), as.vector(v4e))
v4x[v4x > -100] <- v4x[v4x > -100] + 1
v4x[v4x == -100] <- 0
v5 <- apply(v4x, 1, sum)
v6 <- apply(v4x != 0, 1, sum)
v5 <- v5/v6
v7 <- v4x - v5
v7[v4x == 0] <- 0
v8 <- apply(v7^2, 1, sum)/(v6 - 1)
v9 <- matrix(0, ncol = mm, nrow = mm)
v9[2:mm1, 2:mm1] <- v8
v9[v4 <= 0] <- 0
v10 <- as.vector(v9[v4 > 0])
sqrt(v10)
}
mtopfunct <- function(x,mx,log2)
{
y1 <- x[, c(4, 5, 12, 6, 7, 13)]
y2 <- x[, c(8, 9, 12, 10, 11, 13)]
y1[, c(2, 5)] <- y1[, c(2, 5)] * mx[1]
y2[, c(2, 5)] <- y2[, c(2, 5)] * mx[2]
t1 <- vtopaux1(y1,log2)
t2 <- vtopaux1(y2,log2)
x2 <- cbind(t1, t2)
vv <- x2[, 7]
vv[x2[, 14] > vv] <- x2[x2[, 14] > vv, 14]
vv[vv < 0] <- 0
x2 <- cbind(x2, x2[, 4] - x2[, 11], sqrt(x2[, 5]^2 + x2[, 12]^2),
1 * (x2[ , 6] + x2[, 13] > 0), vv)
x2
}
vtopaux1 <- function(x,log2)
{
ww7 <- rep(0,32)
yy7 <- rep(0,32)
ww7[1 ]<- 0.00178328072169643; yy7[1 ]<- 0.99930504173577217;
ww7[2 ]<- 0.00414703326056247; yy7[2 ]<- 0.99634011677195533;
ww7[3 ]<- 0.00650445796897836; yy7[3 ]<- 0.99101337147674429;
ww7[4 ]<- 0.00884675982636395; yy7[4 ]<- 0.98333625388462598;
ww7[5 ]<- 0.01116813946013113; yy7[5 ]<- 0.97332682778991098;
ww7[6 ]<- 0.01346304789671864; yy7[6 ]<- 0.96100879965205377;
ww7[7 ]<- 0.01572603047602472; yy7[7 ]<- 0.94641137485840277;
ww7[8 ]<- 0.01795171577569734; yy7[8 ]<- 0.92956917213193957;
ww7[9 ]<- 0.02013482315353021; yy7[9 ]<- 0.91052213707850282;
ww7[10]<- 0.02227017380838325; yy7[10]<- 0.88931544599511414;
ww7[11]<- 0.02435270256871087; yy7[11]<- 0.86599939815409277;
ww7[12]<- 0.02637746971505466; yy7[12]<- 0.84062929625258032;
ww7[13]<- 0.02833967261425948; yy7[13]<- 0.81326531512279754;
ww7[14]<- 0.03023465707240248; yy7[14]<- 0.78397235894334139;
ww7[15]<- 0.03205792835485155; yy7[15]<- 0.75281990726053194;
ww7[16]<- 0.03380516183714161; yy7[16]<- 0.71988185017161088;
ww7[17]<- 0.03547221325688239; yy7[17]<- 0.68523631305423327;
ww7[18]<- 0.03705512854024005; yy7[18]<- 0.64896547125465731;
ww7[19]<- 0.03855015317861563; yy7[19]<- 0.61115535517239328;
ww7[20]<- 0.03995374113272034; yy7[20]<- 0.57189564620263400;
ww7[21]<- 0.04126256324262353; yy7[21]<- 0.53127946401989457;
ww7[22]<- 0.04247351512365359; yy7[22]<- 0.48940314570705296;
ww7[23]<- 0.04358372452932345; yy7[23]<- 0.44636601725346409;
ww7[24]<- 0.04459055816375657; yy7[24]<- 0.40227015796399163;
ww7[25]<- 0.04549162792741814; yy7[25]<- 0.35722015833766813;
ww7[26]<- 0.04628479658131442; yy7[26]<- 0.31132287199021097;
ww7[27]<- 0.04696818281621002; yy7[27]<- 0.26468716220876742;
ww7[28]<- 0.04754016571483031; yy7[28]<- 0.21742364374000708;
ww7[29]<- 0.04799938859645831; yy7[29]<- 0.16964442042399283;
ww7[30]<- 0.04834476223480295; yy7[30]<- 0.12146281929612056;
ww7[31]<- 0.04857546744150343; yy7[31]<- 0.07299312178779904;
ww7[32]<- 0.04869095700913972; yy7[32]<- 0.02435029266342443;
ww7 <- c(ww7,ww7)
yy7 <- (c(yy7,-yy7))/2+0.5
yy700 <- yy7
for(i in 1:99)yy700 <- c(yy700,yy7+i)
xx <- x[, 1]
xy <- x[, 4]
xs1 <- x[, 2]/sqrt(x[, 3])
xs2 <- x[, 5]/sqrt(x[, 6])
t1 <- rep(0, length(xx))
t1 <- cbind(t1, t1, t1, t1, t1, t1, t1)
for(i in 1:length(xx)) {
t1[i, ] <- vtopaux2(xx[i], xy[i], xs1[i], xs2[i],ww7,yy700,log2)
if(i/1000 == floor(i/1000))
print(i)
}
t1
}
vtopaux2 <- function(x, y, sig1, sig2,ww7,yy7x,log2)
{
upp <- 10 * sig1 + x
nl <- 11
wwh <- rep(ww7,nl)
yyh <- (yy7x[1:(nl*length(ww7))])/nl
b1 <- yyh * upp
x1 <- vtopaux4(x, y, sig1, sig2)
c1 <- (vtopaux3(b1, x, y, sig1, sig2) * upp)/x1
c1w <- c1*wwh
mwwh <- mean(wwh)
m1 <- mean(c1w)/mwwh
m1w <- m1*mwwh
m2 <- mean(c1w * b1)/m1w
tmp <- b1-m2
m3 <- mean(c1w * tmp * tmp)/m1w
if(log2){
b2 <- log(b1)
}
else {
b2 <- b1
}
m4 <- mean(c1w * b2)/(m1*mwwh)
tmp <- b2-m4
m5 <- sqrt(mean(c1w * tmp * tmp)/m1w)
m6 <- 1 * ((abs(m1) - 1) > 0.05)
m7 <- (y - x)/sqrt(sig1^2 + sig2^2)
c(m1, m2, sqrt(m3), m4, m5, m6, m7)
}
vtopaux3 <- function(a, x, y, sig1, sig2)
{
u <- x - a
v <- y
s <- sig1^2
t <- sig2^2
d10 <- 1/sqrt(s + t) * dnorm((u - v)/sqrt(s + t))
d4 <- (u * t + v * s)/(s + t)
d5 <- sqrt((s * t)/(s + t))
d6 <- d10 * (1 - pnorm(0, d4, d5))
d6
}
vtopaux4 <- function(x, y, sig1, sig2)
{
upp <- 4 * sig2 + y
b1 <- (((1:10000) - 0.5) * upp)/10000
m1 <- mean(vtopaux5(b1, x, y, sig1, sig2)) * upp
m1
}
vtopaux5 <- function(x, y1, y2, sig1, sig2)
{
(1 - pnorm((x - y1)/sig1)) * dnorm((x - y2), sd = sig2)
}