R Code

# code for
# C. Kooperberg, T. G. Fazzio, J. J. Delrow, and T. Tskuiyama (2002)
# "Improved background correction for spotted DNA microarrays"
# Journal of Computational Biology, 9, 55-66.
# Copyright (C) 2001--2002 Charles Kooperberg
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# The text of the GNU General Public License, version 2, is available
# as http://www.gnu.org/copyleft or by writing to the Free Software
# Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# 1) rewrite organize.data to fit your own data structure
# 2) run
# > orgdata <- organize.data(mydata)
# > bcorrecteddata <- bground(orgdata)
# 3) column 1 of bcorrecteddata has the background corrected data
#     on a log(2) scale (if log2=TRUE) or a regular scale (if log2=FALSE)
#     for the 635nm channel; column 2 has the same data for the 532nm channel
#     column 3 contains the quantity in equation (5) of Kooperberg et al. (2002)
#     for whichever channel this quantity was largest
# 4) note that the log2 option doesn't just take the log-base 2 of the data
#     from when log2=FALSE, as log2(E(X)) does not equal E(log2(X))
#
# Charles Kooperberg, 12/11/2002, clk@fhcrc.org
#


bground <- function(x,log2=TRUE)
{
        dx <- mtopfunct(x,mvarfactor(x),log2)[,c(4,11,18)]
        if(log2)dx[,1:2] <- dx[,1:2]/log(2)
        dx <- data.frame(dx)
        names(dx) <- c("x635","x532","warning")
        dx
}
organize.data <- function(datafile)
{
xx <- datafile[,c(1,2,3,9,11,12,14,18,20,21,23,34,35)]
#1#column 1 - block
#2#column 2 - column nr
#3#column 3 - row nr
#4#column 9 - foreground 635nm - median
#5#column 11 - foreground 635nm - SD
#6#column 12 - background 635nm - median
#7#column 14 - background 635nm - SD
#8#column 18 - foreground 532nm - median
#9#column 20 - foreground 532nm - SD
#10#column 21 - background 532nm - median
#11#column 23 - background 532nm - SD
#12#column 34 - forground pixels
#13#column 35 - background pixels
yy <- is.na(xx[,1])
for(i in 2:13) yy <- yy+is.na(xx[,i])
xx <- xx[yy==0,]
xx
}
mvarfactor <- function(x)
{
    xmm <- max(x[,1])
    b1 <- varaux1(x,6,xmm)
    b2 <- varaux1(x,10,xmm)
    c1 <- x[, 7]/sqrt(x[, 13])
    c2 <- x[, 11]/sqrt(x[, 13])
    m1 <- lm(b1 ~ c1 - 1, weights = 1/(c1 + 1))
    m2 <- lm(b2 ~ c2 - 1, weights = 1/(c2 + 1))
    c(m1$coef,m2$coef)
}
varaux1 <- function(x, j,xmm)
{
    uu <- varaux2(x, 1, j)
    for(i in 2:xmm) uu <- c(uu, varaux2(x, i, j))
    uu
}
varaux2 <- function(x, i, j)
{
    v1 <- x[x[, 1] == i, c(2, 3, j)]
    mm <- max(c(x[,2],x[,3]))+5
    v2 <- rep(-100, mm*mm)
    v3 <- 1 + v1[, 1] + v1[, 2] * mm
    v2[v3] <- v1[, 3]
    mm1 <- mm-1
    v4 <- matrix(v2, ncol = mm)
    v4a <- v4[c(-1, -mm), c(-1, -mm)]
    v4b <- v4[c(-1, -2), c(-1, -mm)]
    v4c <- v4[c(-1, -mm), c(-1, -2)]
    v4d <- v4[c(-mm1, -mm), c(-1, -mm)]
    v4e <- v4[c(-1, -mm), c(-mm1, -mm)]
    v4x <- cbind(as.vector(v4a), as.vector(v4b), as.vector(v4c), as.vector(v4d), as.vector(v4e))
    v4x[v4x > -100] <- v4x[v4x > -100] + 1
    v4x[v4x == -100] <- 0
    v5 <- apply(v4x, 1, sum)
    v6 <- apply(v4x != 0, 1, sum)
    v5 <- v5/v6
    v7 <- v4x - v5
    v7[v4x == 0] <- 0
    v8 <- apply(v7^2, 1, sum)/(v6 - 1)
    v9 <- matrix(0, ncol = mm, nrow = mm)
    v9[2:mm1, 2:mm1] <- v8
    v9[v4 <= 0] <- 0
    v10 <- as.vector(v9[v4 > 0])
    sqrt(v10)
}
mtopfunct <- function(x,mx,log2)
{
    y1 <- x[, c(4, 5, 12, 6, 7, 13)]
    y2 <- x[, c(8, 9, 12, 10, 11, 13)]
    y1[, c(2, 5)] <- y1[, c(2, 5)] * mx[1]
    y2[, c(2, 5)] <- y2[, c(2, 5)] * mx[2]
    t1 <- vtopaux1(y1,log2)
    t2 <- vtopaux1(y2,log2)
    x2 <- cbind(t1, t2)
    vv <- x2[, 7]
    vv[x2[, 14] > vv] <- x2[x2[, 14] > vv, 14]
    vv[vv < 0] <- 0
    x2 <- cbind(x2, x2[, 4] - x2[, 11], sqrt(x2[, 5]^2 + x2[, 12]^2),
        1 * (x2[ , 6] + x2[, 13] > 0), vv)
    x2
}
vtopaux1 <- function(x,log2)
{
    ww7 <- rep(0,32)
    yy7 <- rep(0,32)
    ww7[1 ]<- 0.00178328072169643; yy7[1 ]<- 0.99930504173577217;
    ww7[2 ]<- 0.00414703326056247; yy7[2 ]<- 0.99634011677195533;
    ww7[3 ]<- 0.00650445796897836; yy7[3 ]<- 0.99101337147674429;
    ww7[4 ]<- 0.00884675982636395; yy7[4 ]<- 0.98333625388462598;
    ww7[5 ]<- 0.01116813946013113; yy7[5 ]<- 0.97332682778991098;
    ww7[6 ]<- 0.01346304789671864; yy7[6 ]<- 0.96100879965205377;
    ww7[7 ]<- 0.01572603047602472; yy7[7 ]<- 0.94641137485840277;
    ww7[8 ]<- 0.01795171577569734; yy7[8 ]<- 0.92956917213193957;
    ww7[9 ]<- 0.02013482315353021; yy7[9 ]<- 0.91052213707850282;
    ww7[10]<- 0.02227017380838325; yy7[10]<- 0.88931544599511414;
    ww7[11]<- 0.02435270256871087; yy7[11]<- 0.86599939815409277;
    ww7[12]<- 0.02637746971505466; yy7[12]<- 0.84062929625258032;
    ww7[13]<- 0.02833967261425948; yy7[13]<- 0.81326531512279754;
    ww7[14]<- 0.03023465707240248; yy7[14]<- 0.78397235894334139;
    ww7[15]<- 0.03205792835485155; yy7[15]<- 0.75281990726053194;
    ww7[16]<- 0.03380516183714161; yy7[16]<- 0.71988185017161088;
    ww7[17]<- 0.03547221325688239; yy7[17]<- 0.68523631305423327;
    ww7[18]<- 0.03705512854024005; yy7[18]<- 0.64896547125465731;
    ww7[19]<- 0.03855015317861563; yy7[19]<- 0.61115535517239328;
    ww7[20]<- 0.03995374113272034; yy7[20]<- 0.57189564620263400;
    ww7[21]<- 0.04126256324262353; yy7[21]<- 0.53127946401989457;
    ww7[22]<- 0.04247351512365359; yy7[22]<- 0.48940314570705296;
    ww7[23]<- 0.04358372452932345; yy7[23]<- 0.44636601725346409;
    ww7[24]<- 0.04459055816375657; yy7[24]<- 0.40227015796399163;
    ww7[25]<- 0.04549162792741814; yy7[25]<- 0.35722015833766813;
    ww7[26]<- 0.04628479658131442; yy7[26]<- 0.31132287199021097;
    ww7[27]<- 0.04696818281621002; yy7[27]<- 0.26468716220876742;
    ww7[28]<- 0.04754016571483031; yy7[28]<- 0.21742364374000708;
    ww7[29]<- 0.04799938859645831; yy7[29]<- 0.16964442042399283;
    ww7[30]<- 0.04834476223480295; yy7[30]<- 0.12146281929612056;
    ww7[31]<- 0.04857546744150343; yy7[31]<- 0.07299312178779904;
    ww7[32]<- 0.04869095700913972; yy7[32]<- 0.02435029266342443;
    ww7 <- c(ww7,ww7)
    yy7 <- (c(yy7,-yy7))/2+0.5
    yy700 <- yy7
    for(i in 1:99)yy700 <- c(yy700,yy7+i)
    xx <- x[, 1]
    xy <- x[, 4]
    xs1 <- x[, 2]/sqrt(x[, 3])
    xs2 <- x[, 5]/sqrt(x[, 6])
    t1 <- rep(0, length(xx))
    t1 <- cbind(t1, t1, t1, t1, t1, t1, t1)
    for(i in 1:length(xx)) {
        t1[i, ] <- vtopaux2(xx[i], xy[i], xs1[i], xs2[i],ww7,yy700,log2)
        if(i/1000 == floor(i/1000))
            print(i)
    }
    t1
}
vtopaux2 <- function(x, y, sig1, sig2,ww7,yy7x,log2)
{
    upp <- 10 * sig1 + x
    nl <- 11
    wwh <- rep(ww7,nl)
    yyh <- (yy7x[1:(nl*length(ww7))])/nl
    b1 <- yyh * upp
    x1 <- vtopaux4(x, y, sig1, sig2)
    c1 <- (vtopaux3(b1, x, y, sig1, sig2) * upp)/x1
    c1w <- c1*wwh
    mwwh <- mean(wwh)
    m1 <- mean(c1w)/mwwh
    m1w <- m1*mwwh
    m2 <- mean(c1w * b1)/m1w
    tmp <- b1-m2
    m3 <- mean(c1w * tmp * tmp)/m1w
    if(log2){
        b2 <- log(b1)
    }
    else {
        b2 <- b1
    }
    m4 <- mean(c1w * b2)/(m1*mwwh)
    tmp <- b2-m4
    m5 <- sqrt(mean(c1w * tmp * tmp)/m1w)
    m6 <- 1 * ((abs(m1) - 1) > 0.05)
    m7 <- (y - x)/sqrt(sig1^2 + sig2^2)
    c(m1, m2, sqrt(m3), m4, m5, m6, m7)
}
vtopaux3 <- function(a, x, y, sig1, sig2)
{
    u <- x - a
    v <- y
    s <- sig1^2
    t <- sig2^2
    d10 <- 1/sqrt(s + t) * dnorm((u - v)/sqrt(s + t))
    d4 <- (u * t + v * s)/(s + t)
    d5 <- sqrt((s * t)/(s + t))
    d6 <- d10 * (1 - pnorm(0, d4, d5))
    d6
}
vtopaux4 <- function(x, y, sig1, sig2)
{
    upp <- 4 * sig2 + y
    b1 <- (((1:10000) - 0.5) * upp)/10000
    m1 <- mean(vtopaux5(b1, x, y, sig1, sig2)) * upp
    m1
}
vtopaux5 <- function(x, y1, y2, sig1, sig2)
{
    (1 - pnorm((x - y1)/sig1)) * dnorm((x - y2), sd = sig2)
}