ratio {BGcom}R Documentation

Function for calculating the ratio T between the observed genes in common and the expected one

Description

This function take the lists of pvalues and calculate the ratio T(q) for each threshold q

Usage

ratio(dir, data, interval, name, pvalue)

Arguments

dir directory for storing the plots
data Lists of pvalues to be compared
interval The interval between two threshold
name The name to be used in the plots
pvalue Indicate if the data are pvalues (TRUE) or posterior probability (FALSE). If they are posterior probability they are transformed in pvalues

Details

This function calculate the ratio T(q) of observed number of genes in common vs expected one for each threshold q.

Value

Returns an object of class list with the ratio and the threshold. In particular:

ratios vector or T values for each threshold
q threshold corresponding to T values
DE differentially expressed features in each experiment
DECommon features in common corresponding to the T values
interval interval on the p-value scale defined by the user (default is 0.01)
name names to be used in the plots (defined by the user)
pvalue Logical: TRUE if the measures used for the analysis are pvalue, FALSE if they are posterior probabilities

Author(s)

M. Blangiardo

References

Stone et al.(1988), Investigations of excess environmental risks around putative sources: statistical problems and a proposed test,Statistics in Medicine, 7, 649-660.

M.Blangiardo and S.Richardson Statistical tools for synthesizing lists of differentially expressed features in related experiments, Genome Biology, 8, R54

Examples

data = simulation(n=500,GammaA=1,GammaB=1,r1=0.5,r2=0.8,DEfirst=300,DEsecond=200,DEcommon=100)
T<- ratio(data$Pval,interval=0.01,dir="D:/",name="CompData1Data2",pvalue=FALSE)

## The function is currently defined as
function(dir,data,interval=0.01,name,pvalue){
#Define how many lists for the comparison
lists = ncol(data)

if(pvalue==TRUE){
data=data;
}
if(pvalue==FALSE){
data=1-data;
}

ID=seq(1,dim(data)[1])
l=length(seq(0,1,interval))
threshold = seq(0,1,interval)
int=c()
L=matrix(0,l,lists)

    for(i in 1:l){
                temp = data<=threshold[i]
        for(j in 1:lists){
        L[i,j] <- sum(temp[,j])
        temp[temp[,j]==FALSE,j]<-0
        temp[temp[,j]==TRUE,j]<-1
        }
        int[i] <- sum(apply(temp,1,sum)==lists) 
            }

#Calculate the ratio for the number of observed genes/number of expected ones
expected = apply(L,1,prod)/(dim(data)[1])^(lists-1)
ratios = matrix(0,l,1)

for(i in 1:l){
    ratios[i,1] <- int[i]/expected[i]
    }

    #Plot
    ratios=ratios[int>0]
    thresh.ratios=threshold[int>0]
    L = L[int>0,] 
    int=int[int>0]

    if(length(thresh.ratios[ratios>=2])>0){
    q2 = max(thresh.ratios[ratios>=2])
    T2 = ratios[thresh.ratios==q2]
    Tmax = max(ratios)
    qmax = thresh.ratios[ratios==Tmax]
    ps.options(paper="a4",horizontal=TRUE)
    setwd(dir)
    ps.options(horizontal=FALSE)
    postscript(paste("Ratio","_",name,".ps"))
    plot(thresh.ratios,ratios,type="l",
    ylab= "T",xlab="P value",main="",yaxt="n",xaxt="n",cex.main=0.7,cex.axis=1.2,ylim=c(0,(max(ratios,na.rm=TRUE)+sd(ratios,na.rm=TRUE))))
    axis(2, at = c(0,0.5,1,1.5,T2,Tmax), labels = c(0,0.5,1,1.5,expression(T[2]),expression(T[max])), tick = TRUE,cex.axis=0.9)
    if(pvalue==TRUE){
    axis(1, at = c(qmax,q2,seq((q2+0.1),1,0.2)), labels = c(expression(q[max]),expression(q[2]),seq((q2+0.1),1,0.2)), tick=TRUE,cex=0.9)
    }
    
    if(pvalue==FALSE){
    axis(1, at = c(qmax,q2,seq((q2+0.1),1,0.2)), labels = c(expression(q[max]),expression(q[2]),1- seq((q2+0.1),1,0.2)), tick=TRUE,cex=0.9)
    }
    axis(4, at = c(1,T2,Tmax),labels = c(dim(data)[1],int[thresh.ratios==q2],int[thresh.ratios==qmax]),tick=TRUE,cex=0.9)
    dev.off()
    }
    
    if(length(thresh.ratios[ratios>=2])==0){
    Tmax = max(ratios)
    qmax = thresh.ratios[ratios==Tmax]
    ps.options(paper="a4",horizontal=TRUE)
    setwd(dir)
    ps.options(horizontal=FALSE)
    postscript(paste("Ratio","_",name,".ps"))
    plot(thresh.ratios,ratios,type="l",
    ylab= "T",xlab="P value",main="",yaxt="n",xaxt="n",cex.main=0.7,cex.axis=1.2,ylim=c(0,(max(ratios,na.rm=TRUE)+sd(ratios,na.rm=TRUE))))
    axis(2, at = c(0,0.5,1,1.5,Tmax), labels = c(0,0.5,1,1.5,expression(T[max])), tick = TRUE,cex.axis=0.9)
    if(pvalue==TRUE){
    axis(1, at = c(qmax,seq((qmax+0.1),1,0.2)), labels = c(expression(q[max]),seq((qmax+0.1),1,0.2)), tick=TRUE,cex=0.9)
    }
    
    if(pvalue==FALSE){
    axis(1, at = c(qmax,seq((qmax+0.1),1,0.2)), labels = c(expression(q[max]),1- seq((qmax+0.1),1,0.2)), tick=TRUE,cex=0.9)
    }
    axis(4, at = c(1,Tmax),labels = c(dim(data)[1],int[thresh.ratios==qmax]),tick=TRUE,cex=0.9)
    dev.off() 
    }

    return(list(DE = L, ratios=ratios,q=thresh.ratios,DECommon = int,interval=interval,name=name,pvalue=pvalue))

}

[Package BGcom version 1.0 Index]