作者: | Chen, S. X. and Y. L. Qin |
标题: | Chen, S. X. and Y. L. Qin (2010). A two sample test for high dimensional data with application to gene-set testing, The Annals of Statistics, 38, 808-835. |
代码 | |
R-code: #CQ.stat returns the value of the CQ statistic in Chen and Qin (2010) #Inputs are two data matrices: p by n1 and p by n2 #Each column of a data matrix is a p-dim observation CQ.stat=function(X1,X2) { p=dim(X1)[1] # dim n1=dim(X1)[2] # sample size n1 n2=dim(X2)[2] # sample size n2 X1mean=apply(X1,1,mean) # First sample mean X2mean=apply(X2,1,mean) # Second sample mean T1=n1^2*sum(X1mean^2)-sum(X1^2) T2=n2^2*sum(X2mean^2)-sum(X2^2) T12=sum(t(X1mean)%*%X2mean) Tn=T1/(n1*(n1-1))+T2/(n2*(n2-1))-2*T12 # Tn in the numerator of the CQ statistic # estimate of tr(Sigma1^2) trSig1sq=0 seq1=1:n1 for(j in seq1){ for(k in seq1[-j]){ cv1mean=apply(X1[,-c(j,k)],1,mean) trSig1sq=trSig1sq+sum(t(X1[,j])%*%(X1[,k]-cv1mean))*sum(t(X1[,k])%*%(X1[,j]-cv1mean))/(n1*(n1-1)) } } # estimate of tr(Sigma2^2) trSig2sq=0 seq2=1:n2 for(j in seq2){ for(k in seq2[-j]){ cv2mean=apply(X2[,-c(j,k)],1,mean) trSig2sq=trSig2sq+sum(t(X2[,j])%*%(X2[,k]-cv2mean))*sum(t(X2[,k])%*%(X2[,j]-cv2mean))/(n2*(n2-1)) } } # estimate of tr(Sigma1Sigma2) trSig1Sig2=0 for(l in seq1){ cv1mean=apply(X1[,-l],1,mean) for(k in seq2){ cv2mean=apply(X2[,-k],1,mean) trSig1Sig2=trSig1Sig2+sum(t(X1[,l]%*%(X2[,k]-cv2mean)))*sum(t(X2[,k]%*%(X1[,l]-cv1mean)))/(n1*n2) } } # estimate of sigma_n^2=Var(Tn) signsq=2*trSig1sq/(n1*(n1-1))+2*trSig2sq/(n2*(n2-1))+4*trSig1Sig2/(n1*n2) # CQ statistic CQstat=Tn/sqrt(signsq) return(CQstat) } |