###############
# R-code for Peng, LH, S.X. Chen and W, Zhou (2016)
# More Powerful Tests for Sparse High-Dimensional Covariances Matrices,
# Journal of Multivariate Analysis, 149, 124-143.
###############
# function that calculate the test statistic for the identity hypothesis,
# X is the N times P data matrix, and k is the banding width
###############
testIden = function(X, k) {
n = nrow(X)
p = ncol(X)
Y = apply(X, 2, function(y) y-mean(y))
l_i = colSums(Y)
l_ii = colSums(Y*Y)
l_ij = matrix(0, p, p)
l_ijj = matrix(0, p, p)
l_iijj = matrix(0, p, p)
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
for (l in 1:n) {
l_ij[i,j] = l_ij[i,j]+Y[l,i]*Y[l,j]
l_ijj[i,j] = l_ijj[i,j]+Y[l,i]*Y[l,j]*Y[l,j]
l_iijj[i,j] = l_iijj[i,j]+Y[l,i]*Y[l,j]*Y[l,i]*Y[l,j]
}
}
}
}
L4 = sum(l_ii)/n
L5 = sum(l_i*l_i-l_ii)/(n*(n-1))
L1=L2=L3=0
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
L1 = L1+l_ij[i,j]*l_ij[i,j]-l_iijj[i,j]
L2 = L2+l_i[i]*l_i[j]*l_ij[i,j]-l_i[i]*l_ijj[i,j]+
2*l_iijj[i,j]-l_ij[i,j]*l_ij[i,j]-l_i[j]*l_ijj[j,i]
L3 = L3+l_i[i]*l_i[i]*l_i[j]*l_i[j]-2*l_i[i]*l_i[i]*l_ii[j]+l_ii[i]*l_ii[j]+
2*l_ij[i,j]^2-6*l_iijj[i,j]+8*l_i[i]*l_ijj[i,j]-4*l_i[i]*l_i[j]*l_ij[i,j]
}
}
}
L1 = L1/(n*(n-1))
L2 = L2/(n*(n-1)*(n-2))
L3 = L3/(n*(n-1)*(n-2)*(n-3))
testStat1 = (L1-2*L2+L3)-2*(L4-L5)+p
l_cal1 = rep(0, 2*p*k-k*k-k+p)
l_cal11 = rep(0, 2*p*k-k*k-k+p)
l_cal12 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
l_cal121 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
l_cal1212 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
s1 = loc_ij_k(i,j,p,k)
for (l in 1:n) {
l_cal1[s1] = l_cal1[s1]+Y[l,i]*Y[l,j]
l_cal11[s1] = l_cal11[s1]+Y[l,i]^2*Y[l,j]^2
}
}
}
}
for (i1 in 1:p) {
for (j1 in 1:p) {
if (abs(i1-j1)<=k) {
for (i2 in 1:p) {
for (j2 in 1:p) {
if (abs(i2-j2)<=k) {
s1 = loc_ij_k(i1,j1,p,k)
s2 = loc_ij_k(i2,j2,p,k)
for (l in 1:n) {
l_cal12[s1,s2] = l_cal12[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]*Y[l,j2]
l_cal121[s1,s2] =l_cal121[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]^2*Y[l,j2]^2
l_cal1212[s1,s2] = l_cal1212[s1,s2]+Y[l,i1]^2*Y[l,j1]^2*Y[l,i2]^2*Y[l,j2]^2
}
}
}
}
}
}
}
tmp1=tmp2=tmp3=0
for (i1 in 1:p) {
for (j1 in 1:p) {
if (abs(i1-j1)<=k) {
for (i2 in 1:p) {
for (j2 in 1:p) {
if (abs(i2-j2)<=k) {
s1 = loc_ij_k(i1,j1,p,k)
s2 = loc_ij_k(i2,j2,p,k)
for (l in 1:n) {
tmp1 = tmp1+l_cal12[s1,s2]^2-l_cal1212[s1,s2]
tmp2 = tmp2+l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]-l_cal1[s1]*l_cal121[s1,s2]+
2*l_cal1212[s1,s2]-l_cal12[s1,s2]^2-l_cal1[s2]*l_cal121[s2,s1]
tmp3 = tmp3+l_cal1[s1]^2*l_cal1[s2]^2-l_cal1[s1]^2*l_cal11[s2]-
l_cal1[s2]^2*l_cal11[s1]+l_cal11[s1]*l_cal11[s2]+2*l_cal12[s1,s2]^2-
6*l_cal1212[s1,s2]+4*l_cal1[s1]*l_cal121[s1,s2]+4*l_cal1[s2]*l_cal121[s2,s1]-
4*l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]
}
}
}
}
}
}
}
tmp1 = tmp1/(n*(n-1))
tmp2 = tmp2/(n*(n-1)*(n-2))
tmp3 = tmp3/(n*(n-1)*(n-2)*(n-3))
varEst = 2*(tmp1-2*tmp2+tmp3)/(n*(n-1))
res = testStat1/sqrt(varEst)
return(res)
}
###############
# function that calculate the test statistic for the shpericity hypothesis,
# X is the N times P data matrix, and k is the banding width
###############
testSphe = function(X, k) {
n = nrow(X)
p = ncol(X)
Y = apply(X, 2, function(y) y-mean(y))
l_i = colSums(Y)
l_ii = colSums(Y*Y)
l_ij = matrix(0, p, p)
l_ijj = matrix(0, p, p)
l_iijj = matrix(0, p, p)
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
for (l in 1:n) {
l_ij[i,j] = l_ij[i,j]+Y[l,i]*Y[l,j]
l_ijj[i,j] = l_ijj[i,j]+Y[l,i]*Y[l,j]*Y[l,j]
l_iijj[i,j] = l_iijj[i,j]+Y[l,i]*Y[l,j]*Y[l,i]*Y[l,j]
}
}
}
}
L4 = sum(l_ii)/n
L5 = sum(l_i*l_i-l_ii)/(n*(n-1))
L1=L2=L3=0
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
L1 = L1+l_ij[i,j]*l_ij[i,j]-l_iijj[i,j]
L2 = L2+l_i[i]*l_i[j]*l_ij[i,j]-l_i[i]*l_ijj[i,j]+
2*l_iijj[i,j]-l_ij[i,j]*l_ij[i,j]-l_i[j]*l_ijj[j,i]
L3 = L3+l_i[i]*l_i[i]*l_i[j]*l_i[j]-2*l_i[i]*l_i[i]*l_ii[j]+l_ii[i]*l_ii[j]+
2*l_ij[i,j]^2-6*l_iijj[i,j]+8*l_i[i]*l_ijj[i,j]-4*l_i[i]*l_i[j]*l_ij[i,j]
}
}
}
L1 = L1/(n*(n-1))
L2 = L2/(n*(n-1)*(n-2))
L3 = L3/(n*(n-1)*(n-2)*(n-3))
testStat1 = p*(L1-2*L2+L3)/((L4-L5)^2)-1
l_cal1 = rep(0, 2*p*k-k*k-k+p)
l_cal11 = rep(0, 2*p*k-k*k-k+p)
l_cal12 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
l_cal121 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
l_cal1212 = matrix(0, 2*p*k-k*k-k+p, 2*p*k-k*k-k+p)
for (i in 1:p) {
for (j in 1:p) {
if (abs(i-j)<=k) {
s1 = loc_ij_k(i,j,p,k)
for (l in 1:n) {
l_cal1[s1] = l_cal1[s1]+Y[l,i]*Y[l,j]
l_cal11[s1] = l_cal11[s1]+Y[l,i]^2*Y[l,j]^2
}
}
}
}
for (i1 in 1:p) {
for (j1 in 1:p) {
if (abs(i1-j1)<=k) {
for (i2 in 1:p) {
for (j2 in 1:p) {
if (abs(i2-j2)<=k) {
s1 = loc_ij_k(i1,j1,p,k)
s2 = loc_ij_k(i2,j2,p,k)
for (l in 1:n) {
l_cal12[s1,s2] = l_cal12[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]*Y[l,j2]
l_cal121[s1,s2] =l_cal121[s1,s2]+Y[l,i1]*Y[l,j1]*Y[l,i2]^2*Y[l,j2]^2
l_cal1212[s1,s2] = l_cal1212[s1,s2]+Y[l,i1]^2*Y[l,j1]^2*Y[l,i2]^2*Y[l,j2]^2
}
}
}
}
}
}
}
tmp1=tmp2=tmp3=0
for (i1 in 1:p) {
for (j1 in 1:p) {
if (abs(i1-j1)<=k) {
for (i2 in 1:p) {
for (j2 in 1:p) {
if (abs(i2-j2)<=k) {
s1 = loc_ij_k(i1,j1,p,k)
s2 = loc_ij_k(i2,j2,p,k)
for (l in 1:n) {
tmp1 = tmp1+l_cal12[s1,s2]^2-l_cal1212[s1,s2]
tmp2 = tmp2+l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]-l_cal1[s1]*l_cal121[s1,s2]+
2*l_cal1212[s1,s2]-l_cal12[s1,s2]^2-l_cal1[s2]*l_cal121[s2,s1]
tmp3 = tmp3+l_cal1[s1]^2*l_cal1[s2]^2-l_cal1[s1]^2*l_cal11[s2]-
l_cal1[s2]^2*l_cal11[s1]+l_cal11[s1]*l_cal11[s2]+2*l_cal12[s1,s2]^2-
6*l_cal1212[s1,s2]+4*l_cal1[s1]*l_cal121[s1,s2]+4*l_cal1[s2]*l_cal121[s2,s1]-
4*l_cal1[s1]*l_cal1[s2]*l_cal12[s1,s2]
}
}
}
}
}
}
}
tmp1 = tmp1/(n*(n-1))
tmp2 = tmp2/(n*(n-1)*(n-2))
tmp3 = tmp3/(n*(n-1)*(n-2)*(n-3))
varEst = 2*(tmp1-2*tmp2+tmp3)/((L1-2*L2+L3)^2*n*(n-1))
res = testStat1/sqrt(varEst)
return(res)
}
###############
# a useful function
###############
loc_ij_k = function(i, j, p, k) {
tmp = 0
if (k==0) {
tmp = i
} else {
if (i==j) {
tmp = i
} else {
for (l in 1:(k+1)) {
if ((j-i)==l) {
tmp = (2*l-1)*p-l*l+l+i
} else if ((i-j)==l) {
tmp = 2*l*p-l*l+j
}
}
}
}
return(tmp)
}
|