###################################################################### # STAT 7010 - Experimental Statistics II # Peng Zeng (Auburn University) # 2023-03-30 ###################################################################### # 6033 x 102 data matrix # The first 50 columns are the gene expression levels 50 control subjects, # while the last 52 columns represent the prostate cancer subjects. filename = "http://hastie.su.domains/CASI_files/DATA/prostmat.csv" prostmat = read.csv(filename) str(prostmat) n.gene = nrow(prostmat) pvalue = numeric(n.gene) for(i in 1:n.gene) { pvalue[i] = t.test(prostmat[i, 1:50], prostmat[i, 51:102], var.equal = TRUE)$p.value } summary(pvalue) sum(pvalue < 0.05) # 477 significant genes sum(pvalue < 0.05 / n.gene) # 2 significant genes using Bonferroni's sum(sort(pvalue) < 0.05 / (n.gene:1)) # 2 significant genes using Holm's sum(sort(pvalue) < 0.05 * (1:n.gene) / n.gene) # 21 significant genes using FDR bon = p.adjust(pvalue, method = "bonferroni") holm = p.adjust(pvalue, method = "holm") BH = p.adjust(pvalue, method = "fdr") par(mar = c(4.5, 4.5, 0.5, 0.5)) plot(sort(pvalue)[1:50], ylab = "p-value") lines(1:50, 1:50 * 0.05/n.gene, col = "red") lines(1:50, 0.05 / (n.gene - 1:50 + 1), col = "blue") legend(0, max(sort(pvalue)[1:50]), c("FDR", "Holm's"), lty = c(1, 1), col = c("red", "blue")) ###################################################################### # THE END ######################################################################