Instead of taking only the top 20 genes, this time I am going to collect every result with a p-value smaller than 0.005 and combine them into a single table.
# Print every result table in the working directory whose file name matches
# "combined.txt".
fileNames <- dir(file.path(getwd()), pattern = "combined.txt", full.names = FALSE)
# seq_along() performs zero iterations when nothing matched; 1:length() would
# iterate over c(1, 0) and try to read a file named NA.
for (i in seq_along(fileNames)) {
  # comment.char = "" disables comment handling so that "#" inside a field is
  # kept as data.
  data <- read.table(fileNames[i], comment.char = "", header = TRUE)
  print(fileNames[i])
  print(data)
}
## [1] "smallPvalue_combined.txt"
## tumorType gene trait p_value
## 1 BLCA HIST1H1E DrinksPerWeek_res.txt 0.0011632889
## 2 BLCA FMN2 EA_NG_2018_excluding_23andMe_res.txt 0.0038084374
## 3 BLCA PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 4 BLCA TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 5 BLCA PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 6 BLCA PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 7 BLCA KIAA1522 PD_GWAS_2017_N300k_res.txt 0.0028537287
## 8 BLCA TRAF3IP2 PD_GWAS_2017_N300k_res.txt 0.0007739485
## 9 BLCA ACTB Prostate_cancer_2017_common_res.txt 0.0013771109
## 10 BLCA CREBBP SCZ_Cell_2018_res.txt 0.0010783204
## 11 BLCA RXRA SCZ_Cell_2018_res.txt 0.0047212319
## 12 BLCA MKI67 SmokingCessation_res.txt 0.0026282406
## 13 BLCA TP53 SmokingCessation_res.txt 0.0007401188
## 14 BLCA PHF3 T2D_DIAGRAM_2017_res.txt 0.0021262579
## 15 BLCA RNF111 T2D_DIAGRAM_2017_res.txt 0.0003455066
## 16 BLCA CREBBP UC_NG_2015_res.txt 0.0008594374
## 17 BLCA ARID1B ukbEUR_ASTHMA_res.txt 0.0029883908
## 18 BLCA RNF111 ukbEUR_DIA2_res.txt 0.0008347651
## 19 BLCA RNF111 ukbEUR_HEMORRHOIDS_res.txt 0.0028709435
## 20 BLCA PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 21 BLCA BAP1 ukbEUR_PSYCHIATRIC_res.txt 0.0034998197
## 22 BLCA PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 23 BLCA SF1 ukbEUR_TI_cojo_res.txt 0.0019107998
## 24 BLCA ACTB Vitiligo_res.txt 0.0023826722
## 25 BLCA FMN2 Vitiligo_res.txt 0.0011396484
## 26 BLCA FTH1 Vitiligo_res.txt 0.0002769139
## 27 BRCA PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 28 BRCA TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 29 BRCA PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 30 BRCA PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 31 BRCA TP53 SmokingCessation_res.txt 0.0007401188
## 32 BRCA CBFB T2D_DIAGRAM_2017_res.txt 0.0010473054
## 33 BRCA MAP2K4 ukbEUR_CANCER_res.txt 0.0037016650
## 34 BRCA CDH1 ukbEUR_OSTIOP_res.txt 0.0018216125
## 35 BRCA PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 36 BRCA PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 37 CESC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 38 CESC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 39 CESC PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 40 CESC PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 41 CESC TP53 SmokingCessation_res.txt 0.0007401188
## 42 CESC ABCA12 T2D_DIAGRAM_2017_res.txt 0.0002164297
## 43 CESC FLG ukbEUR_CARD_res.txt 0.0023720818
## 44 CESC FLG ukbEUR_Height_res.txt 0.0007812806
## 45 CESC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 46 CESC BAP1 ukbEUR_PSYCHIATRIC_res.txt 0.0034998197
## 47 CESC PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 48 CHOL DNAH5 RA_NG2010.hap3_res.txt 0.0020602637
## 49 CHOL DNAH5 SCZ_Cell_2018_res.txt 0.0047871641
## 50 CHOL BAP1 ukbEUR_PSYCHIATRIC_res.txt 0.0034998197
## 51 CHOL DNAH5 Vitiligo_res.txt 0.0004671867
## 52 ESCA TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 53 ESCA PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 54 ESCA PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 55 ESCA TP53 SmokingCessation_res.txt 0.0007401188
## 56 ESCA PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 57 GBM EGFR EA_NG_2018_excluding_23andMe_res.txt 0.0004740343
## 58 GBM PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 59 GBM TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 60 GBM EGFR IQ_NG_2018_res.txt 0.0006591103
## 61 GBM PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 62 GBM EGFR IQ_NG_2018.1_res.txt 0.0006591103
## 63 GBM PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 64 GBM LZTR1 PD_GWAS_2017_N300k_res.txt 0.0021066294
## 65 GBM GABRA6 SCZ_Cell_2018_res.txt 0.0044397037
## 66 GBM TP53 SmokingCessation_res.txt 0.0007401188
## 67 GBM PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 68 GBM LZTR1 ukbEUR_PSYCHIATRIC_res.txt 0.0005327073
## 69 GBM PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 70 HNSC THSD7A CigarettesPerDay_res.txt 0.0012157936
## 71 HNSC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 72 HNSC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 73 HNSC PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 74 HNSC PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 75 HNSC KEAP1 PD_GWAS_2017_N300k_res.txt 0.0021675894
## 76 HNSC GPATCH8 RA_NG2010.hap3_res.txt 0.0026242661
## 77 HNSC TP53 SmokingCessation_res.txt 0.0007401188
## 78 HNSC HLA-A ukbEUR_BMI_res.txt 0.0028476582
## 79 HNSC HLA-A ukbEUR_CI_cojo_res.txt 0.0002212435
## 80 HNSC GPATCH8 ukbEUR_OSTIOP_res.txt 0.0023927649
## 81 HNSC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 82 HNSC NSD1 ukbEUR_PEPTIC_ULCERS_res.txt 0.0048931031
## 83 HNSC NSD1 ukbEUR_PVD_res.txt 0.0011092138
## 84 HNSC PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 85 KICH TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 86 KICH TP53 SmokingCessation_res.txt 0.0007401188
## 87 KICH FRG1 ukbEUR_DYSLIPID_res.txt 0.0033358421
## 88 KIRC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 89 KIRC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 90 KIRC TP53 SmokingCessation_res.txt 0.0007401188
## 91 KIRC NOS1 ukbEUR_OBESITY_res.txt 0.0006276049
## 92 KIRC VHL ukbEUR_OBESITY_res.txt 0.0023679925
## 93 KIRC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 94 KIRC BAP1 ukbEUR_PSYCHIATRIC_res.txt 0.0034998197
## 95 LIHC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 96 LIHC KEAP1 PD_GWAS_2017_N300k_res.txt 0.0021675894
## 97 LIHC TP53 SmokingCessation_res.txt 0.0007401188
## 98 LIHC PCDHB16 ukbEUR_CANCER_res.txt 0.0021761148
## 99 LIHC BAP1 ukbEUR_PSYCHIATRIC_res.txt 0.0034998197
## 100 LUAD EGFR EA_NG_2018_excluding_23andMe_res.txt 0.0004740343
## 101 LUAD TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 102 LUAD EGFR IQ_NG_2018_res.txt 0.0006591103
## 103 LUAD EGFR IQ_NG_2018.1_res.txt 0.0006591103
## 104 LUAD KEAP1 PD_GWAS_2017_N300k_res.txt 0.0021675894
## 105 LUAD TP53 SmokingCessation_res.txt 0.0007401188
## 106 LUAD BRAF Vitiligo_res.txt 0.0046410362
## 107 LUSC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 108 LUSC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 109 LUSC PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 110 LUSC PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 111 LUSC KEAP1 PD_GWAS_2017_N300k_res.txt 0.0021675894
## 112 LUSC TP53 SmokingCessation_res.txt 0.0007401188
## 113 LUSC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 114 LUSC PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 115 PAAD TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 116 PAAD TP53 SmokingCessation_res.txt 0.0007401188
## 117 PAAD RNF43 Vitiligo_res.txt 0.0022529261
## 118 PRAD PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 119 PRAD TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 120 PRAD PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 121 PRAD PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 122 PRAD TP53 SmokingCessation_res.txt 0.0007401188
## 123 PRAD ETV3 ukbEUR_CANCER_res.txt 0.0038410392
## 124 PRAD PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 125 PRAD PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 126 SARC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 127 SARC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 128 SARC TP53 SmokingCessation_res.txt 0.0007401188
## 129 SARC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 130 SKCM PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 131 SKCM TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 132 SKCM TP53 SmokingCessation_res.txt 0.0007401188
## 133 SKCM PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 134 SKCM BRAF Vitiligo_res.txt 0.0046410362
## 135 UCEC PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 136 UCEC TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 137 UCEC PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 138 UCEC PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 139 UCEC LZTR1 PD_GWAS_2017_N300k_res.txt 0.0021066294
## 140 UCEC TP53 SmokingCessation_res.txt 0.0007401188
## 141 UCEC SIN3A SWB_NG_2015_res.txt 0.0046834651
## 142 UCEC RAB3GAP1 ukbEUR_OSTIOA_res.txt 0.0020103956
## 143 UCEC PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 144 UCEC LZTR1 ukbEUR_PSYCHIATRIC_res.txt 0.0005327073
## 145 UCEC PIK3CA ukbEUR_PVD_res.txt 0.0024078293
## 146 UCS PTEN EA_NG_2018_excluding_23andMe_res.txt 0.0014878141
## 147 UCS TP53 EA_NG_2018_excluding_23andMe_res.txt 0.0034598303
## 148 UCS FOXA2 IQ_NG_2018_res.txt 0.0031145590
## 149 UCS PIK3CA IQ_NG_2018_res.txt 0.0028632984
## 150 UCS FOXA2 IQ_NG_2018.1_res.txt 0.0031145590
## 151 UCS PIK3CA IQ_NG_2018.1_res.txt 0.0028632984
## 152 UCS TP53 SmokingCessation_res.txt 0.0007401188
## 153 UCS PTEN ukbEUR_OSTIOP_res.txt 0.0034648313
## 154 UCS PIK3CA ukbEUR_PVD_res.txt 0.0024078293
R code for the tables:
# Build `df`: one row per (tumor type, gene, trait) whose p-value is below the
# cutoff. For every folder in `folderNames` and every file in that folder whose
# name matches "res.txt", the smaller of the first two columns is taken as the
# gene's p-value; gene names come from the row names of the table.
#
# NOTE(review): `folderNames` is not defined in this chunk; it must already
# exist in the session (presumably a character vector of tumor-type folder
# names under the working directory) -- confirm against the earlier chunk.
pval_cutoff <- 0.005
results <- list()
for (i in seq_along(folderNames)) {
  tumorName <- folderNames[i]
  # Vector of the file names in this tumor folder that match "res.txt".
  fileNames <- dir(file.path(getwd(), tumorName), pattern = "res.txt", full.names = FALSE)
  for (j in seq_along(fileNames)) {
    traitName <- fileNames[j]
    print(traitName)
    data <- read.table(file.path(getwd(), tumorName, traitName),
                       comment.char = "", header = TRUE)

    # Treat a missing p-value as 1 (never significant) in EACH column
    # separately. The previous code wrote the replacement for column 2 into
    # column 1, which both discarded a valid column-1 value and left the NA in
    # place, so the following comparison failed.
    p_first <- data[[1]]
    p_second <- data[[2]]
    p_first[is.na(p_first)] <- 1
    p_second[is.na(p_second)] <- 1

    # Row-wise minimum of the two p-value columns.
    v_pval <- pmin(p_first, p_second)

    # Row indices of the genes that pass the cutoff.
    s_pval <- which(v_pval < pval_cutoff)
    print(s_pval)
    small_pval <- v_pval[s_pval]
    print(length(small_pval))

    # Collect the per-file result; all pieces are bound once after the loops
    # instead of growing `df` with rbind() on every iteration.
    results[[length(results) + 1L]] <- data.frame(
      tumorType = rep(tumorName, length(s_pval)),
      gene = row.names(data)[s_pval],
      trait = rep(traitName, length(s_pval)),
      p_value = small_pval
    )
  }
}

if (length(results) > 0) {
  df <- do.call(rbind, results)
} else {
  # Nothing was read: keep the four expected columns so that later renaming
  # and writing of `df` still work on an empty table.
  df <- data.frame(
    tumorType = character(0),
    gene = character(0),
    trait = character(0),
    p_value = numeric(0)
  )
}
## Error in eval(expr, envir, enclos): object 'folderNames' not found
# Give the combined table its final column names. When no rows were collected
# `df` can have zero columns, and assigning four names to it fails with
# "'names' attribute [4] must be the same length as the vector [0]"; in that
# case build an empty table with the expected columns instead.
if (ncol(df) == 0L) {
  df <- data.frame(
    tumorType = character(0),
    gene = character(0),
    trait = character(0),
    p_value = numeric(0)
  )
} else {
  colnames(df) <- c("tumorType", "gene", "trait", "p_value")
}
## Error in names(x) <- value: 'names' attribute [4] must be the same length as the vector [0]
# Save the combined table as a tab-separated text file in the working
# directory, with a header row, unquoted fields and no row-name column.
write.table(
  x = df,
  file = "smallPvalue_combined.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)