# Load the trait table and show its first and third columns
# (printed as Trait and Sample.Size in the output below).
info <- read.csv(file = "Traits.csv")
ts <- info[c(1, 3)]
print(ts)
## Trait Sample.Size
## 1 Intelligence 269,867
## 2 Rheumatoid arthritis 41,282
## 3 Smoking cessation 547,219
## 4 Education attainment 766,345
## 5 Vitiligo 44,266
## 6 Smoking initiation 1,232,091
## 7 Major depressive disorder 101,498
## 8 Alcohol consumption 941,280
## 9 Ulcerative Colitis 27,432
## 10 Subjective well-being 298,420
## 11 Total body bone mineral density 66,628
## 12 Stroke 446,696
## 13 Type 2 diabetes 158,186
## 14 Bipolar disorder 41,653
## 15 Schizophrenia 65,967
## 16 Parkinson's disease 308,518
## 17 Inflammatory bowel disease 34,652
## 18 Breast cancer 228,951
## 19 Cigarette per day 262,014
## 20 Allergic disease 59,832
## 21 Ovarian cancer 66,450
## 22 Prostate cancer 140,254
## 23 Asthma 452,272
## 24 Height 344,664
## 25 Disease status 452,272
## 26 Obesity 258,442
## 27 Iron deficiency anemias 452,272
## 28 Cancer 452,272
## 29 Varicose veins of lower extremities 452,272
## 30 Body mass index 344,306
## 31 Osteoarthritis 452,272
## 32 Hypertensive disease 452,272
## 33 Coffee intake 421,947
## 34 Type 2 diabetes 452,272
## 35 Irritable bowel syndrome 452,272
## 36 Hernia abdominopelvic cavity 452,272
## 37 Severe obesity 157,142
## 38 Cardiovascular disease 452,272
## 39 Dyslipidemia 452,272
## 40 Tea intake 440,094
## 41 Peptic ulcers 452,272
## 42 Allergic rhinitis 452,272
## 43 Psychiatric disorder 452,272
## 44 Peripheral vascular disease 452,272
## 45 Osteoporosis 452,272
## 46 Hemorrhoids 452,272
There are 46 traits, with varying sample sizes.
To look at the distribution of the PRS for a single trait, read its .profile file and plot a histogram:
library(stringr)

# Histogram of the PRS values stored in the allergic-disease profile file.
# The last space-separated word of every line is taken as the score; the
# first line is skipped (presumably a header row).
fileName <- "TCGA_HM3_Allergic_disease.profile"
AD <- file(fileName, open = "r")
ADPRS <- word(readLines(AD), -1)
# Release the connection as soon as the lines are in memory; previously it
# stayed open for the rest of the session.
close(AD)
# Negative indexing drops the first line and, unlike 2:length(x), never
# counts backwards when the file has fewer than two lines.
ADPRS <- as.numeric(ADPRS[-1])
# summary(ADPRS)
AllergicDisease_Histo <- hist(ADPRS, main = fileName, xlab = "PRS")
library(stringr)

# Histogram of the PRS values stored in the breast-cancer profile file.
# The last space-separated word of every line is taken as the score; the
# first line is skipped (presumably a header row).
fileName <- "TCGA_HM3_Breast_Cancer.profile"
BC <- file(fileName, open = "r")
BCPRS <- word(readLines(BC), -1)
# Release the connection as soon as the lines are in memory; previously it
# stayed open for the rest of the session.
close(BC)
# Negative indexing drops the first line and, unlike 2:length(x), never
# counts backwards when the file has fewer than two lines.
BCPRS <- as.numeric(BCPRS[-1])
# summary(BCPRS)
BreastCancer_Histo <- hist(BCPRS, main = fileName, xlab = "PRS")
To draw a histogram for every trait, use a `for` loop:
library(stringr)

# Draw one histogram per profile file. Column 5 of the trait table is used
# as the list of file names.
file_names <- info[, 5]
# file_names
for (filename in file_names) {
  # Passing the path lets readLines() open and close the file itself;
  # wrapping it in file(..., open = "r") left one connection open per file.
  PRS <- word(readLines(filename), -1)
  # Drop the first line; -1 is safe even when the file has fewer than two
  # lines, where 2:length(PRS) would count backwards.
  PRS <- as.numeric(PRS[-1])
  hist(PRS, main = filename)
}
I saved this file as jihyun_PRShisto.R. To run the code from a terminal: `Rscript jihyun_PRShisto.R` (the histogram files are created separately in the folder that contains the script).