# 得到需要转换的ID
# Collect the gene IDs to convert: they are the column names of the
# expression matrix `exp_data_1`.
stopifnot(!is.null(colnames(exp_data_1)))
d <- data.frame(ensembleID = colnames(exp_data_1))
# The IDs carry a version suffix after the dot (e.g. "ENSG00000000003.13");
# strip it and keep only the part before the dot.
library(stringr)
# The dot has to be matched literally ("[.]"). A bare "." is a regex wildcard,
# so strsplit("ENSG00000000003.13", ".") yields only empty strings, whereas
# strsplit("ENSG00000000003.13", "[.]")[[1]][1] yields "ENSG00000000003".
d$ID <- str_split(d$ensembleID, "[.]", simplify = TRUE)[, 1]
# Map the version-less Ensembl IDs to Entrez gene IDs and symbols using the
# org.Hs.eg.db mapping tables.
# Reference: http://www.bio-info-trainee.com/710.html
library(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
# [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
# [5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
# [9] "EVIDENCEALL" "GENENAME" "GENETYPE" "GO"
# [13] "GOALL" "IPI" "MAP" "OMIM"
# [17] "ONTOLOGY" "ONTOLOGYALL" "PATH" "PFAM"
# [21] "PMID" "PROSITE" "REFSEQ" "SYMBOL"
# [25] "UCSCKG" "UNIPROT"
g2s <- toTable(org.Hs.egSYMBOL)   # gene_id <-> symbol
g2e <- toTable(org.Hs.egENSEMBL)  # gene_id <-> ensembl_id
# NOTE(review): the former `g2i <- toTable(org.Hs.eg)` call was dropped; `g2i`
# was never used and `org.Hs.eg` does not appear to be a mapping table.

# `d` stores the stripped ID in column `ID`; the merge below (and the code
# that follows) expects the key to be called `ensembl_id`, so expose it under
# that name as well.
d$ensembl_id <- d$ID
# all.x = TRUE keeps the rows of `d` that have no match in the mapping table.
e <- merge(d, g2e, by = "ensembl_id", all.x = TRUE)
f <- merge(e, g2s, by = "gene_id", all.x = TRUE)
# Diagnostic: Ensembl IDs that occur on more than one row after the merges.
table(f$ensembl_id)[table(f$ensembl_id) > 1]
# Keep a single row per ID (the first one after sorting by ID) ...
f <- f[order(f$ID), ]
f <- f[!duplicated(f$ID), ]
# ... and put the rows back into the order of `d`.
f <- f[match(d$ID, f$ID), ]
# `f` now holds the ID mapping, one row per entry of `d`, in the same order.
# Second lookup of ENTREZID and SYMBOL for the same Ensembl IDs, this time
# through select() on the OrgDb object; the result is saved as `f2`.
# select() is namespace-qualified so that an attached package exporting its
# own select() (e.g. dplyr) cannot mask the AnnotationDbi generic.
# NOTE(review): the name `list` shadows base::list; it is kept only in case
# code outside this section reads it. Rename once that is confirmed.
list <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = unique(f$ensembl_id),  # each key only needs to be looked up once
  columns = c("ENSEMBL", "ENTREZID", "SYMBOL"),
  keytype = "ENSEMBL"
)
# Rename the key column by name instead of relying on the column order.
colnames(list)[colnames(list) == "ENSEMBL"] <- "ensembl_id"
# all.x = TRUE keeps IDs for which select() returned no mapping.
f2 <- merge(e, list, by = "ensembl_id", all.x = TRUE)
# Keep a single row per Ensembl ID (the first one after sorting) ...
f2 <- f2[order(f2$ensembl_id), ]
f2 <- f2[!duplicated(f2$ensembl_id), ]
# ... and line the rows up with `f`.
f2 <- f2[match(f$ensembl_id, f2$ensembl_id), ]
save(f2, file = "ID_list.Rdata")
# 替换表达矩阵的基因ID
# Relabel the columns of the expression matrix with the converted IDs and save
# one copy per ID type (gene symbol, then Entrez ID).
dim(exp_data_1)
# `f2` must provide exactly one row per column; otherwise the labels below
# would be misassigned (a data.frame silently pads missing names with NA).
stopifnot(nrow(f2) == ncol(exp_data_1))
# NOTE(review): IDs without a mapping presumably end up as NA column names,
# and symbols may repeat; confirm this is acceptable downstream.
colnames(exp_data_1) <- f2$SYMBOL
exp_data_1[1:3, 1:3]
save(exp_data_1, file = "CRC_530_SYMBOL.Rdata")
# The same object is relabelled in place, so after this point `exp_data_1`
# carries Entrez IDs as column names.
colnames(exp_data_1) <- f2$ENTREZID
exp_data_1[1:3, 1:3]
save(exp_data_1, file = "CRC_530_ENTREZID.Rdata")