R语言基因ID转换(Ensembl ID 转 SYMBOL / ENTREZID)

得到需要转换的ID

# Build a lookup table that maps the (versioned) Ensembl gene IDs used as
# column names of `exp_data_1` to Entrez gene IDs and gene symbols, and save
# it to "ID_list.Rdata". The resulting `f2` has one row per column of
# `exp_data_1`, in column order.
library(stringr)
library(org.Hs.eg.db)

# Collect the IDs to convert: the column names of the expression matrix.
d <- colnames(exp_data_1)
d <- data.frame(ensembleID = d)

# The part after the dot in an Ensembl ID is a version suffix and has to be
# removed before the ID can be looked up.
# The split pattern is a regular expression, so a bare "." matches every
# character; "[.]" matches a literal dot.
strsplit("ENSG00000000003.13", ".")
strsplit("ENSG00000000003.13", "[.]")
strsplit("ENSG00000000003.13", "[.]")[[1]][1]
d$ID <- str_split(d$ensembleID, "[.]", simplify = TRUE)[, 1]

# Reference: http://www.bio-info-trainee.com/710.html
keytypes(org.Hs.eg.db)
# [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT"
# [5] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"
# [9] "EVIDENCEALL"  "GENENAME"     "GENETYPE"     "GO"
# [13] "GOALL"        "IPI"          "MAP"          "OMIM"
# [17] "ONTOLOGY"     "ONTOLOGYALL"  "PATH"         "PFAM"
# [21] "PMID"         "PROSITE"      "REFSEQ"       "SYMBOL"
# [25] "UCSCKG"       "UNIPROT"

# Flat mapping tables: gene_id <-> symbol and gene_id <-> ensembl_id.
# NOTE(review): the original also ran `g2i <- toTable(org.Hs.eg)`;
# `org.Hs.eg` does not appear to be a Bimap object and `g2i` was never used,
# so the call is omitted here -- confirm nothing else relies on `g2i`.
g2s <- toTable(org.Hs.egSYMBOL)
g2e <- toTable(org.Hs.egENSEMBL)

# `d` has no `ensembl_id` column, so merging it directly by "ensembl_id"
# fails. Give the left-hand table that key (the version-stripped ID) without
# modifying `d` itself.
d_keyed <- d
d_keyed$ensembl_id <- d_keyed$ID

# all.x = TRUE keeps IDs from `d` even when no mapping is found.
e <- merge(d_keyed, g2e, by = "ensembl_id", all.x = TRUE)
f <- merge(e, g2s, by = "gene_id", all.x = TRUE)

# Show Ensembl IDs that map to more than one row (1:many mappings).
ensembl_counts <- table(f$ensembl_id)
ensembl_counts[ensembl_counts > 1]

# Keep one row per ID, then put the rows back into the order of `d`.
f <- f[order(f$ID), ]
f <- f[!duplicated(f$ID), ]
f <- f[match(d$ID, f$ID), ]

# Second lookup through the select() interface. The call is namespace-
# qualified because other attached packages (e.g. dplyr) also export a
# `select`. The result is named `id_list` rather than `list` so that it does
# not shadow base::list.
id_list <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = f$ensembl_id,
  columns = c("ENSEMBL", "ENTREZID", "SYMBOL"),
  keytype = "ENSEMBL"
)
colnames(id_list) <- c("ensembl_id", "ENTREZID", "SYMBOL")

# Attach ENTREZID / SYMBOL to `e`, keep the first row per Ensembl ID, and
# align the rows with `f` (which already follows the order of `d`).
f2 <- merge(e, id_list, by = "ensembl_id", all.x = TRUE)
f2 <- f2[order(f2$ensembl_id), ]
f2 <- f2[!duplicated(f2$ensembl_id), ]
f2 <- f2[match(f$ensembl_id, f2$ensembl_id), ]
save(f2, file = "ID_list.Rdata")

替换表达矩阵的基因ID

# Replace the column names of the expression matrix with the converted gene
# IDs and save one copy per ID type. `f2` is expected to hold one row per
# column of `exp_data_1`, in the same order as the columns.
dim(exp_data_1)

# The names are assigned purely by position, so verify first that the rows of
# `f2` line up with the columns they are about to rename (compared on the
# Ensembl ID with its ".version" suffix removed).
stopifnot(
  nrow(f2) == ncol(exp_data_1),
  identical(
    sub("[.].*$", "", colnames(exp_data_1)),
    as.character(f2$ensembl_id)
  )
)

# Genes without a mapping end up with NA as their column name; report how
# many so this does not go unnoticed.
message(
  sum(is.na(f2$SYMBOL)), " of ", nrow(f2), " columns have no SYMBOL; ",
  sum(is.na(f2$ENTREZID)), " have no ENTREZID."
)

# Version labelled with gene symbols.
colnames(exp_data_1) <- f2$SYMBOL
exp_data_1[1:3, 1:3]
save(exp_data_1, file = "CRC_530_SYMBOL.Rdata")

# Version labelled with Entrez gene IDs (this is the state `exp_data_1` is
# left in afterwards).
colnames(exp_data_1) <- f2$ENTREZID
exp_data_1[1:3, 1:3]
save(exp_data_1, file = "CRC_530_ENTREZID.Rdata")
上一篇:oracle数据导入导出


下一篇:linux内核漏洞提权过程