sklearn实战-乳腺癌细胞数据挖掘(博主亲自录制视频)
cpg数据库处理_找到未提取的pdf,存放于文件夹Chinese_undeal_pdfs
move_unextracted_pdfs.py
# -*- coding: utf-8 -*-
"""
Created on Sun Sep 18 17:06:15 2016 @author: Administrator
""" # -*- coding: utf-8 -*-
"""
Spyder Editor This is a temporary script file.
"""
import shutil

import xlrd

# Workbook listing all PDFs (column 0) and the extracted ones (column 1).
# NOTE(review): xlrd 2.0+ reads only .xls files; confirm the installed xlrd
# version can still open this .xlsx workbook.
excelFilename = "unextracted.xlsx"
# Not used below: the sheet is selected by index, not by this name.
sheetName = "Sheet1"

data = xlrd.open_workbook(excelFilename)
table = data.sheets()[0]

# Full PDF list; the first row is skipped (presumably a header row).
totalpdfs_list = table.col_values(0)[1:]
extractedpdfs_list = table.col_values(1)[1:]

# PDFs that have already been extracted (blank cells dropped).
extractedpdfs_list1 = [i for i in extractedpdfs_list if i != ""]

# PDFs that have not been extracted yet. The lookup set is built once so each
# membership test is O(1) instead of scanning the whole extracted list.
_extracted_lookup = set(extractedpdfs_list1)
unextractedPdfs_list = [i for i in totalpdfs_list if i not in _extracted_lookup]

# Files that could not be moved.
failed_files = []

# Move function follows. The original note says non-matching files go to an
# "unmatching_file" folder; the function's own code decides the real target.
def RemoveFile():
    """Move every un-extracted PDF into the ``Chinese_undeal_pdfs`` folder.

    Reads the module-level ``unextractedPdfs_list`` and appends each entry
    that cannot be moved to the module-level ``failed_files`` list. A failed
    move never aborts the remaining ones.

    Raises:
        OSError: if the destination folder cannot be created.
    """
    import os  # local import keeps this block self-contained

    target_dir = "Chinese_undeal_pdfs"
    # If the folder is missing, shutil.move renames the file *to* this path
    # instead of placing it inside, so successive files would overwrite each
    # other. Create the folder up front.
    os.makedirs(target_dir, exist_ok=True)

    for pdf_name in unextractedPdfs_list:
        try:
            shutil.move(pdf_name, target_dir)
        except (OSError, TypeError, ValueError):
            # OSError covers missing sources and shutil.Error (name already
            # present in the target). TypeError/ValueError cover spreadsheet
            # cells that are not usable as paths.
            failed_files.append(pdf_name)


RemoveFile()
移动英语pdf文件
remove_englishFile.py
# -*- coding: utf-8 -*-
"""
Spyder Editor
remove_englishFile.py
This is a temporary script file.
"""
import shutil

import xlrd

# Workbook whose first column lists the English PDF files.
# NOTE(review): xlrd 2.0+ reads only .xls files; confirm the installed xlrd
# version can still open this .xlsx workbook.
excelFilename = "be_cpg_English.xlsx"
# Not used below: the sheet is selected by index, not by this name.
sheetName = "Sheet1"

data = xlrd.open_workbook(excelFilename)
table = data.sheets()[0]

# Column 0 values with the first row skipped (presumably a header row).
EnglishFile_list = table.col_values(0)[1:]
# Move function. The original note mentions an "unmatching_file" folder, but
# the code below moves the listed files into the "English" folder.
def RemoveFile():
    """Move every file named in ``EnglishFile_list`` into the ``English`` folder.

    Reads the module-level ``EnglishFile_list``. Errors raised by
    ``shutil.move`` are not caught, so the first failing move propagates to
    the caller and stops the run.

    Raises:
        OSError: if the destination folder cannot be created or a move fails.
    """
    import os  # local import keeps this block self-contained

    target_dir = "English"
    # If the folder is missing, shutil.move renames the file *to* this path
    # instead of placing it inside, so successive files would overwrite each
    # other. Create the folder up front.
    os.makedirs(target_dir, exist_ok=True)

    for english_pdf in EnglishFile_list:
        shutil.move(english_pdf, target_dir)