newline: (windows) Only for text mode
写:
- newline=None (default)
- newline=''
- newline='\n'
- other
Summary: 写文件时, 代码中统一使用 '\n'; newline=None 时替换成 os.linesep, newline='' or '\n' 时不做替换, newline='\r' or '\r\n' 时替换成 '\r' or '\r\n'
读:
原文件
- newline=None
- newline='' or '\n' or '\r' or '\r\n'
都没做转换
牵扯到readline() readlines()
定律:
读文件采用默认 newline=None, Python把 \r \n \r\n 都自动转换为 \n, 进行处理
写文件时换行统一使用 \n: newline=None 时, 写到文件中的是 os.linesep; newline='\n' or '' 时, 写到文件中的是 \n
word census 单词统计:
import string


def census(file: str, encoding='utf-8'):
    """Count word frequencies in a text file and print the most common ones.

    Each line is split on whitespace; every token has leading and trailing
    ASCII punctuation removed and is lower-cased. Punctuation inside a token
    is kept, so "don't" stays a single word. Tokens that consist only of
    punctuation are ignored.

    Up to ten of the most frequent words are printed, one per line, as
    ``word: count``.

    Args:
        file: Path of the text file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
        Words with equal counts keep the order of first appearance.

    Raises:
        OSError: If the file cannot be opened for reading.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    counts = {}
    # Read-only mode is sufficient here. newline=None (the default) makes
    # Python translate '\r', '\n' and '\r\n' line endings to '\n' on read.
    with open(file, mode='r', encoding=encoding, errors='strict',
              newline=None) as f:
        for line in f:
            for token in line.split():
                word = token.strip(string.punctuation).lower()
                if not word:
                    # The token was nothing but punctuation (e.g. "--").
                    continue
                counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so ties stay in first-seen order.
    ranking = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    # Slicing copes with files that hold fewer than ten distinct words.
    for word, count in ranking[:10]:
        print(f'{word}: {count}')
    return ranking


if __name__ == '__main__':
    statistic = census('word.txt')
    # print(statistic)
import string

# Maps every ASCII punctuation character to a space so that a single
# str.split() separates words on whitespace and punctuation alike.
# Built once at import time instead of once per character.
_PUNCT_TO_SPACE = str.maketrans(
    string.punctuation, ' ' * len(string.punctuation)
)


def census(file, encoding='utf-8'):
    """Count word frequencies in a text file and print the most common ones.

    ASCII punctuation is treated as a word separator wherever it occurs,
    so "abc@@def" yields "abc" and "def", and "lib/posixpath.py" yields
    "lib", "posixpath" and "py". Words are lower-cased before counting.

    Up to ten of the most frequent words are printed, one per line, as
    ``word: count``.

    Args:
        file: Path of the text file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count.
        Words with equal counts keep the order of first appearance.

    Raises:
        OSError: If the file cannot be opened for reading.
        UnicodeDecodeError: If the content is not valid in ``encoding``.
    """
    counts = {}
    # newline=None (the default) makes Python translate '\r', '\n' and
    # '\r\n' line endings to '\n' while reading.
    with open(file, encoding=encoding, errors='strict', newline=None) as f:
        for line in f:
            # split() drops the empty pieces produced by leading, trailing
            # or consecutive punctuation.
            for piece in line.translate(_PUNCT_TO_SPACE).split():
                word = piece.lower()
                counts[word] = counts.get(word, 0) + 1

    # sorted() is stable, so ties stay in first-seen order.
    ranking = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    for word, count in ranking[:10]:
        print(f'{word}: {count}')
    return ranking


if __name__ == '__main__':
    statistic = census('word.txt')
    # print(statistic)