本文汇总了在 Python 中各种类型文件的读取和写入方法,包含文本、图像、表格、log 文件、pickle 文件、npy 文件、npz 文件等。
- 文本类型:txt文件、json文件、yaml文件
- 图像类型:使用 skimage、PIL、OpenCV、imageio/scipy、matplotlib(plt)库
- 表格类型:xlsx文件、csv文件
- 其他类型:log文件、pickle文件、npy文件、npz文件
文本类型
txt文件
text_name = 'test.txt'

# --- Write ---
lines = ['aaa\n', 'bbb\n', 'ccc\n']
with open(text_name, 'w', encoding='utf-8') as file:
    # Option 1: write one line at a time.
    for line in lines:
        file.write(line)
    # Option 2: write the whole list in one call (writelines adds no newlines).
    # Both options run here for demonstration, so the file ends up with six lines.
    file.writelines(lines)

# --- Read ---
with open(text_name, 'r', encoding='utf-8') as file:
    # Option 1: read one line at a time; readline() returns '' at end of file.
    while True:
        line = file.readline()
        if not line:
            break
        words = line.split()
    # The loop above consumed the whole file; rewind, otherwise readlines()
    # below would return an empty list.
    file.seek(0)
    # Option 2: read every line into a list at once.
    lines = file.readlines()
    for line in lines:
        words = line.split()
json文件
import json
text_name = 'test.json'

# Serialize. JSON object keys are always strings, so the non-string keys
# below (3 and 1) are converted to strings when dumped.
data = {
    '1': 1,
    '2': [{'a': 1}, {'b': 2}, {'c': 3}],
    3: {1: '!', '2': '?'},
}
with open(text_name, "w") as fp:
    json.dump(data, fp, indent=4)

# Deserialize, then pretty-print what was loaded.
with open(text_name, "r") as fp:
    data = json.load(fp)
pretty = json.dumps(data, indent=4)
print(pretty)
yaml文件
import yaml
text_name = 'test.yaml'

# Serialize the nested structure to a YAML file.
data = {
    '1': 1,
    '2': [{'a': 1}, {'b': 2}, {'c': 3}],
    3: {1: '!', '2': '?'},
}
with open(text_name, "w") as fp:
    yaml.dump(data, fp, indent=4)

# Deserialize and echo the result.
# NOTE(review): this file was written just above, so FullLoader is fine here;
# for YAML from an untrusted source prefer yaml.safe_load.
with open(text_name, "r") as fp:
    data = yaml.load(fp, Loader=yaml.FullLoader)
rendered = yaml.dump(data, indent=4)
print(rendered)
图像类型
使用skimage
from skimage import io
image_name = "test.jpg"

# Load the image from disk.
img = io.imread(image_name)
# Per the original notes: a numpy array laid out as [height, width, channel].
summary = (type(img), img.dtype, img.shape)
print(*summary)

# Save the array back under the same name (this overwrites the input file).
io.imsave(image_name, img)
使用PIL
from PIL import Image
import numpy as np
image_name = "test.jpg"

# Read. Image.open keeps the underlying file open, so use it as a context
# manager to make sure the handle is released.
with Image.open(image_name) as pil_img:
    # PIL image; note that .size is (width, height), not (height, width).
    print(type(pil_img), pil_img.size)
    # Converting to an array materializes the pixels, so the array remains
    # usable after the file is closed.
    img = np.array(pil_img)
# numpy array; [height, width, channel] for a colour image.
print(type(img), img.dtype, img.shape)

# Write: convert the array back to a PIL image and save it
# (this overwrites the input file).
img = Image.fromarray(img)
img.save(image_name)
使用opencv
import cv2
image_name = "test.jpg"

# Read. cv2.imread does not raise when the file is missing or cannot be
# decoded; it returns None, so check explicitly to get a clear error.
img = cv2.imread(image_name)
if img is None:
    raise FileNotFoundError("cv2.imread could not read {!r}".format(image_name))
# numpy array, [height, width, channel]; OpenCV orders colour channels as BGR.
print(type(img), img.dtype, img.shape)

# Write (overwrites the input file). imwrite reports failure through its
# boolean return value, so surface it instead of ignoring it.
if not cv2.imwrite(image_name, img):
    raise OSError("cv2.imwrite could not write {!r}".format(image_name))
使用imageio/scipy
import imageio
from scipy import misc # 新版本scipy中接口已被移除,建议使用imageio替代
image_name = "test.jpg"

# Load the image with imageio.
img = imageio.imread(image_name)
# Per the original notes: imageio's array type, [height, width, channel].
summary = (type(img), img.dtype, img.shape)
print(*summary)

# Save it back under the same name (this overwrites the input file).
imageio.imwrite(image_name, img)
使用plt
import matplotlib.pyplot as plt
image_name = "test.jpg"

# Load the image through matplotlib.
img = plt.imread(image_name)
# Per the original notes: a numpy array, [height, width, channel].
summary = (type(img), img.dtype, img.shape)
print(*summary)

# Save it back under the same name (this overwrites the input file).
plt.imsave(image_name, img)
表格类型
xlsx文件
import openpyxl
table_name = "test.xlsx"

# --- Write ---
workbook = openpyxl.Workbook()
sheet = workbook.active
sheet.title = 'Sheet1'
sheet.cell(1, 1, "TEST")  # (row, column, value); indices start at [1, 1]
line1 = ['aaa', 1]
line2 = ['bbb', 2]
# append() adds each list as a new row at the bottom of the sheet.
sheet.append(line1)
sheet.append(line2)
workbook.save(table_name)

# --- Read ---
workbook = openpyxl.load_workbook(table_name)
# Index the workbook by sheet title; get_sheet_by_name() is deprecated.
# Alternatively: sheet = workbook.active
sheet = workbook['Sheet1']
print(sheet.title, sheet.max_row, sheet.max_column)
cell = sheet.cell(1, 1)
print(cell.value)
csv文件
import csv
table_name = 'test.csv'

# Write two rows. newline='' lets the csv module control line endings itself.
line1 = ['aaa', 1]
line2 = ['bbb', 2]
with open(table_name, 'w', newline='') as out_file:
    writer = csv.writer(out_file, dialect='excel')
    writer.writerows([line1, line2])

# Read the rows back; every field comes back as a string.
with open(table_name, 'r', newline='') as in_file:
    for row in csv.reader(in_file):
        print(row)
其他类型
log文件
import logging
logger = logging.getLogger()  # no name given, so this is the root logger
logger.setLevel(logging.DEBUG)

# File output: INFO and above, one formatted line per record.
# mode='w' truncates the log file every time the script runs.
log_name = 'test.log'
line_format = logging.Formatter("%(asctime)s - %(filename)s [line:%(lineno)d] - %(levelname)s: %(message)s")
file_handler = logging.FileHandler(log_name, mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(line_format)
logger.addHandler(file_handler)

# Console output: DEBUG and above, no formatter attached.
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
logger.addHandler(console_handler)

logger.info("INFO")    # passes both handlers' levels
logger.debug("DEBUG")  # below the file handler's INFO level, console only
pickle文件
import pickle
file_name = "test.pickle"

# Build the sample payload: strings '', 'a', 'aa', ... and labels 0..9,
# then pickle both lists together as a single object.
string = ['a' * count for count in range(10)]
label = list(range(10))
with open(file_name, 'wb') as sink:
    pickle.dump([string, label], sink, protocol=2)

# Load the object back and unpack the two lists.
# NOTE: only unpickle files you trust.
with open(file_name, 'rb') as source:
    string, label = pickle.load(source)
print(string, label)
npy文件
npy文件用于保存单个numpy类型的数组
import numpy as np
file_name = "test.npy"

# Save a single 2x2 integer array holding the values 1..4.
data = np.arange(1, 5).reshape(2, 2)
np.save(file_name, data)

# Load it back and show it.
data = np.load(file_name)
print(data)
npz文件
npz文件用于同时保存多个numpy类型的数组
import numpy as np
file_name = "test.npz"

# Write: each keyword argument becomes one named array inside the archive.
data = {"data_1": np.array([[1, 2], [3, 4]]), "data_2": np.array([[1, 2], [3, 4]]) * 2}
np.savez(file_name, **data)

# Read: for a .npz file np.load returns an NpzFile that keeps the archive
# open, so use it as a context manager and copy the arrays out before it closes.
with np.load(file_name) as npz:
    keys = list(npz)  # iterating an NpzFile yields the array names
    data = {key: npz[key] for key in keys}
print(keys, data['data_1'], data['data_2'])