我目前的流程是:循环遍历源目录,并把每个文件的名称添加到 Python(pandas)的 DataFrame 中。除此之外,我还想获取每个文件的修改日期。
import datetime
import os
import pandas as pd
# Switch the working directory to the source folder; relative file names are resolved against it.
os.chdir('C:/Users/jj/Desktop/do/Claims/globmove')
def read_files(filenames):
    """Load each file and stack the results into one DataFrame.

    Every per-file frame gets a ``Filename`` column holding the name it
    was loaded from before the frames are concatenated.

    Args:
        filenames: Iterable of file names to load.

    Returns:
        The concatenation of all per-file frames with a fresh index.
    """
    frames = []
    for name in filenames:
        # NOTE(review): read_sheets is not defined in this snippet; it is
        # presumably a helper that returns a DataFrame - confirm.
        sheet = read_sheets(name)
        sheet['Filename'] = name
        frames.append(sheet)
    return pd.concat(frames, ignore_index=True)
def modification_date(filename):
    """Return the last-modification time of *filename* as a naive local datetime."""
    # os.stat(...).st_mtime is the same value os.path.getmtime() reports.
    return datetime.datetime.fromtimestamp(os.stat(filename).st_mtime)
# Absolute path of the folder that holds the workbooks.
folder_path = os.path.abspath('C:/Users/jj/Desktop/do/Claims/globmove')
# Keep only the .xlsx entries; os.listdir yields bare names, not full paths.
files = [
    entry
    for entry in os.listdir(folder_path)
    if entry.endswith(".xlsx")
]
# Load them all into one concatenated DataFrame.
dfooc = read_files(files)
这段代码可以正常运行、不会报错,但文件修改日期的时间戳目前并没有被添加到最终的 DataFrame(dfooc)中。怎样才能把修改日期也一并加进去?
编辑:更改上面原始代码的顺序后出现缩进错误
def read_files(filenames):
    """Load each file and tag its rows with the source name and modification time.

    Args:
        filenames: Iterable of paths of the files to load.

    Returns:
        One DataFrame built by concatenating the per-file frames, each of
        which has a ``Filename`` and a ``ModificationDate`` column added.

    Raises:
        ValueError: Propagated from ``pd.concat`` when ``filenames`` is empty.
    """
    result = []
    for filename in filenames:
        # NOTE(review): read_sheets is not defined in this snippet; it is
        # presumably a helper that returns a DataFrame - confirm.
        file = read_sheets(filename)
        file['Filename'] = filename
        # Store the file's mtime here; the earlier version assigned the file
        # name again and defined an unused helper inside the loop.
        file['ModificationDate'] = datetime.datetime.fromtimestamp(
            os.path.getmtime(filename)
        )
        result.append(file)
    return pd.concat(result, ignore_index=True)
return pd.concat(result, ignore_index=True)
^
IndentationError: unexpected indent
我是这样做的。
import os
from pathlib import Path
import pandas as pd
import pendulum
class FileDates:
    """Collect the names and last-modification dates of files under a directory.

    Keyword Args:
        file_type: File-name suffix to keep, e.g. ``".xlsx"``. When omitted,
            every file found is kept.
        file_path: Root directory that is searched recursively; a
            ``pathlib.Path`` or a string.
        path: Stored as given; no method of this class reads it.
    """

    def __init__(self, **kwargs):
        self.file_type = kwargs.get("file_type")
        self.file_path = kwargs.get("file_path")
        self.path = kwargs.get("path")
        # Name of the local timezone; modification dates are reported in it.
        self.tz = pendulum.now().timezone.name

    def main(self) -> pd.DataFrame:
        """Return a DataFrame with one row per matching file: ``file`` and ``date``."""
        files = self.get_files()
        dates = self.get_dates(files)
        # Path.name drops the directory part on every platform. Splitting the
        # string on "/" left the whole path in place on Windows, where
        # str(Path) uses backslashes.
        names = [Path(x).name for x in files]
        return pd.DataFrame(list(zip(names, dates)), columns=["file", "date"])

    def get_files(self) -> list:
        """Return the paths (as strings) of all files under ``file_path`` that match ``file_type``."""
        files = [str(x) for x in Path(self.file_path).rglob("*") if x.is_file()]
        if self.file_type is None:
            return files
        # Match on the suffix only: a substring test would also accept names
        # such as "report.xlsx.bak" or anything below a folder named "x.xlsx".
        return [x for x in files if x.endswith(self.file_type)]

    def get_dates(self, files: list) -> list:
        """Return, for each path in *files*, its modification date as a date string in ``self.tz``."""
        return [
            pendulum.from_timestamp(os.path.getmtime(Path(x))).in_tz(self.tz).to_date_string()
            for x in files
        ]
# Suffix of the files to report on.
file_type = ".xlsx"
# Folder to scan, located under the current user's home directory.
file_path = Path.home().joinpath("Desktop", "do", "Claims", "globmove")
# Build the table of file names and modification dates.
data = FileDates(file_path=file_path, file_type=file_type).main()
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)