我写了一个脚本,它读取给定的输入文件(CSV),对数据做一些处理,然后写入输出文件(CSV)。
就我而言,我给定的输入文件如下所示:
| sku | article_name |
| 1 | MyArticle |
对于输出文件,我需要重新排列这些列(实际的列还有更多,但只要有人给我演示一下方法,我想我就能自己解决其余部分)。
我的输出文件应如下所示:
| article_name | another_column | sku |
| MyArticle | | 1 |
请注意,another_column 是一个新列,源 CSV 文件中并没有它,但无论如何都必须把它输出(列的顺序也很重要)。
这是我到目前为止所拥有的:
#!/usr/bin/env python
# -*- coding: latin_1 -*-
import csv
import argparse
import sys
# Input CSV column name -> column name used in the output CSV.
# Input columns that are not listed here are not copied to the output.
header_mappings = {
    'attr_artikel_bezeichnung1': 'ARTICLE LABEL',
    'sku': 'ARTICLE NUMBER',
    'Article label locale': 'Article label locale',
    'attr_purchaseprice': 'EK-Preis',
    'attr_salesPrice': 'EuroNettoPreis',
    'attr_salesunit': 'Einheit',
    'attr_salesvatcode': 'MwSt.-Satz',
    'attr_suppliercode': 'Lieferantennummer',
    'attr_suppliersitemcode': 'Artikelnummer Lieferant',
    'attr_isbatchitem': 'SNWarenausgang',
}

# Output column name -> {cell value found in the input: value to write instead}.
row_mapping = {
    'Einheit': {'pc': 'St.'},
    'MwSt.-Satz': {'3': '19'},
}
def remap_header(header):
    """Yield ``(output_name, value)`` pairs for the mapped columns in *header*.

    Walks the module-level ``header_mappings`` table. For every input column
    name that is also a key of *header*, yields the mapped output name
    together with whatever *header* stores under the input name.

    :param header: mapping keyed by input column name (must support ``in``
        and ``.get``).
    """
    for source_name, output_name in header_mappings.items():
        if source_name not in header:
            continue
        yield output_name, header.get(source_name)
def map_header(header):
    """Yield ``(name, position)`` for every entry of the sequence *header*.

    The position is taken from ``header.index(name)``, so a name that occurs
    more than once is always reported with the position of its first
    occurrence.
    """
    for name in header:
        position = header.index(name)
        yield name, position
def read_csv(filename):
    """Lazily yield each row of the semicolon-delimited CSV file *filename*.

    The file is opened in binary mode (``'rb'``) and stays open only while
    the generator is being consumed. Every row is yielded exactly as
    ``csv.reader`` produces it.
    """
    with open(filename, 'rb') as source:
        for record in csv.reader(source, delimiter=';'):
            yield record
def add_header(header, fields=()):
    """Append every entry of *fields* to the list *header* and return it.

    *header* is modified in place; the returned object is the same list.
    """
    header.extend(fields)
    return header
def duplicate(csv_row, header_name, fields):
    """Copy one cell of *csv_row* into another cell and return the row.

    Both cell positions are looked up in the module-level ``new_csv_header``
    list: the value is read from the position of *header_name* and written
    to the position of *fields*. *csv_row* is modified in place.
    """
    value = csv_row[new_csv_header.index(header_name)]
    target_position = new_csv_header.index(fields)
    csv_row[target_position] = value
    return csv_row
def do_new_row(csv_row):
    """Yield the cells of *csv_row* in ``new_csv_header`` order.

    Relies on three module-level names: ``row_mapping`` (value translations
    per output column), ``mapped_header`` (output column name -> position in
    the input row) and ``new_csv_header`` (output column order).

    First, cell values are translated according to ``row_mapping``; this
    modifies *csv_row* in place. Then one cell is yielded for each name in
    ``new_csv_header`` that has a position in ``mapped_header``. Names
    without a position yield nothing, so the result can be shorter than
    ``new_csv_header``.

    :param csv_row: list of cell values of one input row.
    :raises IndexError: if *csv_row* is shorter than a mapped position.
    """
    # Translate values once per row. A column listed in row_mapping but
    # absent from the input has no position; indexing the row with None
    # used to raise an uncaught TypeError, so such columns are skipped.
    for column, replacements in row_mapping.items():
        position = mapped_header.get(column)
        if position is None:
            continue
        cell = csv_row[position]
        if cell in replacements:
            csv_row[position] = replacements[cell]

    for header_name in new_csv_header:
        position = mapped_header.get(header_name)
        if position is None:
            # Output-only column with no source in the input row.
            continue
        yield csv_row[position]
if __name__ == '__main__':
    # Command-line entry point: convert the CSV given with -i into the CSV
    # given with -o, renaming columns via header_mappings and translating
    # cell values via row_mapping.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--infile', metavar='CSV')
    parser.add_argument('-o', '--outfile', metavar='CSV')
    args = parser.parse_args()

    # Called without any arguments: show the usage line and exit successfully.
    if not sys.argv[1:]:
        parser.print_usage()
        sys.exit(0)

    # Both paths are needed below; report a usage error here instead of
    # letting open(None) fail later with a traceback.
    if args.infile is None or args.outfile is None:
        parser.error('both --infile and --outfile are required')

    csv_reader_iter = read_csv(args.infile)
    try:
        csv_header = next(csv_reader_iter)
    except StopIteration:
        # An empty input file has no header row to work with.
        parser.error('input file contains no header row')

    # new_csv_header and mapped_header are module-level names that
    # do_new_row() and duplicate() read.
    # Output header: mapped names in the order the columns appear in the
    # input file, followed by the two extra output-only columns.
    new_csv_header = [header_mappings[h] for h in csv_header
                      if h in header_mappings]
    new_csv_header = add_header(new_csv_header,
                                ('Article label locale', 'Nummer'))
    # Output column name -> position of its source column in an input row.
    mapped_header = dict(remap_header(dict(map_header(csv_header))))

    with open(args.outfile, 'wb') as outcsv:
        csv_writer = csv.writer(outcsv, delimiter=';')
        csv_writer.writerow(new_csv_header)
        for row in csv_reader_iter:
            row = list(do_new_row(row))
            # Output-only columns produce no cell; pad the row with empty
            # cells so it is as wide as the header.
            missing = len(new_csv_header) - len(row)
            if missing > 0:
                row += [''] * missing
            csv_writer.writerow(row)
    print("Done.")
现在的问题是:尽管 header_mappings 中最先写的是“ARTICLE LABEL”,但输出时 sku 列(即 ARTICLE NUMBER)却排在最前面。我的猜测是:这是由输入 CSV 文件中列的顺序造成的,因为 sku 是其中的第一个字段……对吗?