python3 csv/xls/json/pickle 等序列化反序列化代码速查
Posted on Sun 12 November 2017 in 遗迹
全部
# pandas beats everything else hands down
# Only fall back to the alternatives below when pandas is not available
import pandas as pd
import numpy as np
# Readers
df = pd.read_clipboard()
df = pd.read_csv(data_or_path)
# NOTE: read_html returns a list of DataFrames (one per <table> found), not a single frame
df = pd.read_html(data_or_path)
df = pd.read_json(data_or_path)
# NOTE: msgpack support was deprecated in pandas 0.25 and removed in 1.0
df = pd.read_msgpack(data_or_path)
df = pd.read_pickle(data_or_path)
# Writers: to_csv / to_html / to_json return the result as a string when called without a path
df.to_clipboard()
df.to_csv()
df.to_csv(fn)
df.to_excel(fn)
df.to_html()
df.to_html(fn)
df.to_json()
df.to_json(fn)
# NOTE: to_msgpack was removed together with read_msgpack in pandas 1.0
df.to_msgpack()
df.to_msgpack(fn)
df.to_pickle(fn)
# 转 dict 时 orient
# orient : str {‘dict’, ‘list’, ‘series’, ‘split’, ‘records’, ‘index’}
# Determines the type of the values of the dictionary.
# dict (default) : dict like {column -> {index -> value}}
# list : dict like {column -> [values]}
# series : dict like {column -> Series(values)}
# split : dict like {index -> [index], columns -> [columns], data -> [values]}
# records : list like [{column -> value}, ... , {column -> value}]
# index : dict like {index -> {column -> value}}
# 转 json 时 orient
# orient : string
# The format of the JSON string
# split : dict like {index -> [index], columns -> [columns], data -> [values]}
# records : list like [{column -> value}, ... , {column -> value}]
# index : dict like {index -> {column -> value}}
# columns : dict like {column -> {index -> value}}
# values : just the values array
# table : dict like {‘schema’: {schema}, ‘data’: {data}} describing the data, and the data component is like orient='records'.
# 其余不一一列出,详见
# https://pandas.pydata.org/docs/reference/io.html
csv
# Read
import csv

# Use encoding='cp936' instead for CSV files saved by Chinese-locale Excel.
# newline='' lets the csv module handle line endings inside quoted fields itself.
with open('test.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.reader(f)
    # Materialize the rows while the file is still open; `reader` cannot be
    # iterated once the `with` block has closed the file.
    rows = list(reader)
# Write
import csv

info = []
# 'utf_8_sig' prepends a BOM so Excel detects UTF-8 and does not show mojibake;
# use encoding='utf-8' when a BOM-less file is wanted.
# The context manager guarantees the rows are flushed and the handle is closed.
with open('ret.csv', 'w', newline='', encoding='utf_8_sig') as f:
    writer = csv.writer(f)
    writer.writerows(info)
xls
尽量用 pandas,否则日期、时间之类的单元格还得自己费力处理
# Read a single row / column
import xlrd
xls = xlrd.open_workbook('test.xls')
sheet = xls.sheet_by_name('Sheet1')  # look the worksheet up by its name
rowA = sheet.row_values(0) # first row
colA = sheet.col_values(0) # first column
# Read everything
import xlrd
xls = xlrd.open_workbook('test.xls')
sheet = xls.sheet_by_name('Sheet1')
# One list of cell values per row, in sheet order.
data = list(map(sheet.row_values, range(sheet.nrows)))
# 写入
# 摸了,用 pandas 或 csv, utf-8 with bom 输出吧
json
# Read
import json

# Name the encoding explicitly so the result does not depend on the platform
# default (e.g. cp936 on Chinese-locale Windows); the context manager closes
# the file as soon as parsing is done.
with open('data.json', 'r', encoding='utf-8') as f:
    info = json.load(f)
# Write
import json

# The context manager flushes and closes the file even if serialization fails.
# Pass ensure_ascii=False to json.dump to keep non-ASCII text human-readable.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(info, f)
pickle
# Read
import pickle

# SECURITY: unpickling can execute arbitrary code; only load files you trust.
with open('data.pkl', 'rb') as f:
    info = pickle.load(f)
# Write
import pickle

# The context manager guarantees the bytes are flushed and the handle is closed.
with open('data.pkl', 'wb') as f:
    pickle.dump(info, f)
纯文本
# Read
# `with` closes the file right after reading instead of leaking the handle.
with open(fn, 'r', encoding='utf-8') as f:
    txt = f.read()
# Write
# `with` guarantees the text is flushed to disk and the handle is closed.
with open(fn, 'w', encoding='utf-8') as f:
    f.write(txt)
二进制
# Read
# `with` closes the file right after reading instead of leaking the handle.
with open(fn, 'rb') as f:
    data = f.read()
# Write
# `with` guarantees the bytes are flushed to disk and the handle is closed.
with open(fn, 'wb') as f:
    f.write(data)
P.S. 其中很多一直在用但一直没统一整理,这样就方便很多了。内容实际大部分是月初填充的,但觉得太水不好意思单发。正好今天发个文,这篇算是附带的。