python3 csv/xls/json/pickle 等序列化反序列化代码速查

Posted on Sun 12 November 2017 in 遗迹

全部

# pandas 秒天秒地
# 条件有限的时候再考虑别的
import pandas as pd
import numpy as np

# --- Readers: `data_or_path` is a path/URL/buffer accepted by the reader ---
df = pd.read_clipboard()
df = pd.read_csv(data_or_path)
# read_html returns a *list* of DataFrames (one per <table> found),
# so pick the table you need instead of treating the result as a DataFrame.
df = pd.read_html(data_or_path)[0]
df = pd.read_json(data_or_path)
# msgpack support was deprecated in pandas 0.25 and removed in 1.0;
# only call it when the installed pandas still provides it.
if hasattr(pd, 'read_msgpack'):
    df = pd.read_msgpack(data_or_path)
df = pd.read_pickle(data_or_path)

# --- Writers: `fn` is the output file name ---
# Calling to_csv / to_html / to_json without a target returns the result
# as a string instead of writing a file.
df.to_clipboard()
df.to_csv()
df.to_csv(fn)
df.to_excel(fn)
df.to_html()
df.to_html(fn)
df.to_json()
df.to_json(fn)
# See the note on read_msgpack above: removed in pandas 1.0.
if hasattr(df, 'to_msgpack'):
    df.to_msgpack()
    df.to_msgpack(fn)
df.to_pickle(fn)

# 转 dict 时 orient
# orient : str {'dict', 'list', 'series', 'split', 'records', 'index'}
# Determines the type of the values of the dictionary.
#    dict (default) : dict like {column -> {index -> value}}
#    list : dict like {column -> [values]}
#    series : dict like {column -> Series(values)}
#    split : dict like {index -> [index], columns -> [columns], data -> [values]}
#    records : list like [{column -> value}, ... , {column -> value}]
#    index : dict like {index -> {column -> value}}


# 转 json 时 orient
# orient : string
# The format of the JSON string
#    split : dict like {index -> [index], columns -> [columns], data -> [values]}
#    records : list like [{column -> value}, ... , {column -> value}]
#    index : dict like {index -> {column -> value}}
#    columns : dict like {column -> {index -> value}}
#    values : just the values array
#    table : dict like {'schema': {schema}, 'data': {data}} describing the data, and the data component is like orient='records'.

# 其余不一一列出，详见
# https://pandas.pydata.org/docs/reference/io.html

csv

# Read
import csv
# Pick the encoding that matches the file:
#   'utf-8'  - ordinary UTF-8 CSV
#   'cp936'  - CSV saved by Excel (original note: "for excel")
csv_encoding = 'cp936'
# newline='' lets the csv module handle embedded line breaks itself, and the
# `with` block guarantees the file handle is closed once the rows are read.
with open('test.csv', 'r', newline='', encoding=csv_encoding) as f:
    reader = csv.reader(f)
    # The reader is lazy and tied to the open file, so materialize the rows
    # before the file is closed.
    rows = list(reader)

# Write
import csv
info = []  # rows to write; each row is a sequence of cell values
# encoding='utf_8_sig' writes UTF-8 with a BOM so Excel does not show garbled
# text; use encoding='utf-8' when no BOM is wanted.
# The `with` block closes (and therefore flushes) the file after writing;
# the file is opened only once so it is not truncated twice.
with open('ret.csv', 'w', newline='', encoding='utf_8_sig') as out_file:
    writer = csv.writer(out_file)
    writer.writerows(info)

xls

尽量用 pandas，不然处理时间啥的还要费力

# Read a single row / a single column
import xlrd
xls = xlrd.open_workbook(filename='test.xls')
sheet = xls.sheet_by_name(sheet_name='Sheet1')
# Index 0 addresses the first row and the first column respectively.
rowA = sheet.row_values(rowx=0)
colA = sheet.col_values(colx=0)

# Read the whole sheet
import xlrd
xls = xlrd.open_workbook(filename='test.xls')
sheet = xls.sheet_by_name(sheet_name='Sheet1')
# One list of cell values per row, for row indices 0 .. nrows-1.
data = list(map(sheet.row_values, range(sheet.nrows)))

# 写入
# 摸了，用 pandas 或 csv, utf-8 with bom 输出吧

json

# Read
import json
# Be explicit about UTF-8 (the standard JSON interchange encoding, RFC 8259)
# instead of relying on the platform default; `with` closes the handle.
with open('data.json', 'r', encoding='utf-8') as f:
    info = json.load(f)

# Write
import json
# json.dump streams straight into the file; the `with` block guarantees the
# data is flushed and the handle closed.
with open('data.json', 'w', encoding='utf-8') as f:
    json.dump(info, f)

pickle

# Read
import pickle
# SECURITY: unpickling can execute arbitrary code - only load files you trust.
with open('data.pkl', 'rb') as f:
    info = pickle.load(f)

# Write
import pickle
# The `with` block guarantees the pickle is flushed and the handle closed.
with open('data.pkl', 'wb') as f:
    pickle.dump(info, f)

纯文本

# Read (`fn` is the file name)
with open(fn, 'r', encoding='utf-8') as f:
    txt = f.read()

# Write; the `with` block flushes and closes the file
with open(fn, 'w', encoding='utf-8') as f:
    f.write(txt)

二进制

# Read raw bytes (`fn` is the file name)
with open(fn, 'rb') as f:
    data = f.read()

# Write raw bytes; the `with` block flushes and closes the file
with open(fn, 'wb') as f:
    f.write(data)

P.S. 其中很多一直在用但一直没统一整理，这样就方便很多了。内容实际大部分是月初填充的，但觉得太水不好意思单发。正好今天发个文，这篇算是附带的。