Python 标准库速查 — pathlib / os / sys / shutil / subprocess / json / csv / datetime / collections / itertools

pathlib — 现代路径处理

from pathlib import Path

# 路径构建
p = Path.home() / "projects" / "myapp" / "config.json"
# / 运算符自动处理分隔符

# 属性
p.name          # "config.json"
p.stem          # "config"（无后缀）
p.suffix        # ".json"
p.parent        # Path("/home/user/projects/myapp")
p.parts         # ('/', 'home', 'user', 'projects', 'myapp', 'config.json')

# Operations
p.exists()                     # True if the path exists
p.is_file()                    # True if the path is a regular file
p.is_dir()                     # True if the path is a directory
# p names a file, so create its parent directory chain instead of a directory
# called "config.json"; exist_ok=True avoids FileExistsError on reruns.
p.parent.mkdir(parents=True, exist_ok=True)
text = p.read_text()           # read the file as text
p.write_text("content")        # write text (replaces existing content)
data = p.read_bytes()          # read the file as raw bytes

# 遍历
for f in Path("src").rglob("*.cpp"):  # 递归 glob
    print(f)

# 相对路径
Path("a/b/c").relative_to("a")  # Path("b/c")

os / sys / shutil

import os, sys, shutil

# os
os.getcwd()                    # 当前目录
os.chdir("/tmp")               # 切换目录
os.environ["HOME"]             # 环境变量
os.path.join("a", "b")         # 路径拼接（pathlib 更好）
os.path.exists("file.txt")     # 是否存在

# sys
sys.argv                       # 命令行参数
sys.exit(0)                    # 退出
sys.platform                   # "linux" / "darwin" / "win32"

# shutil
shutil.copy("a.txt", "b.txt")  # 复制
shutil.move("a.txt", "/tmp/")  # 移动
shutil.rmtree("/tmp/build")    # 递归删除
shutil.make_archive("dist", "zip", "build/")  # 打包

subprocess — 执行命令

import subprocess

# 运行 + 捕获输出（推荐）
result = subprocess.run(
    ["git", "status", "--short"],
    capture_output=True,
    text=True,          # 返回 str 而非 bytes
    timeout=10,
    check=True          # 非零退出码抛异常
)
print(result.stdout)

# Pipeline: feed the stdout of the first process into the stdin of the second
p1 = subprocess.Popen(["ls"], stdout=subprocess.PIPE)
p2 = subprocess.Popen(["grep", ".cpp"], stdin=p1.stdout,
                       stdout=subprocess.PIPE, text=True)
p1.stdout.close()   # drop the parent's copy so p1 gets SIGPIPE if p2 exits early
output, _ = p2.communicate()
p1.wait()           # reap the first process so it does not linger as a zombie

# 快捷版（只返回输出；退出码非零时抛出 CalledProcessError）
out = subprocess.check_output(["echo", "hello"], text=True)

json

import json

# Read: JSON text is UTF-8, so do not rely on the platform default encoding
with open("config.json", encoding="utf-8") as f:
    config = json.load(f)

# Write: ensure_ascii=False emits non-ASCII characters as-is, so the file
# must be opened as UTF-8 or the write can fail on non-UTF-8 locales
with open("output.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2, ensure_ascii=False)

# 字符串 ↔ 对象
s = json.dumps({"key": "值"}, indent=2)   # 对象 → 字符串
obj = json.loads(s)                         # 字符串 → 对象

# Custom serialization
class Point:
    """Minimal 2-D point used to demonstrate a custom JSON encoder."""

    def __init__(self, x, y):
        self.x, self.y = x, y


def point_encoder(obj):
    """Turn a Point into a JSON-serializable dict.

    Intended as the ``default=`` hook of ``json.dumps`` / ``json.dump``.

    Args:
        obj: The object the encoder could not serialize on its own.

    Returns:
        A dict with the keys "x" and "y" when obj is a Point.

    Raises:
        TypeError: If obj is not a Point. The message names the offending
            type, using the same wording as json's built-in error.
    """
    if isinstance(obj, Point):
        return {"x": obj.x, "y": obj.y}
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )


json.dumps(Point(1, 2), default=point_encoder)  # '{"x": 1, "y": 2}'

csv

import csv

# Read: the csv module needs newline="" so that newlines embedded in quoted
# fields are passed through to the parser untouched
with open("data.csv", newline="") as f:
    reader = csv.reader(f)
    header = next(reader, None)  # consume the header row; None if the file is empty
    for row in reader:
        print(row[0], row[1])

# Read as dicts: DictReader takes the first row as the field names
with open("data.csv", newline="") as f:
    for row in csv.DictReader(f):
        print(row["Name"], row["Age"])

# 写入
with open("out.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["Name", "Age"])       # 表头
    writer.writerows([("Alice", 25), ("Bob", 30)])

# 字典写入
with open("out.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["Name", "Age"])
    writer.writeheader()
    writer.writerow({"Name": "Alice", "Age": 25})

datetime

from datetime import datetime, timedelta

now = datetime.now()
ts = now.timestamp()                         # Unix 时间戳

# 格式化
now.strftime("%Y-%m-%d %H:%M:%S")            # "2025-01-15 14:30:00"
datetime.strptime("2025-01-15", "%Y-%m-%d")  # 字符串 → datetime

# 运算
tomorrow = now + timedelta(days=1)
diff = tomorrow - now  # timedelta

collections

from collections import defaultdict, Counter, deque

# defaultdict — 自动默认值
d = defaultdict(list)
d["key"].append(1)       # 不需要先 d["key"] = []

# Counter — 计数
c = Counter("abracadabra")
c.most_common(3)          # [('a', 5), ('b', 2), ('r', 2)]

# deque — 双端队列
q = deque(maxlen=100)     # 自动丢弃旧元素
q.append(1); q.appendleft(0)

itertools

from itertools import chain, groupby, product, combinations

list(chain([1,2], [3,4]))                     # [1,2,3,4]
list(combinations("ABC", 2))                  # [('A','B'),('A','C'),('B','C')]
list(product("AB", "12"))                     # [('A','1'),('A','2'),('B','1'),('B','2')]