Python版csv文件utf8转ansi小程序

为什么需要这个小程序?因为很多 CSV 文件是以 UTF-8 编码保存的,在 Excel 中直接打开时会显示乱码。虽然网上有很多教程教你通过 Excel 的数据导入功能来打开,但操作稍显繁琐。对单个文件来说,最方便的方式是用记事本打开 CSV 文件,然后另存为 ANSI 编码;但需要转换多个文件时,逐个操作就比较麻烦了。
这个小程序可以批量转换:只需将需要转换的 CSV 文件或文件夹拖入程序窗口,然后点击“开始转换”即可。
csv_to_ansi.py源码:
# csv_to_ansi_gui_drag.py
import sys
import os
import traceback
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
# 需要安装 tkinterdnd2:pip install tkinterdnd2
from tkinterdnd2 import DND_FILES, TkinterDnD
DEFAULT_TARGET_ENCODING = "cp936"  # Chinese Windows "ANSI" code page (GBK)
def find_csvs_in_path(p: Path, recursive=True):
    """Collect the CSV files designated by ``p``.

    Args:
        p: A file or directory path.
        recursive: When ``p`` is a directory, also descend into its
            sub-directories.

    Returns:
        A sorted list of ``Path`` objects. If ``p`` is a file, the list
        holds ``p`` itself when its suffix is ``.csv`` (compared
        case-insensitively) and is empty otherwise. If ``p`` is a directory,
        the list holds every regular file beneath it with a ``.csv`` suffix.
        A path that is neither a file nor a directory yields an empty list.
    """

    def is_csv(candidate: Path) -> bool:
        # One shared rule for both branches, so "DATA.CSV" is treated the same
        # whether it is passed directly or discovered inside a folder.
        return candidate.suffix.lower() == ".csv"

    if p.is_file():
        return [p] if is_csv(p) else []
    if not p.is_dir():
        return []

    # Glob everything and filter in Python: a "*.csv" glob pattern is
    # case-sensitive on POSIX file systems and would also return directories
    # whose name happens to end in ".csv".
    entries = p.rglob("*") if recursive else p.glob("*")
    return sorted(fp for fp in entries if fp.is_file() and is_csv(fp))
def convert_file(src_path: Path, target_encoding=DEFAULT_TARGET_ENCODING, overwrite=False):
    """Re-encode one text file into ``target_encoding``.

    The source is decoded as UTF-8 (a leading BOM, if present, is dropped).
    If it is not valid UTF-8 it is decoded as latin1 as a last resort.
    Characters that the target encoding cannot represent are replaced rather
    than aborting the conversion. Line endings are written exactly as they
    appear in the source.

    Args:
        src_path: File to convert.
        target_encoding: Name of the codec used for the output.
        overwrite: When true the source file itself is rewritten; otherwise a
            sibling file named ``<stem>_ANSI<suffix>`` (or
            ``<stem>_ANSI(<n>)<suffix>`` if that name is taken) is created.

    Returns:
        A ``(ok, message, out_path)`` tuple. ``out_path`` is the written file
        on success and ``None`` on failure. The function does not raise for
        ordinary errors; they are reported through ``ok``/``message``.
    """
    try:
        # Work on raw bytes so no newline translation happens while reading;
        # text-mode reading would turn "\r\n" into "\n" and change the file.
        try:
            raw = src_path.read_bytes()
        except OSError:
            return False, f"无法读取文件:{src_path}", None

        try:
            # "utf-8-sig" accepts UTF-8 with or without a BOM.
            text = raw.decode("utf-8-sig")
        except UnicodeDecodeError:
            # NOTE: latin1 maps every byte to a character, so this fallback
            # always succeeds, but the result is only meaningful if the file
            # really is latin1. Kept as the original best-effort behavior.
            text = raw.decode("latin1")

        # Encode before touching the destination: an unknown codec name must
        # fail here, not after the output (possibly the source itself, in
        # overwrite mode) has already been truncated.
        payload = text.encode(target_encoding, errors="replace")

        if overwrite:
            out_path = src_path
        else:
            out_path = src_path.with_name(src_path.stem + "_ANSI" + src_path.suffix)
            i = 1
            # Never clobber an earlier conversion result; pick a numbered name.
            while out_path.exists():
                out_path = src_path.with_name(f"{src_path.stem}_ANSI({i}){src_path.suffix}")
                i += 1

        out_path.write_bytes(payload)
        return True, f"转换成功 -> {out_path}", out_path
    except Exception as e:
        # Boundary for the batch loop: report the failure instead of raising
        # so the remaining files are still processed.
        tb = traceback.format_exc()
        return False, f"转换出错:{e}\n{tb}", None
def process_paths(paths, target_encoding=DEFAULT_TARGET_ENCODING, recursive=True, overwrite=False, callback=None):
    """Convert every CSV file reachable from ``paths``.

    Args:
        paths: Iterable of file or directory paths (anything ``Path`` accepts).
            Directories are searched for CSV files; files are used only when
            their suffix is ``.csv`` (case-insensitive); anything else is
            ignored.
        target_encoding: Codec name forwarded to ``convert_file``.
        recursive: Forwarded to ``find_csvs_in_path`` for directories.
        overwrite: Forwarded to ``convert_file``.
        callback: Optional callable invoked as ``callback(file, index, total)``
            just before each file is converted; ``index`` starts at 1.

    Returns:
        A list of ``(file, ok, message, out_path)`` tuples, one per converted
        file, in processing order.
    """
    candidates = []
    for raw_entry in paths:
        entry = Path(raw_entry)
        if entry.is_dir():
            candidates += find_csvs_in_path(entry, recursive=recursive)
        elif entry.is_file() and entry.suffix.lower() == ".csv":
            candidates.append(entry)

    # De-duplicate on the resolved path. A dict preserves insertion order and
    # setdefault keeps the first spelling seen for each underlying file.
    first_by_resolved = {}
    for candidate in candidates:
        first_by_resolved.setdefault(str(candidate.resolve()), candidate)
    targets = list(first_by_resolved.values())
    total = len(targets)

    outcomes = []
    for position, target in enumerate(targets, start=1):
        if callback:
            callback(target, position, total)
        ok, msg, out = convert_file(target, target_encoding=target_encoding, overwrite=overwrite)
        outcomes.append((target, ok, msg, out))
    return outcomes
class App:
    """Tkinter window for batch re-encoding CSV files.

    The window holds a list of queued file/folder paths, a target-encoding
    selector, an overwrite checkbox, a log pane and a progress bar. Pressing
    the start button hands the queued paths to ``process_paths``.
    """

    def __init__(self, root):
        """Build all widgets inside ``root``.

        Args:
            root: The top-level Tk window. Drag-and-drop onto the list is only
                enabled when it is a ``TkinterDnD.Tk`` instance.
        """
        self.root = root
        root.title("CSV UTF-8 -> ANSI 转换器")
        root.geometry("720x420")
        root.minsize(620, 360)

        frm = ttk.Frame(root, padding=10)
        frm.pack(fill="both", expand=True)

        # ---- top row: encoding selector and overwrite switch ----
        top = ttk.Frame(frm)
        top.pack(fill="x", pady=(0, 8))
        ttk.Label(top, text="目标编码:").pack(side="left")
        # Not attached to any widget in this class; the combobox below is what
        # start_conversion reads. Kept so the attribute continues to exist.
        self.encoding_var = tk.StringVar(value=DEFAULT_TARGET_ENCODING)
        encoding_options = [
            ("cp936 (GBK - 中文 Windows ANSI)", "cp936"),
            ("utf-8-sig (UTF-8 with BOM - Excel modern)", "utf-8-sig"),
            ("latin1 (ISO-8859-1)", "latin1"),
            ("windows-1252", "cp1252"),
        ]
        self.encoding_combo = ttk.Combobox(
            top,
            state="readonly",
            values=[o[0] for o in encoding_options],
            width=35,
        )
        # Maps the label shown in the combobox to the codec name to use.
        self._enc_map = {o[0]: o[1] for o in encoding_options}
        self.encoding_combo.set(encoding_options[0][0])
        self.encoding_combo.pack(side="left", padx=(5, 15))
        self.overwrite_var = tk.BooleanVar(value=False)
        ttk.Checkbutton(top, text="覆盖源文件(谨慎)", variable=self.overwrite_var).pack(side="left")

        # ---- middle: queued paths on the left, log on the right ----
        mid = ttk.Frame(frm)
        mid.pack(fill="both", expand=True)
        left = ttk.Frame(mid)
        left.pack(side="left", fill="both", expand=True)
        ttk.Label(left, text="待转换文件(拖拽或点击添加):").pack(anchor="w")
        self.listbox = tk.Listbox(left, selectmode="extended")
        self.listbox.pack(fill="both", expand=True, padx=(0, 6), pady=6)
        # Allow files to be dragged onto the listbox.
        self.listbox_drop_target_register()
        btns = ttk.Frame(left)
        btns.pack(fill="x")
        ttk.Button(btns, text="添加文件", command=self.add_files).pack(side="left", padx=3)
        ttk.Button(btns, text="添加文件夹", command=self.add_folder).pack(side="left", padx=3)
        ttk.Button(btns, text="移除所选", command=self.remove_selected).pack(side="left", padx=3)
        ttk.Button(btns, text="清空列表", command=self.clear_list).pack(side="left", padx=3)

        right = ttk.Frame(mid, width=260)
        right.pack(side="right", fill="y")
        ttk.Label(right, text="日志 / 进度:").pack(anchor="w")
        # The log is kept read-only; log_append enables it only while writing.
        self.log = tk.Text(right, width=40, height=15, state="disabled", wrap="word")
        self.log.pack(fill="both", expand=True, pady=6)

        # ---- bottom: progress bar and action buttons ----
        self.progress = ttk.Progressbar(frm, orient="horizontal", mode="determinate")
        self.progress.pack(fill="x", pady=(6, 0))
        action = ttk.Frame(frm)
        action.pack(fill="x", pady=8)
        ttk.Button(action, text="开始转换", command=self.start_conversion).pack(side="left", padx=6)
        ttk.Button(action, text="打开输出目录", command=self.open_out_dir).pack(side="left", padx=6)
        ttk.Button(action, text="退出", command=root.quit).pack(side="right", padx=6)

    # ---------------- Drag & Drop ----------------
    def listbox_drop_target_register(self):
        """Register the listbox as a file drop target when the root supports it."""
        # Only a TkinterDnD.Tk root provides the drag-and-drop methods.
        if isinstance(self.root, TkinterDnD.Tk):
            self.listbox.drop_target_register(DND_FILES)
            self.listbox.dnd_bind('<<Drop>>', self.drop_files)

    def drop_files(self, event):
        """Append every path contained in a drop event to the list."""
        # event.data is a Tcl list string (paths may be wrapped in {}), so it
        # is split with Tk's own list parser.
        for dropped in self.root.splitlist(event.data):
            self.listbox.insert("end", dropped)

    # ---------------- GUI functions ----------------
    def add_files(self):
        """Ask for one or more files and queue them."""
        paths = filedialog.askopenfilenames(
            title="选择 CSV 文件",
            filetypes=[("CSV 文件", "*.csv"), ("所有文件", "*.*")],
        )
        for p in paths:
            self.listbox.insert("end", p)

    def add_folder(self):
        """Ask for a folder and queue it; does nothing if the dialog is cancelled."""
        folder = filedialog.askdirectory(title="选择文件夹")
        if folder:
            self.listbox.insert("end", folder)

    def remove_selected(self):
        """Delete the currently selected list entries."""
        sel = list(self.listbox.curselection())
        # Delete from the bottom up so earlier indices stay valid.
        for i in reversed(sel):
            self.listbox.delete(i)

    def clear_list(self):
        """Remove every queued path."""
        self.listbox.delete(0, "end")

    def log_append(self, text):
        """Append one line to the log pane and scroll it into view."""
        self.log.configure(state="normal")
        self.log.insert("end", text + "\n")
        self.log.see("end")
        self.log.configure(state="disabled")

    def open_out_dir(self):
        """Open the folder of the first queued path in the file manager.

        Falls back to the current working directory when the list is empty or
        the first entry does not point at an existing location. On platforms
        other than Windows, or when opening fails, the folder is shown in a
        message box instead.
        """
        folder = os.getcwd()
        try:
            first = self.listbox.get(0)
        except tk.TclError:
            first = ""
        if first:
            p = Path(first)
            # Choose a directory that actually exists: a queued path may be a
            # folder, a file, or something that has since been removed.
            if p.is_dir():
                folder = str(p)
            elif p.parent.is_dir():
                folder = str(p.parent)

        opened = False
        if sys.platform.startswith("win"):
            try:
                os.startfile(folder)
                opened = True
            except OSError:
                opened = False
        if not opened:
            messagebox.showinfo("提示", f"请手动打开:{folder}")

    def start_conversion(self):
        """Convert everything in the list and report the outcome.

        Reads the selected encoding and overwrite flag, runs ``process_paths``
        with a callback that updates the log and progress bar, then logs each
        result message and shows a summary dialog.
        """
        items = list(self.listbox.get(0, "end"))
        if not items:
            messagebox.showwarning("没有文件", "请添加要转换的文件或文件夹")
            return
        enc_display = self.encoding_combo.get()
        target_enc = self._enc_map.get(enc_display, DEFAULT_TARGET_ENCODING)
        overwrite = self.overwrite_var.get()
        self.log_append(f"开始处理 {len(items)} 条路径,目标编码: {target_enc}。 overwrite={overwrite}")
        self.progress["value"] = 0
        self.root.update_idletasks()

        def cb(fpath, idx, total):
            self.log_append(f"[{idx}/{total}] 处理: {fpath}")
            try:
                self.progress["value"] = int((idx - 1) / total * 100)
            except (ZeroDivisionError, tk.TclError):
                # The progress display is best-effort and must never abort
                # the conversion itself.
                pass
            self.root.update_idletasks()

        results = process_paths(items, target_encoding=target_enc, recursive=True, overwrite=overwrite, callback=cb)
        self.progress["value"] = 100
        succ = sum(1 for r in results if r[1])
        fail = len(results) - succ
        for result in results:
            # Each result is (file, ok, message, out_path); only the message is logged.
            self.log_append(result[2])
        self.log_append(f"完成:成功 {succ},失败 {fail}。")
        messagebox.showinfo("完成", f"转换完成:成功 {succ},失败 {fail}")
def main():
    """Create the window, queue any command-line paths, and run the event loop."""
    # TkinterDnD.Tk() stands in for tk.Tk() so the window supports drag-and-drop.
    window = TkinterDnD.Tk()
    gui = App(window)
    # Paths supplied as program arguments are added to the list at start-up.
    startup_paths = sys.argv[1:]
    for startup_path in startup_paths:
        gui.listbox.insert("end", startup_path)
    window.mainloop()


if __name__ == "__main__":
    main()