Python版csv文件utf8转ansi小程序


为什么会有这个呢?因为很多程序导出的 CSV 文件是不带 BOM 的 UTF-8 编码,而中文版 Excel 直接打开 CSV 时会按系统的 ANSI 编码(GBK / cp936)来解析,于是中文就显示成了乱码。虽然网上有很多教程教你如何通过 Excel 的数据导入功能导入,但是稍显繁琐。实际上最方便的方式是用记事本打开 CSV 文件,然后另存为 ANSI 编码即可;但是在有多个文件需要转换的时候,逐个另存就比较繁琐了。

这个小程序能进行批量转换,只需要将需要转换的csv文件或者文件夹拖入程序,点击开始转换即可。

csv_to_ansi.py源码:

# csv_to_ansi_gui_drag.py
import sys
import os
import traceback
from pathlib import Path
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

# 需要安装 tkinterdnd2:pip install tkinterdnd2
from tkinterdnd2 import DND_FILES, TkinterDnD

DEFAULT_TARGET_ENCODING = "cp936"  # Chinese Windows "ANSI" code page (GBK)

def find_csvs_in_path(p: Path, recursive=True):
    """Collect the CSV files designated by ``p``.

    Args:
        p: A file or directory path.
        recursive: When ``p`` is a directory, descend into sub-directories
            if true; otherwise inspect only its direct children.

    Returns:
        A list of ``Path`` objects. A file is returned as a one-element list
        when its suffix is ``.csv`` (case-insensitive); a directory yields
        every regular file below it with such a suffix; anything else
        (including a non-existent path) yields an empty list.
    """
    def has_csv_suffix(candidate):
        return candidate.suffix.lower() == ".csv"

    if p.is_file():
        return [p] if has_csv_suffix(p) else []

    if p.is_dir():
        # Match on the lower-cased suffix instead of a "*.csv" glob pattern:
        # the pattern is case-sensitive on POSIX file systems (missing
        # "DATA.CSV") and would also return directories named "*.csv".
        entries = p.rglob("*") if recursive else p.glob("*")
        return [fp for fp in entries if has_csv_suffix(fp) and fp.is_file()]

    return []

def convert_file(src_path: Path, target_encoding=DEFAULT_TARGET_ENCODING, overwrite=False):
    """Re-encode one text file into ``target_encoding``.

    The file is read as bytes and decoded with the first codec that accepts
    it: UTF-8 (with or without BOM), then ``target_encoding`` itself, then
    latin1 as a last resort. Trying the target encoding before latin1 means
    a file that is already converted round-trips unchanged instead of being
    mis-decoded and damaged. Characters that cannot be represented in the
    target encoding are written as the codec's replacement character.

    Args:
        src_path: File to convert.
        target_encoding: Name of the codec used for the output.
        overwrite: Write the result back to ``src_path`` when true. Otherwise
            write next to it as ``<stem>_ANSI<suffix>``, appending ``(1)``,
            ``(2)``, ... until an unused name is found.

    Returns:
        A tuple ``(ok, message, out_path)``; ``out_path`` is ``None`` on
        failure. Errors are reported through the tuple, never raised.
    """
    try:
        # Work on bytes so line endings (CRLF vs LF) pass through untouched.
        raw = src_path.read_bytes()

        text = None
        # dict.fromkeys drops a duplicate codec name while keeping the order.
        for enc in dict.fromkeys(("utf-8-sig", target_encoding, "latin1")):
            try:
                text = raw.decode(enc)
                break
            except UnicodeDecodeError:
                continue
        if text is None:
            # Safety net only: latin1 accepts every byte sequence.
            return False, f"无法读取文件:{src_path}", None

        # Encode before touching any output file, so an invalid codec name
        # fails here rather than after the source has been truncated.
        data = text.encode(target_encoding, errors="replace")

        if overwrite:
            out_path = src_path
        else:
            out_name = src_path.stem + "_ANSI" + src_path.suffix
            out_path = src_path.with_name(out_name)
            i = 1
            while out_path.exists():
                out_path = src_path.with_name(f"{src_path.stem}_ANSI({i}){src_path.suffix}")
                i += 1

        out_path.write_bytes(data)

        return True, f"转换成功 -> {out_path}", out_path
    except Exception as e:
        tb = traceback.format_exc()
        return False, f"转换出错:{e}\n{tb}", None

def process_paths(paths, target_encoding=DEFAULT_TARGET_ENCODING, recursive=True, overwrite=False, callback=None):
    """Convert every CSV file reachable from ``paths``.

    Args:
        paths: Iterable of file or directory paths (strings or ``Path``).
        target_encoding: Codec name forwarded to ``convert_file``.
        recursive: Forwarded to ``find_csvs_in_path`` for directory entries.
        overwrite: Forwarded to ``convert_file``.
        callback: Optional ``callback(path, index, total)`` invoked just
            before each file is converted; ``index`` starts at 1.

    Returns:
        A list of ``(source_path, ok, message, out_path)`` tuples, one per
        distinct file, in first-seen order.
    """
    # Step 1: expand directories and keep only ".csv" files.
    candidates = []
    for raw in paths:
        entry = Path(raw)
        if entry.is_dir():
            candidates += find_csvs_in_path(entry, recursive=recursive)
        elif entry.is_file() and entry.suffix.lower() == ".csv":
            candidates.append(entry)

    # Step 2: drop duplicates by resolved path; a dict keeps the first
    # occurrence and preserves insertion order.
    by_real_path = {}
    for candidate in candidates:
        by_real_path.setdefault(str(candidate.resolve()), candidate)
    targets = list(by_real_path.values())
    total = len(targets)

    # Step 3: convert each file, notifying the caller beforehand.
    results = []
    for position, target in enumerate(targets, start=1):
        if callback:
            callback(target, position, total)
        outcome = convert_file(target, target_encoding=target_encoding, overwrite=overwrite)
        results.append((target, *outcome))
    return results

class App:
    """Tk window for batch re-encoding CSV files.

    The window holds a list of file/folder paths (filled by buttons, by
    drag & drop, or by the caller), an encoding selector, an "overwrite"
    check box, a log pane and a progress bar. Pressing the start button
    hands the listed paths to ``process_paths``.
    """

    def __init__(self, root):
        """Build all widgets inside ``root``.

        Args:
            root: The top-level Tk window. Drag & drop is enabled only when
                it is a ``TkinterDnD.Tk`` instance.
        """
        self.root = root
        root.title("CSV UTF-8 -> ANSI 转换器")
        root.geometry("720x420")
        root.minsize(620, 360)

        frm = ttk.Frame(root, padding=10)
        frm.pack(fill="both", expand=True)

        # --- top row: encoding selector and overwrite switch ---
        top = ttk.Frame(frm)
        top.pack(fill="x", pady=(0, 8))

        ttk.Label(top, text="目标编码:").pack(side="left")
        # Not bound to the combobox below; the selection is read from the
        # combobox itself. Kept so the attribute remains available.
        self.encoding_var = tk.StringVar(value=DEFAULT_TARGET_ENCODING)
        encoding_options = [("cp936 (GBK - 中文 Windows ANSI)", "cp936"),
                            ("utf-8-sig (UTF-8 with BOM - Excel modern)", "utf-8-sig"),
                            ("latin1 (ISO-8859-1)", "latin1"),
                            ("windows-1252", "cp1252")]
        self.encoding_combo = ttk.Combobox(top, state="readonly",
                                           values=[o[0] for o in encoding_options],
                                           width=35)
        # Maps the label shown in the combobox to the codec name.
        self._enc_map = {o[0]: o[1] for o in encoding_options}
        self.encoding_combo.set(encoding_options[0][0])
        self.encoding_combo.pack(side="left", padx=(5, 15))

        self.overwrite_var = tk.BooleanVar(value=False)
        ttk.Checkbutton(top, text="覆盖源文件(谨慎)", variable=self.overwrite_var).pack(side="left")

        # --- middle: path list on the left, log on the right ---
        mid = ttk.Frame(frm)
        mid.pack(fill="both", expand=True)

        left = ttk.Frame(mid)
        left.pack(side="left", fill="both", expand=True)

        ttk.Label(left, text="待转换文件(拖拽或点击添加):").pack(anchor="w")

        self.listbox = tk.Listbox(left, selectmode="extended")
        self.listbox.pack(fill="both", expand=True, padx=(0, 6), pady=6)

        # Allow files to be dropped onto the listbox.
        self.listbox_drop_target_register()

        btns = ttk.Frame(left)
        btns.pack(fill="x")
        ttk.Button(btns, text="添加文件", command=self.add_files).pack(side="left", padx=3)
        ttk.Button(btns, text="添加文件夹", command=self.add_folder).pack(side="left", padx=3)
        ttk.Button(btns, text="移除所选", command=self.remove_selected).pack(side="left", padx=3)
        ttk.Button(btns, text="清空列表", command=self.clear_list).pack(side="left", padx=3)

        right = ttk.Frame(mid, width=260)
        right.pack(side="right", fill="y")

        ttk.Label(right, text="日志 / 进度:").pack(anchor="w")
        # Kept read-only; log_append() enables it only while writing.
        self.log = tk.Text(right, width=40, height=15, state="disabled", wrap="word")
        self.log.pack(fill="both", expand=True, pady=6)

        self.progress = ttk.Progressbar(frm, orient="horizontal", mode="determinate")
        self.progress.pack(fill="x", pady=(6, 0))

        # --- bottom row: action buttons ---
        action = ttk.Frame(frm)
        action.pack(fill="x", pady=8)
        ttk.Button(action, text="开始转换", command=self.start_conversion).pack(side="left", padx=6)
        ttk.Button(action, text="打开输出目录", command=self.open_out_dir).pack(side="left", padx=6)
        ttk.Button(action, text="退出", command=root.quit).pack(side="right", padx=6)

    # ---------------- Drag & Drop ----------------
    def listbox_drop_target_register(self):
        """Register the listbox as a file drop target when the root supports it."""
        # Only a TkinterDnD.Tk root provides the drag & drop methods.
        if isinstance(self.root, TkinterDnD.Tk):
            self.listbox.drop_target_register(DND_FILES)
            self.listbox.dnd_bind('<<Drop>>', self.drop_files)

    def drop_files(self, event):
        """Append every path contained in a drop event to the list."""
        # event.data is a Tcl list string; paths may be wrapped in {}.
        files = self.root.splitlist(event.data)
        for f in files:
            self.listbox.insert("end", f)

    # ---------------- GUI functions ----------------
    def add_files(self):
        """Ask for one or more files and append them to the list."""
        paths = filedialog.askopenfilenames(title="选择 CSV 文件", filetypes=[("CSV 文件", "*.csv"), ("所有文件", "*.*")])
        for p in paths:
            self.listbox.insert("end", p)

    def add_folder(self):
        """Ask for a folder and append it to the list (nothing on cancel)."""
        folder = filedialog.askdirectory(title="选择文件夹")
        if folder:
            self.listbox.insert("end", folder)

    def remove_selected(self):
        """Delete the currently selected list entries."""
        sel = list(self.listbox.curselection())
        # Delete from the bottom up so the remaining indices stay valid.
        for i in reversed(sel):
            self.listbox.delete(i)

    def clear_list(self):
        """Remove every entry from the list."""
        self.listbox.delete(0, "end")

    def log_append(self, text):
        """Append ``text`` plus a newline to the log pane and scroll to it."""
        self.log.configure(state="normal")
        self.log.insert("end", text + "\n")
        self.log.see("end")
        self.log.configure(state="disabled")

    def open_out_dir(self):
        """Open the folder belonging to the first list entry.

        A directory entry is opened itself; for any other entry its parent
        folder is used. With an empty list the current working directory is
        used. Only Windows opens a file browser; other platforms just show
        the folder path in a dialog.
        """
        # Listbox.get() returns an empty string for an out-of-range index.
        first = self.listbox.get(0)
        if first:
            p = Path(first)
            folder = str(p if p.is_dir() else p.parent)
        else:
            folder = os.getcwd()

        if sys.platform.startswith("win"):
            try:
                os.startfile(folder)
            except OSError as exc:
                # e.g. the folder was removed after being added to the list.
                messagebox.showerror("错误", f"无法打开目录:{folder}\n{exc}")
        else:
            messagebox.showinfo("提示", f"请手动打开:{folder}")

    def start_conversion(self):
        """Convert everything in the list and report the outcome.

        Runs synchronously in the Tk callback; the log and progress bar are
        refreshed through ``update_idletasks`` before each file.
        """
        items = list(self.listbox.get(0, "end"))
        if not items:
            messagebox.showwarning("没有文件", "请添加要转换的文件或文件夹")
            return

        enc_display = self.encoding_combo.get()
        target_enc = self._enc_map.get(enc_display, DEFAULT_TARGET_ENCODING)
        overwrite = self.overwrite_var.get()

        self.log_append(f"开始处理 {len(items)} 条路径,目标编码: {target_enc}。 overwrite={overwrite}")
        self.progress["value"] = 0
        self.root.update_idletasks()

        def cb(fpath, idx, total):
            # Called before each file, hence (idx - 1) files are finished.
            self.log_append(f"[{idx}/{total}] 处理: {fpath}")
            if total:
                self.progress["value"] = int((idx - 1) / total * 100)
            self.root.update_idletasks()

        results = process_paths(items, target_encoding=target_enc, recursive=True, overwrite=overwrite, callback=cb)
        self.progress["value"] = 100
        succ = sum(1 for r in results if r[1])
        fail = len(results) - succ
        for _, _, msg, _ in results:
            self.log_append(msg)
        self.log_append(f"完成:成功 {succ},失败 {fail}。")
        messagebox.showinfo("完成", f"转换完成:成功 {succ},失败 {fail}")

def main():
    """Create the drag-and-drop capable window and run the Tk event loop.

    Paths given on the command line are pre-loaded into the file list.
    """
    # TkinterDnD.Tk() is used in place of tk.Tk() so that dropping files works.
    window = TkinterDnD.Tk()
    gui = App(window)

    # Command-line arguments are added to the list at start-up.
    cli_paths = sys.argv[1:]
    for arg in cli_paths:
        gui.listbox.insert("end", arg)

    window.mainloop()


if __name__ == "__main__":
    main()