Files
medical-mall/docs/sql/fix_garbled_v2.py

168 lines
7.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
修复 complete_mall_database.sql 中的 UTF-8 替换字符乱码 v2
策略:先把 {{GARBLED}}? 统一为 {{GARBLED}},再批量替换
"""
import os
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
SRC = os.path.join(ROOT, "医疗-consumer", "mall_sql", "schemas", "complete_mall_database.sql")
DST = os.path.join(ROOT, "医疗-consumer", "mall_sql", "schemas", "complete_mall_database_fixed.sql")
with open(SRC, "rb") as f:
raw = f.read()
marker = b"{{GARBLED}}"
raw = raw.replace(b"\xef\xbf\xbd", marker)
text = raw.decode("utf-8")
# 第一步:把 {{GARBLED}}? 统一为 {{GARBLED}}
text = text.replace("{{GARBLED}}?", "{{GARBLED}}")
# 第二步:批量替换(按从长到短的顺序,避免子串误替换)
replacements = [
# 长词优先
("为需要的表创{{GARBLED}}updated_at 触发{{GARBLED}}", "为需要的表创建 updated_at 触发器"),
("已创建索{{GARBLED}} 30+ 个索{{GARBLED}}", "已创建索引 30+ 个索引"),
("已创建函{{GARBLED}} 10+ 个函{{GARBLED}}", "已创建函数 10+ 个函数"),
("已创建视{{GARBLED}} 3 个视{{GARBLED}}", "已创建视图 3 个视图"),
("包含: 表结构、索引、触发器、RLS策略、视图、函{{GARBLED}}", "包含: 表结构、索引、触发器、RLS策略、视图、函数"),
# COMMENT / 表名
("商城用户扩展信息{{GARBLED}}", "商城用户扩展信息表"),
("用户地址{{GARBLED}}", "用户地址表"),
("商品分类{{GARBLED}}", "商品分类表"),
("品牌{{GARBLED}}", "品牌表"),
("商品{{GARBLED}}", "商品表"),
("商品SKU{{GARBLED}}", "商品SKU表"),
("商品规格{{GARBLED}}", "商品规格表"),
("店铺信息{{GARBLED}}", "店铺信息表"),
("订单{{GARBLED}}", "订单表"),
("订单商品{{GARBLED}}", "订单商品表"),
("配送员信息{{GARBLED}}", "配送员信息表"),
("商品评价{{GARBLED}}", "商品评价表"),
("用户收藏{{GARBLED}}", "用户收藏表"),
("用户浏览历史{{GARBLED}}", "用户浏览历史表"),
("搜索记录{{GARBLED}}", "搜索记录表"),
("系统配置{{GARBLED}}", "系统配置表"),
("地区{{GARBLED}}", "地区表"),
# 模块/标题
("数据库设{{GARBLED}}", "数据库设计"),
("复用{{GARBLED}} ak_users", "复用主表 ak_users"),
("用户扩展{{GARBLED}}", "用户扩展表"),
("商品管理{{GARBLED}}", "商品管理模块"),
("店铺管理{{GARBLED}}", "店铺管理模块"),
("订单管理{{GARBLED}}", "订单管理模块"),
("营销管理{{GARBLED}}", "营销管理模块"),
("评价管理{{GARBLED}}", "评价管理模块"),
("用户行为{{GARBLED}}", "用户行为模块"),
# 状态/属性
("基础属{{GARBLED}}", "基础属性"),
("{{GARBLED}}", "状态"),
("认证状{{GARBLED}}", "认证状态"),
("状态信{{GARBLED}}", "状态信息"),
("未认{{GARBLED}}", "未认证"),
("已认{{GARBLED}}", "已认证"),
# 订单状态
("待付{{GARBLED}}", "待付款"),
("待发{{GARBLED}}", "待发货"),
("待收{{GARBLED}}", "待收货"),
("已完{{GARBLED}}", "已完成"),
("已取{{GARBLED}}", "已取消"),
("已退{{GARBLED}}", "已退款"),
("未付{{GARBLED}}", "未付款"),
("已付{{GARBLED}}", "已付款"),
("部分退{{GARBLED}}", "部分退款"),
("全额退{{GARBLED}}", "全额退款"),
("未发{{GARBLED}}", "未发货"),
("已发{{GARBLED}}", "已发货"),
("运输{{GARBLED}}", "运输中"),
# 配送状态
("待接{{GARBLED}}", "待接单"),
("已接{{GARBLED}}", "已接单"),
("取货{{GARBLED}}", "取货中"),
("配送失{{GARBLED}}", "配送失败"),
# 其他业务词
("总金{{GARBLED}}", "总金额"),
("优惠{{GARBLED}}", "优惠值"),
("最低订单金{{GARBLED}}", "最低订单金额"),
("最大优惠金{{GARBLED}}", "最大优惠金额"),
("总发放数{{GARBLED}}", "总发放数量"),
("每用户限领数{{GARBLED}}", "每用户限领数量"),
("使用次数限{{GARBLED}}", "使用次数限制"),
("已结{{GARBLED}}", "已结束"),
("未使{{GARBLED}}", "未使用"),
("已使{{GARBLED}}", "已使用"),
("已过{{GARBLED}}", "已过期"),
("已删{{GARBLED}}", "已删除"),
("已隐{{GARBLED}}", "已隐藏"),
# 配送/商品
("电动{{GARBLED}}", "电动车"),
("摩托{{GARBLED}}", "摩托车"),
("配送距{{GARBLED}}", "配送距离"),
("预计配送时{{GARBLED}}", "预计配送时间"),
("取货{{GARBLED}}", "取货码"),
("浏览时长({{GARBLED}}", "浏览时长(秒)"),
# 通用词
("营业执照{{GARBLED}}", "营业执照号"),
("规格值数{{GARBLED}}", "规格值数组"),
("NULL表示平台{{GARBLED}}", "NULL表示平台券"),
("满减{{GARBLED}}", "满减券"),
("折扣{{GARBLED}}", "折扣券"),
("百分{{GARBLED}}", "百分比"),
("自己的数{{GARBLED}}", "自己的数据"),
("初始化数{{GARBLED}}", "初始化数据"),
("优惠券设{{GARBLED}}", "优惠券设置"),
# 索引/触发器/函数/视图
("用户扩展表索{{GARBLED}}", "用户扩展表索引"),
("地址表索{{GARBLED}}", "地址表索引"),
("商品表索{{GARBLED}}", "商品表索引"),
("SKU表索{{GARBLED}}", "SKU表索引"),
("订单表索{{GARBLED}}", "订单表索引"),
("订单商品表索{{GARBLED}}", "订单商品表索引"),
("收藏表索{{GARBLED}}", "收藏表索引"),
("触发器函{{GARBLED}}", "触发器函数"),
("自动更新 updated_at 字段的函{{GARBLED}}", "自动更新 updated_at 字段的函数"),
("商品库存更新触发{{GARBLED}}", "商品库存更新触发器"),
("更新商品总库{{GARBLED}}", "更新商品总库存"),
("订单状态变更时的处{{GARBLED}}", "订单状态变更时的处理"),
("更新商品销{{GARBLED}}", "更新商品销量"),
("生成优惠券码的函{{GARBLED}}", "生成优惠券码的函数"),
("计算购物车总金{{GARBLED}}", "计算购物车总金额"),
("获取商品总库{{GARBLED}}", "获取商品总库存"),
# 扩/函单独处理(避免被前面的长词覆盖后残留)
("基础配置和扩{{GARBLED}}", "基础配置和扩展"),
("启用必要的扩{{GARBLED}}", "启用必要的扩展"),
]
# 按长度降序排序,避免短词先替换导致长词无法匹配
replacements.sort(key=lambda x: len(x[0]), reverse=True)
for old, new in replacements:
text = text.replace(old, new)
# 第三步:清理残留的 ?(在中文后面多余的 ?
# 对于文件末尾 RAISE NOTICE 等行中可能残留的 {{GARBLED}} 做兜底
if "{{GARBLED}}" in text:
for i, line in enumerate(text.splitlines(), 1):
if "{{GARBLED}}" in line:
print(f" 残留 Line {i}: {line.strip()[:120]}")
with open(DST, "w", encoding="utf-8") as f:
f.write(text)
remaining = text.count("{{GARBLED}}")
print(f"修复完成,残留乱码标记: {remaining}")