Files
medical-mall/find_non_utf8.py
2026-02-25 11:39:54 +08:00

56 lines
1.9 KiB
Python

import os
# File suffixes (compared case-insensitively) that are treated as text files to check.
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')
root_dir = r'd:\骅锋\mall'
# Directory names that are never descended into (skipped for performance).
skip_dirs = ('unpackage', 'node_modules', '.git')

# Byte-order marks that introduce a UTF-16 stream (little-endian, big-endian).
_UTF16_BOMS = (b'\xff\xfe', b'\xfe\xff')


def _decodes(content, codec):
    """Return True if *content* (bytes) decodes strictly with *codec*."""
    try:
        content.decode(codec)
    except UnicodeDecodeError:
        return False
    return True


def guess_encoding(content):
    """Guess the encoding of bytes that are known NOT to be valid UTF-8.

    The result is a best-effort label, not a guarantee.

    Args:
        content: Raw file bytes.

    Returns:
        One of "UTF-16", "GBK/GB2312", "Latin-1/Windows-1252" or
        "Unknown/Other".
    """
    # A BOM is the only reliable UTF-16 marker, so check it first.
    if content.startswith(_UTF16_BOMS) and _decodes(content, 'utf-16'):
        return "UTF-16"
    # BOM-less UTF-16 text contains NUL bytes for every ASCII character,
    # whereas GBK / Latin-1 text files do not contain NULs.  Without this
    # guard almost any even-length byte string "decodes" as UTF-16.
    if b'\x00' in content and (
        _decodes(content, 'utf-16-le') or _decodes(content, 'utf-16-be')
    ):
        return "UTF-16"
    if _decodes(content, 'gbk'):
        return "GBK/GB2312"
    # Latin-1 accepts every byte sequence, so it can never signal a mismatch.
    # Windows-1252 leaves a few bytes undefined, which keeps the
    # "Unknown/Other" fallback reachable.
    if _decodes(content, 'cp1252'):
        return "Latin-1/Windows-1252"
    return "Unknown/Other"


def find_non_utf8_files(top, suffixes, skip=skip_dirs):
    """Walk *top* and collect files whose bytes are not valid UTF-8.

    Args:
        top: Directory to scan recursively.
        suffixes: Suffix (str) or tuple of suffixes; matched against the
            lower-cased file name.
        skip: Directory names (exact match) that are not descended into.

    Returns:
        A pair ``(found, unreadable)``: ``found`` is a list of
        ``(file_path, encoding_label)`` tuples, ``unreadable`` is a list of
        ``(file_path, OSError)`` tuples for files that could not be read.
    """
    found = []
    unreadable = []
    for root, dirs, files in os.walk(top):
        # Prune in place so os.walk never enters the skipped directories.
        # Matching exact names avoids skipping look-alikes such as ".github".
        dirs[:] = [name for name in dirs if name not in skip]
        for file in files:
            if not file.lower().endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, 'rb') as f:
                    content = f.read()
            except OSError as err:
                # Permission denied, file vanished, etc.: keep scanning but
                # remember the failure so it can be reported.
                unreadable.append((file_path, err))
                continue
            if _decodes(content, 'utf-8'):
                continue
            found.append((file_path, guess_encoding(content)))
    return found, unreadable


non_utf8_files, unreadable_files = find_non_utf8_files(root_dir, extensions)

print(f"{'Detected Encoding':<25} | {'File Path'}")
print("-" * 100)
for file_path, encoding in non_utf8_files:
    print(f"{encoding:<25} | {file_path}")
print(f"\nFound {len(non_utf8_files)} non-UTF-8 files.")
if unreadable_files:
    print(f"Skipped {len(unreadable_files)} unreadable files:")
    for file_path, err in unreadable_files:
        print(f"  {file_path}: {err}")