Files
medical-mall/final_scan.py
2026-02-25 11:39:54 +08:00

61 lines
2.1 KiB
Python

import os
# Hard-coded root of the tree to scan.
root_dir = r'd:\骅锋\mall'
# File-name suffixes that are inspected (compared case-insensitively).
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')
# A directory is skipped when its full path contains any of these substrings.
# NOTE: this is a substring match on the whole path, so e.g. ".github" is
# skipped as well because it contains ".git".
skip_markers = ('.git', 'node_modules', 'unpackage')
UTF8_BOM = b'\xef\xbb\xbf'


def classify_encoding(content: bytes) -> str:
    """Classify raw file bytes by the encoding they decode as.

    Returns one of:
      "UTF-8"          - decodes as UTF-8 and has no byte-order mark
      "UTF-8-BOM"      - decodes as UTF-8 and starts with the UTF-8 BOM
      "GBK"            - not valid UTF-8, but decodes as GBK
      "Unknown/Binary" - decodes as neither UTF-8 nor GBK
    """
    try:
        content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        # The BOM is only meaningful once the whole payload is known to be
        # valid UTF-8; otherwise the leading bytes are a coincidence and the
        # file must not be reported as "valid UTF-8 with BOM".
        return "UTF-8-BOM" if content.startswith(UTF8_BOM) else "UTF-8"

    try:
        content.decode('gbk')
    except UnicodeDecodeError:
        return "Unknown/Binary"
    return "GBK"


def scan_tree(top):
    """Walk *top* and classify every file whose name ends with `extensions`.

    Returns a tuple ``(non_utf8, with_bom, unreadable)`` where
      non_utf8   is a list of ``(path, label)`` for files that are not UTF-8,
      with_bom   is a list of paths that are valid UTF-8 with a BOM,
      unreadable is a list of ``(path, error_message)`` for files that could
                 not be opened or read.
    """
    non_utf8 = []
    with_bom = []
    unreadable = []
    for root, dirs, files in os.walk(top):
        if any(marker in root for marker in skip_markers):
            # Every descendant path contains the same marker and would be
            # skipped too, so prune in place instead of walking the subtree.
            dirs[:] = []
            continue
        for file in files:
            if not file.lower().endswith(extensions):
                continue
            path = os.path.join(root, file)
            try:
                with open(path, 'rb') as f:
                    content = f.read()
            except OSError as e:
                # Keep scanning, but remember the file so the final report
                # does not silently claim a clean result for unread files.
                unreadable.append((path, str(e)))
                continue
            encoding = classify_encoding(content)
            if encoding == "UTF-8-BOM":
                with_bom.append(path)
            elif encoding != "UTF-8":
                non_utf8.append((path, encoding))
    return non_utf8, with_bom, unreadable


def print_report(non_utf8, with_bom, unreadable):
    """Print the scan results to stdout in the script's report format."""
    if non_utf8:
        print("NON-UTF8 FILES FOUND:")
        for path, enc in non_utf8:
            print(f"{enc: <20} | {path}")
    else:
        print("No strictly non-UTF-8 files found.")
    if with_bom:
        print("\nUTF-8 WITH BOM FILES FOUND (These are technically valid UTF-8 but have BOM):")
        for path in with_bom:
            print(f"{'UTF-8-BOM': <20} | {path}")
    else:
        print("\nNo UTF-8 with BOM files found.")
    if unreadable:
        print("\nUNREADABLE FILES (skipped):")
        for path, reason in unreadable:
            print(f"{'READ-ERROR': <20} | {path} ({reason})")
    print("\nScan finished.")


# The scan runs when the file is executed, exactly as the flat script did.
non_utf8_reports, utf8_with_bom_reports, unreadable_reports = scan_tree(root_dir)
print_report(non_utf8_reports, utf8_with_bom_reports, unreadable_reports)