# medical-mall/final_scan.py

import os

# Encoding audit: walk ``root_dir`` and report every matching text file that
# is not valid UTF-8, plus every file that is valid UTF-8 but starts with a BOM.

# Directory tree to scan.
root_dir = r'd:\骅锋\mall'

# Only files whose lower-cased name ends with one of these suffixes are checked.
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')

# Directory names that are never descended into. Matched against the exact
# directory name (case-sensitive), so e.g. ``.github`` is still scanned.
skip_dirs = frozenset(('.git', 'node_modules', 'unpackage'))

# Byte sequence of the UTF-8 byte-order mark.
utf8_bom = b'\xef\xbb\xbf'


def classify_encoding(content: bytes) -> str:
    """Return a label describing how *content* decodes.

    Returns one of:
      * ``'UTF-8'``          - decodes as UTF-8 and has no BOM prefix
      * ``'UTF-8-BOM'``      - decodes as UTF-8 and starts with the BOM bytes
      * ``'GBK'``            - not UTF-8, but decodes as GBK
      * ``'Unknown/Binary'`` - decodes as neither

    A BOM prefix alone is not enough for ``'UTF-8-BOM'``: the whole payload
    must decode, otherwise the file is classified by the fallbacks below.
    """
    try:
        content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        return 'UTF-8-BOM' if content.startswith(utf8_bom) else 'UTF-8'

    # NOTE: a successful GBK decode is only a best guess at the real encoding;
    # it shows the bytes are decodable as GBK, not that they were written so.
    try:
        content.decode('gbk')
    except UnicodeDecodeError:
        return 'Unknown/Binary'
    return 'GBK'


def scan_tree(top):
    """Scan the tree under *top* and classify every file matching ``extensions``.

    Returns a tuple ``(non_utf8, with_bom, unreadable)`` where
      * ``non_utf8``   is a list of ``(path, label)`` for files that are not UTF-8,
      * ``with_bom``   is a list of paths that are valid UTF-8 with a BOM,
      * ``unreadable`` is a list of ``(path, reason)`` for files that could not
        be opened or read (they are skipped, not treated as fatal).
    """
    non_utf8 = []
    with_bom = []
    unreadable = []

    for root, dirs, files in os.walk(top):
        # Prune in place so os.walk never descends into the skipped trees.
        dirs[:] = [name for name in dirs if name not in skip_dirs]

        for file in files:
            if not file.lower().endswith(extensions):
                continue
            path = os.path.join(root, file)

            try:
                with open(path, 'rb') as f:
                    content = f.read()
            except OSError as exc:
                # Best-effort scan: remember the failure instead of hiding it.
                unreadable.append((path, str(exc)))
                continue

            label = classify_encoding(content)
            if label == 'UTF-8-BOM':
                with_bom.append(path)
            elif label != 'UTF-8':
                non_utf8.append((path, label))

    return non_utf8, with_bom, unreadable


def print_report(non_utf8, with_bom, unreadable):
    """Print the scan results to stdout.

    The unreadable-files section is only printed when there is something to
    list; the other two sections always print either their findings or a
    "none found" line.
    """
    if non_utf8:
        print("NON-UTF8 FILES FOUND:")
        for path, enc in non_utf8:
            print(f"{enc: <20} | {path}")
    else:
        print("No strictly non-UTF-8 files found.")

    if with_bom:
        print("\nUTF-8 WITH BOM FILES FOUND (These are technically valid UTF-8 but have BOM):")
        for path in with_bom:
            print(f"{'UTF-8-BOM': <20} | {path}")
    else:
        print("\nNo UTF-8 with BOM files found.")

    if unreadable:
        print("\nUNREADABLE FILES (skipped):")
        for path, reason in unreadable:
            print(f"{'Unreadable': <20} | {path} ({reason})")

    print("\nScan finished.")


# Run the scan and keep the results in the module-level report lists.
non_utf8_reports, utf8_with_bom_reports, unreadable_reports = scan_tree(root_dir)
print_report(non_utf8_reports, utf8_with_bom_reports, unreadable_reports)