"""Scan a project tree and report text files that are not plain UTF-8.

Two reports are printed to stdout:

* files that are not valid UTF-8 (labelled ``GBK`` when the bytes decode as
  GBK, ``Unknown/Binary`` otherwise);
* files that are valid UTF-8 but start with a byte-order mark.

Files that cannot be read are mentioned on stderr and otherwise ignored.
"""
import os
import sys

# Root of the tree scanned when the script is run directly.
root_dir = r'd:\骅锋\mall'

# Only files whose lower-cased name ends with one of these suffixes are checked.
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md',
              '.txt', '.ps1', '.bat', '.sh')

# Directory names that are never descended into (matched exactly, not as
# substrings, so e.g. ``.github`` is still scanned).
skip_dirs = ('.git', 'node_modules', 'unpackage')

UTF8_BOM = b'\xef\xbb\xbf'


def classify_content(content: bytes) -> str:
    """Return an encoding label for the raw bytes of a file.

    Args:
        content: The complete file contents.

    Returns:
        ``'UTF-8'`` for valid UTF-8 without a BOM (including empty input),
        ``'UTF-8-BOM'`` for valid UTF-8 that starts with the UTF-8 BOM,
        ``'GBK'`` for bytes that are not UTF-8 but decode as GBK, and
        ``'Unknown/Binary'`` for everything else.
    """
    try:
        content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        # The BOM only counts once the whole payload is known to be valid
        # UTF-8; a "fake" BOM in front of other bytes falls through below.
        return 'UTF-8-BOM' if content.startswith(UTF8_BOM) else 'UTF-8'

    try:
        content.decode('gbk')
    except UnicodeDecodeError:
        return 'Unknown/Binary'
    return 'GBK'


def scan_tree(top, suffixes=extensions, skipped=skip_dirs):
    """Walk *top* and classify every file whose name matches *suffixes*.

    Args:
        top: Directory to walk. A missing directory yields empty results,
            because ``os.walk`` ignores listing errors by default.
        suffixes: Iterable of file-name suffixes (compared lower-case).
        skipped: Directory names to prune from the walk.

    Returns:
        A ``(non_utf8, with_bom)`` tuple. ``non_utf8`` is a list of
        ``(path, label)`` pairs; ``with_bom`` is a list of paths. Each file
        appears in at most one of the two lists.
    """
    suffixes = tuple(suffixes)
    non_utf8 = []
    with_bom = []

    for current, dirs, files in os.walk(top):
        # Prune in place so os.walk never descends into skipped directories.
        dirs[:] = [d for d in dirs if d not in skipped]

        for name in files:
            if not name.lower().endswith(suffixes):
                continue

            path = os.path.join(current, name)
            try:
                with open(path, 'rb') as fh:
                    content = fh.read()
            except OSError as exc:
                # Best effort: an unreadable file must not abort the scan,
                # but it should not vanish silently either.
                print(f"Skipped unreadable file {path}: {exc}",
                      file=sys.stderr)
                continue

            label = classify_content(content)
            if label == 'UTF-8-BOM':
                with_bom.append(path)
            elif label != 'UTF-8':
                non_utf8.append((path, label))

    return non_utf8, with_bom


def main():
    """Scan ``root_dir`` and print both reports to stdout."""
    non_utf8_reports, utf8_with_bom_reports = scan_tree(root_dir, extensions)

    if non_utf8_reports:
        print("NON-UTF8 FILES FOUND:")
        for path, enc in non_utf8_reports:
            print(f"{enc: <20} | {path}")
    else:
        print("No strictly non-UTF-8 files found.")

    if utf8_with_bom_reports:
        print("\nUTF-8 WITH BOM FILES FOUND (These are technically valid UTF-8 but have BOM):")
        for path in utf8_with_bom_reports:
            print(f"{'UTF-8-BOM': <20} | {path}")
    else:
        print("\nNo UTF-8 with BOM files found.")

    print("\nScan finished.")


if __name__ == "__main__":
    main()