"""Report text files under the current directory that are not plain UTF-8.

The scan walks the tree rooted at ``root_dir``, looks at files whose name ends
with one of ``extensions`` (case-insensitive), and prints two tables:

* files that are not valid UTF-8 (labelled ``GBK`` when they decode as GBK,
  otherwise ``Other/Binary``);
* files that are valid UTF-8 but start with a byte-order mark.
"""
import os
import sys

# Use current directory to avoid path encoding issues
root_dir = '.'

# File name suffixes (compared case-insensitively) that are scanned.
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md',
              '.txt', '.ps1', '.bat', '.sh')

# Directory names that are never descended into.
skip_dirs = frozenset({'.git', 'node_modules', 'unpackage'})

UTF8_BOM = b'\xef\xbb\xbf'


def classify_bytes(content):
    """Classify raw file bytes by the encoding they decode as.

    Args:
        content: The complete file content as ``bytes``.

    Returns:
        ``'UTF-8'`` for valid UTF-8 without a BOM, ``'UTF-8-BOM'`` for valid
        UTF-8 that starts with the UTF-8 byte-order mark, ``'GBK'`` for
        content that is not valid UTF-8 but decodes as GBK, and
        ``'Other/Binary'`` for everything else.
    """
    try:
        content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        # Only content that actually decodes is reported as "UTF-8 with BOM";
        # a BOM followed by invalid bytes falls through to the checks below.
        return 'UTF-8-BOM' if content.startswith(UTF8_BOM) else 'UTF-8'

    try:
        content.decode('gbk')
    except UnicodeDecodeError:
        return 'Other/Binary'
    return 'GBK'


def scan_tree(top):
    """Walk ``top`` and collect files that are not plain UTF-8.

    Args:
        top: Directory to scan recursively.

    Returns:
        A ``(non_utf8, with_bom)`` tuple. ``non_utf8`` is a list of
        ``(absolute_path, label)`` pairs and ``with_bom`` is a list of
        absolute paths, both in ``os.walk`` order.
    """
    non_utf8 = []
    with_bom = []

    for root, dirs, files in os.walk(top):
        # Prune in place so os.walk never descends into skipped directories.
        # Matching on the exact directory name keeps look-alikes such as
        # ".github" in the scan.
        dirs[:] = [d for d in dirs if d not in skip_dirs]

        for name in files:
            if not name.lower().endswith(extensions):
                continue

            path = os.path.join(root, name)
            # Use abspath for clarity in report
            abs_path = os.path.abspath(path)

            try:
                with open(path, 'rb') as f:
                    content = f.read()
            except OSError as exc:
                # Best effort: keep scanning, but say which file was skipped.
                print(f"Skipped unreadable file: {abs_path} ({exc})",
                      file=sys.stderr)
                continue

            label = classify_bytes(content)
            if label == 'UTF-8-BOM':
                with_bom.append(abs_path)
            elif label != 'UTF-8':
                non_utf8.append((abs_path, label))

    return non_utf8, with_bom


def main():
    """Run the scan on ``root_dir`` and print the report to stdout."""
    print("Starting scan...")
    non_utf8_reports, utf8_with_bom_reports = scan_tree(root_dir)

    if non_utf8_reports:
        print("Detected Encoding | File Path")
        print("-" * 100)
        for path, enc in non_utf8_reports:
            print(f"{enc:<25} | {path}")
    else:
        print("No strictly non-UTF-8 files found.")

    if utf8_with_bom_reports:
        print("\nUTF-8 WITH BOM FILES FOUND (Technically valid but have BOM):")
        print("-" * 100)
        for path in utf8_with_bom_reports:
            print(f"{'UTF-8-BOM':<25} | {path}")
    else:
        print("\nNo UTF-8 with BOM files found.")

    print("\nScan finished.")


if __name__ == "__main__":
    main()