"""Report text files under ``root_dir`` that are not plain UTF-8.

The tree is walked recursively. Directories named in ``skipped_dirs`` and
files whose name ends with one of ``binary_exts`` are ignored. Every other
non-empty file is read as bytes and reported when it is not valid UTF-8
(with a best-guess encoding label) or when it starts with a UTF-8 BOM.
"""
import os
import sys

root_dir = '.'
skipped_dirs = ('.git', 'node_modules', 'unpackage', '.hbuilderx', '.vscode')
binary_exts = (
    '.png', '.jpg', '.jpeg', '.gif', '.ico', '.ttf', '.woff', '.woff2',
    '.pdf', '.zip', '.tar', '.gz', '.7z',
)

# Byte-order mark that some editors prepend to UTF-8 files.
_UTF8_BOM = b'\xef\xbb\xbf'

# Tried in this order once UTF-8 decoding has failed: (codec, report label).
# UTF-16 stays last because it accepts almost any even-length byte string,
# so trying it earlier would mask the stricter GBK check.
_FALLBACK_ENCODINGS = (('gbk', 'GBK'), ('utf-16', 'UTF-16'))


def _decodes_as(data, codec):
    """Return True when ``data`` decodes without error using ``codec``."""
    try:
        data.decode(codec)
    except UnicodeDecodeError:
        return False
    return True


def classify_encoding(data):
    """Classify raw file bytes by the encoding they appear to use.

    Args:
        data: Complete file contents as ``bytes``.

    Returns:
        ``None`` when ``data`` is empty or is valid UTF-8 without a BOM
        (nothing to report); ``"UTF8-BOM"`` when it starts with the UTF-8
        byte-order mark (the remainder is not validated); otherwise the
        label of the first fallback codec that decodes it (``"GBK"`` or
        ``"UTF-16"``), or ``"Other"`` when none does.
    """
    if not data:
        return None
    if data.startswith(_UTF8_BOM):
        return 'UTF8-BOM'
    if _decodes_as(data, 'utf-8'):
        return None
    for codec, label in _FALLBACK_ENCODINGS:
        if _decodes_as(data, codec):
            return label
    return 'Other'


def scan_tree(top, skip_dirs=skipped_dirs, skip_exts=binary_exts):
    """Walk ``top`` and sort every candidate file into result lists.

    Args:
        top: Directory to scan recursively.
        skip_dirs: Directory names (exact, case-sensitive) that are never
            entered.
        skip_exts: Lower-case filename suffixes treated as binary and
            skipped without being read.

    Returns:
        A ``(non_utf8, with_bom, unreadable)`` tuple where ``non_utf8`` is a
        list of ``(path, label)`` pairs, ``with_bom`` is a list of paths and
        ``unreadable`` is a list of ``(path, OSError)`` pairs for files that
        could not be opened or read.
    """
    skip_exts = tuple(skip_exts)  # str.endswith only accepts str or tuple
    non_utf8 = []
    with_bom = []
    unreadable = []

    for root, dirs, files in os.walk(top):
        # Prune in place and by exact name: os.walk then never descends into
        # the skipped trees, and '.git' no longer also excludes '.github'.
        # Sorting makes the report order independent of the filesystem.
        dirs[:] = sorted(d for d in dirs if d not in skip_dirs)

        for name in sorted(files):
            if name.lower().endswith(skip_exts):
                continue

            path = os.path.join(root, name)
            try:
                with open(path, 'rb') as f:
                    data = f.read()
            except OSError as exc:
                # Best effort: remember the failure and keep scanning.
                unreadable.append((path, exc))
                continue

            label = classify_encoding(data)
            if label is None:
                continue
            if label == 'UTF8-BOM':
                with_bom.append(path)
            else:
                non_utf8.append((path, label))

    return non_utf8, with_bom, unreadable


def main():
    """Scan ``root_dir`` and print the report to stdout."""
    print(f"Scanning {os.path.abspath(root_dir)} recursively...")
    print("Looking for non-UTF8 text files...")

    non_utf8, with_bom, unreadable = scan_tree(
        root_dir, skipped_dirs, binary_exts
    )

    if non_utf8:
        print(f"\nFound {len(non_utf8)} non-UTF-8 files:")
        print("-" * 100)
        for path, enc in non_utf8:
            print(f"{enc: <10} | {path}")
    else:
        print("\nNo non-UTF-8 files found.")

    if with_bom:
        print(f"\nFound {len(with_bom)} UTF-8 files with BOM:")
        print("-" * 100)
        for path in with_bom:
            print(f"UTF8-BOM | {path}")

    # Warnings go to stderr so the stdout report keeps its original format.
    for path, exc in unreadable:
        print(f"Warning: could not read {path}: {exc}", file=sys.stderr)

    print("\nScan complete.")


if __name__ == "__main__":
    main()