"""Scan a project tree and report text files that are not UTF-8 encoded."""
import codecs
import os

# Number of leading bytes inspected when sniffing for binary content.
_SNIFF_SIZE = 1024

# Byte-order marks identifying UTF-16/UTF-32 text. The UTF-32 marks come
# first because the UTF-32 LE mark begins with the UTF-16 LE mark.
_WIDE_BOMS = (
    (codecs.BOM_UTF32_LE, "UTF-32"),
    (codecs.BOM_UTF32_BE, "UTF-32"),
    (codecs.BOM_UTF16_LE, "UTF-16"),
    (codecs.BOM_UTF16_BE, "UTF-16"),
)

# Codecs tried, in order, once strict UTF-8 decoding has failed.
_FALLBACK_CODECS = (("gbk", "GBK"), ("utf-16", "UTF-16"))

# Directory names that are never descended into (matched exactly).
_SKIP_DIRS = frozenset({'.git', 'node_modules', 'unpackage'})


def is_binary(data: bytes) -> bool:
    """Return True when *data* contains at least one NUL byte."""
    return b"\0" in data


def detect_file_info(file_path):
    """Guess the encoding of the file at *file_path*.

    Returns:
        A ``(label, is_non_utf8)`` tuple. ``label`` is one of ``"UTF-8"``,
        ``"UTF-8 with BOM"``, ``"UTF-16"``, ``"UTF-32"``, ``"GBK"``,
        ``"Binary"``, ``"Not UTF-8 (Unknown Encoding)"`` or ``"Error: ..."``.
        ``is_non_utf8`` is True only for text that is not UTF-8; binary and
        unreadable files yield False.

    The file is never raised on for I/O problems: an ``OSError`` (or a
    ``ValueError`` for a malformed path) is reported through the label.
    """
    try:
        with open(file_path, 'rb') as f:
            head = f.read(_SNIFF_SIZE)
            # BOM check must precede the NUL sniff: UTF-16/32 text is full of
            # NUL bytes and would otherwise be misreported as binary.
            for bom, label in _WIDE_BOMS:
                if head.startswith(bom):
                    return label, True
            if is_binary(head):
                return "Binary", False
            # Reuse the open handle instead of reopening the file.
            data = head + f.read()
    except (OSError, ValueError) as e:
        return f"Error: {e}", False

    if data.startswith(codecs.BOM_UTF8):
        return "UTF-8 with BOM", False

    try:
        data.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        return "UTF-8", False

    for codec, label in _FALLBACK_CODECS:
        try:
            data.decode(codec)
        except UnicodeDecodeError:
            continue
        return label, True

    return "Not UTF-8 (Unknown Encoding)", True


def scan_project(root_dir, extensions, skip_dirs=_SKIP_DIRS):
    """Walk *root_dir* and collect files whose text is not UTF-8.

    Args:
        root_dir: Directory to walk recursively.
        extensions: Tuple of lower-case suffixes (e.g. ``('.js', '.ts')``);
            only files whose lower-cased name ends with one are inspected.
        skip_dirs: Directory names that are pruned from the walk. Names are
            compared exactly, so ``.github`` is not skipped because of
            ``.git``.

    Returns:
        ``(scanned, flagged)`` where ``scanned`` is the number of inspected
        files and ``flagged`` is a list of ``(path, encoding_label)`` tuples.
    """
    scanned = 0
    flagged = []
    for current, dirs, files in os.walk(root_dir):
        # In-place pruning stops os.walk from descending at all.
        dirs[:] = [d for d in dirs if d not in skip_dirs]
        for name in files:
            if not name.lower().endswith(extensions):
                continue
            scanned += 1
            path = os.path.join(current, name)
            enc, is_non_utf8 = detect_file_info(path)
            if is_non_utf8:
                flagged.append((path, enc))
    return scanned, flagged


def print_report(file_count, results):
    """Print the scan summary and one table row per flagged file."""
    print(f"Scanned {file_count} files.")
    if not results:
        print("No non-UTF-8 files (within the target extensions) found in the project.")
        return
    print(f"{'Detected Encoding':<25} | {'File Path'}")
    print("-" * 100)
    for path, enc in results:
        print(f"{enc:<25} | {path}")
    print(f"\nFinal count of non-UTF8 text files: {len(results)}")


# File extensions to inspect:
# uvue, uts, vue, json, js, ts, scss, md, txt, ps1, bat, sh.
target_extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')
root_dir = r'd:\骅锋\mall'

# The scan runs at module level, as this file is used as a one-shot script.
file_count, results = scan_project(root_dir, target_extensions)
print_report(file_count, results)