68 lines
2.0 KiB
Python
68 lines
2.0 KiB
Python
import os
|
|
|
|
def is_binary(data):
    """Return True when *data* contains at least one NUL byte.

    The presence of a NUL byte is the only criterion used here to treat
    a chunk of bytes as binary content.
    """
    nul_byte = b"\x00"
    return nul_byte in data
|
|
|
|
def detect_file_info(file_path):
    """Classify the encoding of the file at *file_path*.

    Returns a ``(label, is_non_utf8)`` tuple:

    * ``("UTF-16", True)`` - starts with a UTF-16 BOM and decodes as
      UTF-16, or (last resort) decodes as UTF-16 after UTF-8 and GBK failed.
    * ``("Binary", False)`` - the first 1024 bytes contain a NUL byte.
    * ``("UTF-8 with BOM", False)`` - starts with the UTF-8 BOM.
    * ``("UTF-8", False)`` - the whole file decodes as UTF-8.
    * ``("GBK", True)`` - not valid UTF-8, but decodes as GBK.
    * ``("Not UTF-8 (Unknown Encoding)", True)`` - none of the above.
    * ``("Error: <message>", False)`` - the file could not be processed.

    The function never raises; every failure is reported through the
    ``"Error: ..."`` label.
    """
    try:
        # Open once: sniff the first 1 KiB, and only read the remainder
        # when the file has not already been ruled out as binary.
        with open(file_path, 'rb') as f:
            head = f.read(1024)
            # UTF-16 text is full of NUL bytes, so a BOM must be looked for
            # before the NUL-based binary check or such files would always
            # be reported as "Binary".
            has_utf16_bom = head.startswith((b'\xff\xfe', b'\xfe\xff'))
            if not has_utf16_bom and is_binary(head):
                return "Binary", False
            data = head + f.read()

        if has_utf16_bom:
            if _decodes_as(data, 'utf-16'):
                return "UTF-16", True
            # The BOM-like prefix was a coincidence; apply the normal
            # binary check that was postponed above.
            if is_binary(head):
                return "Binary", False

        if data.startswith(b'\xef\xbb\xbf'):
            return "UTF-8 with BOM", False

        if _decodes_as(data, 'utf-8'):
            return "UTF-8", False

        # Fallback candidates, tried in order once UTF-8 has been ruled out.
        for label, codec in (("GBK", 'gbk'), ("UTF-16", 'utf-16')):
            if _decodes_as(data, codec):
                return label, True

        return "Not UTF-8 (Unknown Encoding)", True
    except Exception as e:
        # Deliberately broad: callers rely on this function never raising
        # and instead receiving the failure as an "Error: ..." label.
        return f"Error: {e}", False


def _decodes_as(data, encoding):
    """Return True if *data* decodes under *encoding* without error."""
    try:
        data.decode(encoding)
    except UnicodeDecodeError:
        return False
    return True
|
|
|
|
# Scan root_dir for text files (by extension) that are not UTF-8 encoded and
# print a report of the offenders.

# Extensions to scan: (uvue, uts, vue, json, js, ts, scss, md, txt, ps1, bat, sh)
target_extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')
root_dir = r'd:\骅锋\mall'
# Directory names that are excluded from the scan.
skip_dirs = {'.git', 'node_modules', 'unpackage'}

file_count = 0
results = []
errors = []
for root, dirs, files in os.walk(root_dir):
    # Prune in place so os.walk never descends into the excluded directories.
    # Matching the exact directory name (rather than a substring of the whole
    # path) avoids accidentally skipping look-alikes such as ".github".
    dirs[:] = [d for d in dirs if d not in skip_dirs]
    for file in files:
        if not file.lower().endswith(target_extensions):
            continue
        file_count += 1
        path = os.path.join(root, file)
        enc, is_non_utf8 = detect_file_info(path)
        if is_non_utf8:
            results.append((path, enc))
        elif enc.startswith("Error: "):
            # detect_file_info reports unreadable files through this label;
            # keep them so they are not silently counted as clean.
            errors.append((path, enc))

print(f"Scanned {file_count} files.")
if not results:
    print("No non-UTF-8 files (within the target extensions) found in the project.")
else:
    print(f"{'Detected Encoding':<25} | {'File Path'}")
    print("-" * 100)
    for path, enc in results:
        print(f"{enc:<25} | {path}")

print(f"\nFinal count of non-UTF8 text files: {len(results)}")

if errors:
    print(f"\nCould not read {len(errors)} file(s):")
    for path, enc in errors:
        print(f"{enc} | {path}")
|