# Listing header from the original file viewer: 63 lines, 2.0 KiB, Python
import os

print("Starting scan...")

# Use current directory to avoid path encoding issues
root_dir = '.'

# Only files whose (lower-cased) name ends with one of these suffixes are checked.
extensions = ('.uvue', '.uts', '.vue', '.json', '.js', '.ts', '.scss', '.md', '.txt', '.ps1', '.bat', '.sh')

# Directory names that are never descended into. Matched against whole
# directory names, so '.github' is NOT confused with '.git'.
skip_dirs = ('.git', 'node_modules', 'unpackage')

# Byte sequence that marks a UTF-8 file written with a byte-order mark.
UTF8_BOM = b'\xef\xbb\xbf'


def classify_content(content: bytes) -> str:
    """Classify raw file bytes by the encoding they decode as.

    Args:
        content: The complete contents of a file, read in binary mode.

    Returns:
        'UTF-8'        -- decodes as UTF-8 and has no BOM.
        'UTF-8-BOM'    -- decodes as UTF-8 and starts with the UTF-8 BOM.
        'GBK'          -- is not valid UTF-8 but decodes as GBK.
        'Other/Binary' -- decodes as neither UTF-8 nor GBK.
    """
    try:
        content.decode('utf-8')
    except UnicodeDecodeError:
        pass
    else:
        # The BOM is only meaningful once the whole file is known to be
        # valid UTF-8; otherwise the leading bytes are a coincidence.
        return 'UTF-8-BOM' if content.startswith(UTF8_BOM) else 'UTF-8'

    try:
        content.decode('gbk')
    except UnicodeDecodeError:
        return 'Other/Binary'
    return 'GBK'


def scan_tree(top, suffixes=extensions, skipped=skip_dirs):
    """Walk *top* and sort the matching files by detected encoding.

    Args:
        top: Directory to walk.
        suffixes: Tuple of lower-case file-name suffixes to check.
        skipped: Directory names that are pruned from the walk.

    Returns:
        A 3-tuple ``(non_utf8, with_bom, unreadable)`` where
        ``non_utf8`` is a list of ``(abs_path, label)`` for files that are
        not valid UTF-8, ``with_bom`` is a list of absolute paths of valid
        UTF-8 files that start with a BOM, and ``unreadable`` is a list of
        ``(abs_path, error)`` for files that could not be opened or read.
    """
    non_utf8 = []
    with_bom = []
    unreadable = []

    for root, dirs, files in os.walk(top):
        # Prune in place so os.walk never descends into skipped directories.
        dirs[:] = [d for d in dirs if d not in skipped]

        for file in files:
            if not file.lower().endswith(suffixes):
                continue

            path = os.path.join(root, file)
            # Use abspath for clarity in report
            abs_path = os.path.abspath(path)

            try:
                with open(path, 'rb') as f:
                    content = f.read()
            except OSError as err:
                # Best effort: remember the failure and keep scanning
                # instead of silently dropping the file.
                unreadable.append((abs_path, err))
                continue

            label = classify_content(content)
            if label == 'UTF-8-BOM':
                with_bom.append(abs_path)
            elif label != 'UTF-8':
                non_utf8.append((abs_path, label))

    return non_utf8, with_bom, unreadable


non_utf8_reports, utf8_with_bom_reports, unreadable_reports = scan_tree(root_dir)

if non_utf8_reports:
    print("Detected Encoding | File Path")
    print("-" * 100)
    for path, enc in non_utf8_reports:
        print(f"{enc:<25} | {path}")
else:
    print("No strictly non-UTF-8 files found.")

if utf8_with_bom_reports:
    print("\nUTF-8 WITH BOM FILES FOUND (Technically valid but have BOM):")
    print("-" * 100)
    for path in utf8_with_bom_reports:
        print(f"{'UTF-8-BOM':<25} | {path}")
else:
    print("\nNo UTF-8 with BOM files found.")

# Only shown when something actually failed, so a clean run prints the
# same report as before.
if unreadable_reports:
    print("\nFILES THAT COULD NOT BE READ (skipped):")
    print("-" * 100)
    for path, err in unreadable_reports:
        print(f"{'Unreadable':<25} | {path} ({err})")

print("\nScan finished.")