# File-viewer header (duplicated in the paste): Python, 50 lines, 1.6 KiB.
"""Scan a directory tree and report files that are not valid UTF-8.

Every file under the target directory is read as raw bytes and classified:
files that decode cleanly as UTF-8 (with or without a BOM) and empty files
are ignored; everything else is listed together with a best-guess label.
"""
import os

# Byte sequence that marks a UTF-8 file saved "with BOM".
UTF8_BOM = b'\xef\xbb\xbf'

# Directory scanned when the script is run directly.
root_dir = r'pages/mall/admin'


def classify_bytes(data: bytes):
    """Return an encoding label for *data*, or ``None`` if nothing to report.

    Args:
        data: Raw file contents.

    Returns:
        ``None`` for empty input or input that decodes as UTF-8 (a leading
        BOM is accepted). Otherwise one of:

        * ``"Partial UTF-8 / Mixed"`` - starts with the UTF-8 BOM but the
          content is not valid UTF-8;
        * ``"GBK"`` - not UTF-8, but decodes as GBK;
        * ``"Other/Binary"`` - decodes as neither UTF-8 nor GBK.
    """
    if not data:
        return None

    try:
        data.decode('utf-8')
    except UnicodeDecodeError:
        pass  # Not UTF-8: fall through and work out what to call it.
    else:
        return None

    # A BOM announces UTF-8, so invalid bytes after it mean the file is
    # damaged or mixed; a GBK guess would be misleading here.
    if data.startswith(UTF8_BOM):
        return "Partial UTF-8 / Mixed"

    try:
        data.decode('gbk')
    except UnicodeDecodeError:
        return "Other/Binary"
    return "GBK"


def scan_tree(root: str) -> list:
    """Walk *root* and collect ``(path, label)`` for every problem file.

    Args:
        root: Directory to scan recursively. A missing directory yields an
            empty result because ``os.walk`` ignores listing errors by
            default.

    Returns:
        A list of ``(path, label)`` tuples in ``os.walk`` order. Labels are
        those produced by :func:`classify_bytes`, plus ``"Unreadable"`` for
        files that could not be opened or read.
    """
    findings = []
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            try:
                with open(path, 'rb') as handle:
                    data = handle.read()
            except OSError:
                # Report instead of silently skipping, so the "all files are
                # valid" message is never printed for files nobody inspected.
                findings.append((path, "Unreadable"))
                continue

            label = classify_bytes(data)
            if label is not None:
                findings.append((path, label))
    return findings


def main(root: str = root_dir) -> None:
    """Scan *root* and print a report of non-UTF-8 files to stdout."""
    print(f"Scanning {root} for non-UTF8 files...")
    findings = scan_tree(root)

    if not findings:
        print(f"All files in {root} appear to be valid UTF-8.")
    else:
        print("Detected Encoding | File Path")
        print("-" * 100)
        for path, enc in findings:
            print(f"{enc:<25} | {os.path.abspath(path)}")

    print("Scan finished.")


if __name__ == "__main__":
    main()