mirror of
https://github.com/m5stack/StackChan.git
synced 2026-04-27 19:12:40 +00:00
192 lines
7.1 KiB
Python
192 lines
7.1 KiB
Python
import os
|
|
import re
|
|
import sys
|
|
|
|
# ==========================================
|
|
# Configuration / 配置
|
|
# ==========================================
|
|
|
|
# Directories to exclude from scanning
|
|
# 不需要扫描的目录
|
|
EXCLUDE_DIRS = {
|
|
'.git',
|
|
'.idea',
|
|
'.vscode',
|
|
'__pycache__',
|
|
}
|
|
|
|
def load_gitignore():
|
|
"""Load exclusions from .gitignore / 从 .gitignore 加载排除项"""
|
|
if os.path.exists('.gitignore'):
|
|
print("Loading .gitignore...")
|
|
with open('.gitignore', 'r') as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
if not line or line.startswith('#'): continue
|
|
|
|
# Normalize path: remove leading/trailing slashes
|
|
# 简单处理:移除开头结尾的斜杠
|
|
clean_item = line.rstrip('/').lstrip('/')
|
|
|
|
# Skip patterns with wildcards (simple directory names only)
|
|
# 跳过带通配符的复杂规则,仅添加确定的目录名
|
|
if '*' not in clean_item:
|
|
EXCLUDE_DIRS.add(clean_item)
|
|
|
|
# Load gitignore patterns
|
|
load_gitignore()
|
|
|
|
# File extensions to exclude (binary files, images, etc.)
|
|
# 不需要扫描的文件类型
|
|
EXCLUDE_EXTENSIONS = {
|
|
'.o', '.a', '.elf', '.bin', '.map', '.hex',
|
|
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico',
|
|
'.wav', '.mp3', '.ogg',
|
|
'.pdf', '.zip', '.tar', '.gz',
|
|
'.pyc', '.ninja'
|
|
}
|
|
|
|
# Regular expressions for sensitive data
|
|
# 敏感信息的正则表达式匹配规则
|
|
PATTERNS = [
|
|
# 1. IP Addresses (Exclude localhost and common local IPs)
|
|
# 匹配可能的硬编码公网 IP (排除 127.0.0.1, 0.0.0.0, 192.168.x.x)
|
|
(r'\b(?!127\.0\.0\.1|0\.0\.0\.0|192\.168\.)\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', "Potential Public IP"),
|
|
|
|
# 2. Key/Secret Assignments
|
|
# 匹配变量赋值: 变量名包含 key/secret/token/password 等
|
|
# regex matches: variable_name = "value" OR variable_name: "value"
|
|
(r'(?i)(api[_-]?key|secret|token|password|passwd|pwd|auth|access[_-]?key|client[_-]?secret|private[_-]?key)\s*[:=]\s*["\']([^"\']+)["\']', "Potential Secret/Key"),
|
|
|
|
# 3. WiFi Credentials
|
|
# 匹配 WiFi SSID 或 Password
|
|
(r'(?i)(ssid|wifi[_-]?pass(word)?)\s*[:=]\s*["\']([^"\']+)["\']', "Potential WiFi Credential"),
|
|
|
|
# 4. URLs with Credentials
|
|
# 匹配 http://user:pass@host 格式
|
|
(r'https?://[^:\s]+:[^@\s]+@[^/\s]+', "URL with Credentials"),
|
|
|
|
# 5. Cryptographic Key Headers (Private/Public)
|
|
# 匹配 RSA/DSA 私钥或公钥头
|
|
(r'-----BEGIN\s+[A-Z\s]+KEY-----', "Cryptographic Key Block"),
|
|
|
|
# 6. AWS Access Key ID (Common Pattern)
|
|
(r'\bAKIA[0-9A-Z]{16}\b', "AWS Access Key ID"),
|
|
|
|
# 7. Generic high-entropy strings (simplified view for Bearer tokens etc)
|
|
# 匹配 "Bearer <token>" 格式
|
|
(r'Bearer\s+[a-zA-Z0-9\-\._~\+/]{20,}', "Potential Bearer Token"),
|
|
]
|
|
|
|
# Allow-list for dummy values (Common placeholders to ignore)
|
|
# 白名单:如果是这些值,则不认为是泄露
|
|
ALLOW_LIST = {
|
|
"",
|
|
"your_ssid", "your_password", "password", "12345678", "00000000",
|
|
"dummy", "example", "changeme", "TODO", "x" * 10
|
|
}
|
|
|
|
# ==========================================
|
|
# Script Logic / 逻辑实现
|
|
# ==========================================
|
|
|
|
def is_text_file(filepath):
|
|
"""Check if file is text by reading first chunk."""
|
|
try:
|
|
with open(filepath, 'rb') as f:
|
|
chunk = f.read(1024)
|
|
if not chunk: return True # Empty file
|
|
if b'\0' in chunk: return False # Contains null bytes -> binary
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
def check_line(line, line_num):
|
|
issues = []
|
|
for pattern, desc in PATTERNS:
|
|
matches = re.finditer(pattern, line)
|
|
for match in matches:
|
|
matched_text = match.group(0)
|
|
|
|
# If pattern has groups (like variable assignment), check the value part
|
|
if len(match.groups()) >= 2 and match.group(2):
|
|
sensitive_val = match.group(2)
|
|
if sensitive_val in ALLOW_LIST:
|
|
continue
|
|
# Simple heuristc: skip if looks like a format string placeholder
|
|
if "%s" in sensitive_val or "%d" in sensitive_val or "{}" in sensitive_val:
|
|
continue
|
|
|
|
# Skip common false positives for IP
|
|
if desc == "Potential Public IP":
|
|
# Skip version numbers that look like IPs (e.g. 1.0.0.0 in cmake)
|
|
if "version" in line.lower():
|
|
continue
|
|
|
|
issues.append({
|
|
'desc': desc,
|
|
'match': matched_text
|
|
})
|
|
return issues
|
|
|
|
def scan_files(root_dir):
|
|
print(f"Scanning directory: {root_dir}")
|
|
print(f"Ignoring directories: {', '.join(EXCLUDE_DIRS)}")
|
|
print("-" * 60)
|
|
|
|
found_issues_count = 0
|
|
|
|
for dirpath, dirnames, filenames in os.walk(root_dir):
|
|
# Filter directories
|
|
dirnames[:] = [d for d in dirnames if d not in EXCLUDE_DIRS]
|
|
|
|
for filename in filenames:
|
|
# Filter extensions
|
|
_, ext = os.path.splitext(filename)
|
|
if ext.lower() in EXCLUDE_EXTENSIONS:
|
|
continue
|
|
|
|
# Skip self
|
|
if filename == os.path.basename(__file__):
|
|
continue
|
|
|
|
filepath = os.path.join(dirpath, filename)
|
|
|
|
if not is_text_file(filepath):
|
|
continue
|
|
|
|
try:
|
|
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
|
file_issues = []
|
|
for i, line in enumerate(f, 1):
|
|
line_stripped = line.strip()
|
|
if not line_stripped: continue
|
|
|
|
line_issues = check_line(line_stripped, i)
|
|
if line_issues:
|
|
for issue in line_issues:
|
|
file_issues.append((i, issue['desc'], issue['match'], line_stripped))
|
|
|
|
if file_issues:
|
|
found_issues_count += 1
|
|
print(f"\n[FILE] {os.path.relpath(filepath, root_dir)}")
|
|
for line_num, desc, match, content in file_issues:
|
|
# Truncate content for display
|
|
disp_content = content[:80] + "..." if len(content) > 80 else content
|
|
print(f" Line {line_num}: \033[91m{desc}\033[0m")
|
|
print(f" Match: {match}")
|
|
print(f" Code : {disp_content}")
|
|
|
|
except Exception as e:
|
|
print(f"[WARN] Could not read {filepath}: {e}")
|
|
|
|
print("-" * 60)
|
|
if found_issues_count == 0:
|
|
print("\033[92mNo obvious sensitive hardcoded strings found.\033[0m")
|
|
else:
|
|
print(f"\033[93mScan complete. Found potential issues in {found_issues_count} files.\033[0m")
|
|
print("Please review them manually to ensure no real secrets are leaked.")
|
|
|
|
if __name__ == "__main__":
|
|
scan_files(os.getcwd())
|