mirror of
https://github.com/yeasy/docker_practice.git
synced 2026-03-11 04:14:38 +00:00
- Enforce Level 1-3 structural numbering based on SUMMARY.md hierarchy - Remove structural numbering from Level 4+ headings - Eliminate single child headings by converting to bold text - Auto-inject summary text for headings with multiple children missing intro text - Exclude Appendix chapters from structural numbering - Avoid modifying code block contents - Add script to detect non-standard English usage in headers
251 lines
9.0 KiB
Go
251 lines
9.0 KiB
Go
import os
|
||
import re
|
||
|
||
# Upper-cased English tokens that may appear in headings without being
# reported by check_english(). Lookups are done on the upper-cased word,
# so every entry here must be upper case.
ENG_ALLOWLIST = {
    'DOCKER', 'KUBERNETES', 'XML', 'LLM', 'RAG', 'LINUX', 'UBUNTU', 'MAC', 'MACOS',
    'WINDOWS', 'API', 'JSON', 'YAML', 'REGISTRY', 'HUB', 'REPOSITORY', 'TAG', 'IMAGE',
    'CONTAINER', 'DEBIAN', 'FEDORA', 'CENTOS', 'RASPBERRY', 'PI', 'PULL', 'LIST',
    'RM', 'COMMIT', 'BUILD', 'RUN', 'DAEMON', 'STOP', 'NEXUS', 'VOLUMES', 'TMPFS',
    'DNS', 'PORT', 'BUILDX', 'BUILDKIT', 'COMPOSE', 'DJANGO', 'RAILS', 'WORDPRESS',
    'LNMP', 'NAMESPACE', 'CGROUPS', 'UFS', 'PODMAN', 'PROMETHEUS', 'ELK', 'BUSYBOX',
    'ALPINE', 'DEVOPS', 'ACTIONS', 'DRONE', 'IDE', 'VS', 'CODE', 'NGINX', 'PHP',
    'NODE.JS', 'MYSQL', 'MONGODB', 'REDIS', 'MINIO', 'DOCKERD', 'TENCENTCLOUD',
    'ALICLOUD', 'AWS', 'COREOS', 'KUBEADM', 'CONTAINERD', 'DESKTOP', 'KIND', 'K3S',
    'SYSTEMD', 'DASHBOARD', 'KUBECTL', 'ETCD', 'ETCDCTL', 'VM', 'VAGRANT', 'LXC',
    'GITHUB', 'GOOGLE', 'CLOUD', 'NPM', 'MAVEN', 'ACR', 'TCR', 'ECR', 'HARBOR',
    'CNCF', 'SIGSTORE', 'NOTATION', 'SCOUT', 'TRIVY', 'CMD', 'ENTRYPOINT', 'ENV', 'ARG',
    'VOLUME', 'EXPOSE', 'WORKDIR', 'USER', 'HEALTHCHECK', 'ONBUILD', 'LABEL', 'SHELL',
    'COPY', 'ADD', 'DOCKERFILE', 'CI', 'CD', 'OS',
}
|
||
|
||
def parse_summary(summary_path='SUMMARY.md'):
    """Map every file linked from the book summary to its numbering context.

    Args:
        summary_path: Path of the summary file to read. Defaults to
            ``'SUMMARY.md'`` in the current working directory.

    Returns:
        A dict keyed by the link target exactly as written in the summary.
        Each value is a dict with:

        * ``'level'``  -- 1 for top-level entries, 2 for indented entries.
        * ``'title'``  -- the link text (chapter titles get a ``:`` inserted
          after the chapter marker).
        * ``'is_app'`` -- True for appendix entries.
        * ``'chap_num'`` -- 1-based chapter counter (non-appendix only).
        * ``'sec_num'``  -- 1-based section counter within the chapter
          (non-appendix level-2 entries only).

        An empty dict is returned when the summary file does not exist.
    """
    if not os.path.exists(summary_path):
        return {}
    with open(summary_path, 'r', encoding='utf-8') as f:
        content = f.read()

    chapter_re = re.compile(r'^\* \[(第[一二三四五六七八九十百]+章[^\]]*)\]\((.*?)\)')
    section_re = re.compile(r'^\s+\* \[(.*?)\]\((.*?)\)')
    appendix_re = re.compile(r'^\* \[(附录[^\]]*)\]\((.*?)\)')

    file_to_context = {}
    chapter_idx = 0
    section_idx = 0
    is_appendix = False

    for line in content.split('\n'):
        # Once an appendix heading has been seen, every following indented
        # entry is treated as appendix material. Parentheses make the
        # operator precedence of the original condition explicit.
        if '## 附录' in line or (line.startswith('## ') and '附录' in line):
            is_appendix = True

        m_chap = chapter_re.match(line)
        if m_chap:
            # "第一章 简介" -> "第一章:简介"; if the title has no space, put the
            # separator right after the first "章" only.
            title = m_chap.group(1).replace(' ', ':', 1)
            if ':' not in title:
                title = title.replace('章', '章:', 1)
            chapter_idx += 1
            section_idx = 0
            file_to_context[m_chap.group(2)] = {
                'level': 1,
                'title': title,
                'chap_num': chapter_idx,
                'is_app': False,
            }
            continue

        m_sec = section_re.match(line)
        if m_sec:
            title = m_sec.group(1)
            filepath = m_sec.group(2)
            section_idx += 1
            if is_appendix or 'appendix' in filepath:
                file_to_context[filepath] = {
                    'level': 2,
                    'title': title,
                    'is_app': True,
                }
            else:
                file_to_context[filepath] = {
                    'level': 2,
                    'title': title,
                    'chap_num': chapter_idx,
                    'sec_num': section_idx,
                    'is_app': False,
                }
            # An indented entry can never match the top-level appendix
            # pattern below, so move on to the next line.
            continue

        m_app = appendix_re.match(line)
        if m_app:
            file_to_context[m_app.group(2)] = {
                'level': 1,
                'title': m_app.group(1),
                'is_app': True,
            }

    return file_to_context
|
||
|
||
def check_english(title, allowlist=None):
    """Report English words in a heading that are not on the allowlist.

    A word is a run of ASCII letters, digits and dots that starts with a
    letter, so structural numbering such as ``1.2`` is never mistaken for a
    word, while names like ``Node.js`` or ``K3s`` stay in one piece. Trailing
    dots (sentence punctuation) are dropped before the lookup.

    Args:
        title: Heading text to inspect.
        allowlist: Optional collection of upper-cased words to accept.
            Defaults to the module-level ``ENG_ALLOWLIST``.

    Returns:
        The list of flagged words, in order of appearance. A notice is also
        printed for each of them.
    """
    if allowlist is None:
        allowlist = ENG_ALLOWLIST

    flagged = []
    for raw in re.findall(r'[A-Za-z][A-Za-z0-9.]*', title):
        word = raw.rstrip('.')
        if word.upper() not in allowlist:
            print(f" [!] Notice: English word '{word}' in title: {title}")
            flagged.append(word)
    return flagged
|
||
|
||
def _scan_headings(lines):
    """Collect the ATX headings in *lines*, ignoring fenced code blocks.

    Returns a list of dicts with 'level', 'title', 'line_idx' and 'children',
    where 'children' holds the list indices of the direct sub-headings
    (exactly one level deeper, before the next heading of the same or a
    shallower level).
    """
    heading_re = re.compile(r'^(#{1,6})\s+(.*)')
    headings = []
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.strip().startswith('```'):
            in_code_block = not in_code_block
        if in_code_block:
            continue
        match = heading_re.match(line)
        if match:
            headings.append({
                'level': len(match.group(1)),
                'title': match.group(2).strip(),
                'line_idx': idx,
                'children': [],
            })

    for i, parent in enumerate(headings):
        for j in range(i + 1, len(headings)):
            child_level = headings[j]['level']
            if child_level <= parent['level']:
                break
            if child_level == parent['level'] + 1:
                parent['children'].append(j)
    return headings


def process_file(filepath, context):
    """Normalise the headings of one Markdown file in place.

    * Level 1 (appendix files only): leading numbering is removed and an
      "附录X" prefix is joined to the rest of the title with ``:``.
    * Level 2/3: renumbered as ``chap.sec`` / ``chap.sec.n`` when the context
      provides a section number; numbering is stripped in appendix files.
    * Level 4+: multi-part numeric prefixes such as ``1.2.3`` are removed.
    * A heading that is the only child of its parent becomes bold text.
    * A level 1-3 heading with two or more children and no text before the
      first child gets a one-line introduction inserted after it.

    Args:
        filepath: Path of the Markdown file to rewrite.
        context: Dict that may provide ``'is_app'``, ``'chap_num'`` and
            ``'sec_num'``; missing keys default to False / 0 / 0.

    Returns:
        True if the file was rewritten, False if nothing had to change or the
        file could not be read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {filepath}: {e}")
        return False

    headings = _scan_headings(lines)

    # line index -> replacement text for that line
    actions = {}
    # Line indices of headings that must be rendered as bold text because
    # they are the only child of their parent. Parents always precede their
    # children, so this is filled in before the child itself is visited and
    # the demotion can no longer be overwritten by the child's renumbering.
    demoted = set()

    def has_text_between(start_idx, end_idx):
        """True if any non-blank, non-'#' line lies strictly between the two."""
        for text_ln in range(start_idx + 1, end_idx):
            text = lines[text_ln].strip()
            if text and not text.startswith('#'):
                return True
        return False

    is_app = context.get('is_app', False)
    chap_num = context.get('chap_num', 0)
    sec_num = context.get('sec_num', 0)

    # Every level-2 heading of the file carries the section number taken
    # from the summary; level-3 headings are counted below it.
    h2_counter = sec_num if sec_num > 0 else 0
    h3_counter = 0

    numbering_re = re.compile(r'^[\d\.]+\s*')

    for h in headings:
        level = h['level']
        title = h['title']
        ln = h['line_idx']
        original_title = title
        check_english(title)

        if level == 1:
            # Chapter titles of numbered files are left untouched.
            if is_app:
                title = numbering_re.sub('', title)
                m = re.match(r'^(附录[一二三四五六七八九十]*)\s*(.*)', title)
                if m:
                    p1 = m.group(1).strip()
                    p2 = m.group(2).strip()
                    # Drop an existing separator (ASCII or full-width colon)
                    # so it is not doubled when the title is re-joined.
                    if p2.startswith((':', ':')):
                        p2 = p2[1:].strip()
                    title = f"{p1}:{p2}" if p2 else p1

        elif level == 2:
            clean_title = numbering_re.sub('', title)
            if not is_app and h2_counter > 0:
                title = f"{chap_num}.{h2_counter} {clean_title}"
            else:
                title = clean_title
            h3_counter = 0

        elif level == 3:
            h3_counter += 1
            if is_app:
                title = numbering_re.sub('', title)
            elif h2_counter > 0:
                clean_title = numbering_re.sub('', title)
                title = f"{chap_num}.{h2_counter}.{h3_counter} {clean_title}"

        elif level >= 4:
            m = re.match(r'^([\d\.]+)\s+(.*)', title)
            # Only strip structural numbering ("1.2", "1.2.3"); a plain
            # leading number such as "3 ways ..." is kept.
            if m and '.' in m.group(1).strip('.'):
                title = m.group(2)

        if ln in demoted:
            actions[ln] = f"**{title}**\n\n"
            h['title'] = title
        elif title != original_title:
            actions[ln] = f"{'#' * level} {title}\n"
            h['title'] = title

        children_indices = h['children']
        if len(children_indices) == 1:
            demoted.add(headings[children_indices[0]]['line_idx'])
        elif len(children_indices) >= 2 and level < 4:
            first_child_ln = headings[children_indices[0]]['line_idx']
            if not has_text_between(ln, first_child_ln):
                base = actions[ln] if ln in actions else lines[ln]
                actions[ln] = base.rstrip() + "\n\n涵盖了如下重点内容:\n\n"

    if not actions:
        return False

    new_lines = [actions.get(i, line) for i, line in enumerate(lines)]
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)
    return True
|
||
|
||
def main():
    """Apply the heading rules to every Markdown file of the book.

    Files listed in the summary are processed with their chapter/section
    context. Every other ``.md`` file found under the current directory
    (except a few project-level documents) is processed as unnumbered
    content, i.e. with ``{'is_app': True}``.
    """
    file_contexts = parse_summary()
    modified = 0

    for filepath, context in file_contexts.items():
        if os.path.exists(filepath) and process_file(filepath, context):
            modified += 1
            print(f" -> MODIFIED: {filepath}")

    skipped_names = {'SUMMARY.md', 'README.md', 'CONTRIBUTING.md', 'CHANGELOG.md'}
    for root, _dirs, files in os.walk('.'):
        if '.git' in root or 'node_modules' in root or '.gemini' in root:
            continue
        for name in files:
            if not name.endswith('.md') or name in skipped_names:
                continue
            # Normalise to the "dir/file.md" form used by the summary links.
            # normpath drops only the leading "./" and the separator swap
            # keeps the lookup working on Windows, so a file that already has
            # a context is never processed a second time as unnumbered.
            clean_path = os.path.normpath(os.path.join(root, name)).replace(os.sep, '/')
            if clean_path in file_contexts:
                continue
            if process_file(clean_path, {'is_app': True}):
                modified += 1
                print(f" -> MODIFIED: {clean_path}")

    print(f"\nTotal Modified {modified} files")


if __name__ == "__main__":
    main()
|