"""Normalise Markdown headings across a book driven by ``SUMMARY.md``.

The script reads ``SUMMARY.md`` to learn each file's chapter/section number,
then rewrites the headings of every Markdown file in place:

* level-2/3 headings get ``<chapter>.<section>[.<n>]`` prefixes (or have
  numeric prefixes stripped in appendix files),
* a heading that is the only child of its parent is demoted to bold text,
* a heading directly followed by two or more sub-headings gets a short
  lead-in sentence inserted.
"""

import os
import re

ENG_ALLOWLIST = {
    'DOCKER', 'KUBERNETES', 'XML', 'LLM', 'RAG', 'LINUX', 'UBUNTU', 'MAC',
    'MACOS', 'WINDOWS', 'API', 'JSON', 'YAML', 'REGISTRY', 'HUB',
    'REPOSITORY', 'TAG', 'IMAGE', 'CONTAINER', 'DEBIAN', 'FEDORA', 'CENTOS',
    'RASPBERRY', 'PI', 'PULL', 'LIST', 'RM', 'COMMIT', 'BUILD', 'RUN',
    'DAEMON', 'STOP', 'NEXUS', 'VOLUMES', 'TMPFS', 'DNS', 'PORT', 'BUILDX',
    'BUILDKIT', 'COMPOSE', 'DJANGO', 'RAILS', 'WORDPRESS', 'LNMP',
    'NAMESPACE', 'CGROUPS', 'UFS', 'PODMAN', 'PROMETHEUS', 'ELK', 'BUSYBOX',
    'ALPINE', 'DEVOPS', 'ACTIONS', 'DRONE', 'IDE', 'VS', 'CODE', 'NGINX',
    'PHP', 'NODE.JS', 'MYSQL', 'MONGODB', 'REDIS', 'MINIO', 'DOCKERD',
    'TENCENTCLOUD', 'ALICLOUD', 'AWS', 'COREOS', 'KUBEADM', 'CONTAINERD',
    'DESKTOP', 'KIND', 'K3S', 'SYSTEMD', 'DASHBOARD', 'KUBECTL', 'ETCD',
    'ETCDCTL', 'VM', 'VAGRANT', 'LXC', 'GITHUB', 'GOOGLE', 'CLOUD', 'NPM',
    'MAVEN', 'ACR', 'TCR', 'ECR', 'HARBOR', 'CNCF', 'SIGSTORE', 'NOTATION',
    'SCOUT', 'TRIVY', 'CMD', 'ENTRYPOINT', 'ENV', 'ARG', 'VOLUME', 'EXPOSE',
    'WORKDIR', 'USER', 'HEALTHCHECK', 'ONBUILD', 'LABEL', 'SHELL', 'COPY',
    'ADD', 'DOCKERFILE', 'CI', 'CD', 'OS',
}

# Patterns used once per line/heading; compiled once at import time.
_CHAPTER_RE = re.compile(r'^\* \[(第[一二三四五六七八九十百]+章[^\]]*)\]\((.*?)\)')
_SECTION_RE = re.compile(r'^\s+\* \[(.*?)\]\((.*?)\)')
_APPENDIX_RE = re.compile(r'^\* \[(附录[^\]]*)\]\((.*?)\)')
_HEADING_RE = re.compile(r'^(#{1,6})\s+(.*)')
_NUM_PREFIX_RE = re.compile(r'^[\d\.]+\s*')
_DEEP_NUM_RE = re.compile(r'^([\d\.]+)\s+(.*)')
_APPENDIX_TITLE_RE = re.compile(r'^(附录[一二三四五六七八九十]*)\s*(.*)')
# A Latin word starts with a letter and may contain digits/dots ("k3s",
# "Node.js"); bare numbering such as "1.2" is deliberately not a word.
_ENGLISH_WORD_RE = re.compile(r'[A-Za-z][A-Za-z0-9.]*')

# Separator characters that may already follow an appendix label: the ASCII
# colon and the full-width colon (U+FF1A).
_TITLE_COLONS = (':', '\uff1a')

_LEAD_IN = "\n\n涵盖了如下重点内容:\n\n"

_SKIP_DIR_MARKERS = ('.git', 'node_modules', '.gemini')
_SKIP_FILES = ('SUMMARY.md', 'README.md', 'CONTRIBUTING.md', 'CHANGELOG.md')


def parse_summary():
    """Parse ``SUMMARY.md`` in the current directory.

    Returns:
        A dict mapping each linked file path (exactly as written in the
        link) to a context dict with keys ``level``, ``title``, ``is_app``
        and, for non-appendix entries, ``chap_num`` (plus ``sec_num`` for
        indented entries). Returns ``{}`` when ``SUMMARY.md`` is missing.
    """
    if not os.path.exists('SUMMARY.md'):
        return {}
    with open('SUMMARY.md', 'r', encoding='utf-8') as f:
        content = f.read()

    file_to_context = {}
    chapter_idx = 0
    section_idx = 0
    is_appendix = False

    for line in content.split('\n'):
        # Once an appendix heading is seen, every following indented entry
        # is treated as appendix content.
        if '## 附录' in line or ('附录' in line and line.startswith('## ')):
            is_appendix = True

        m_chap = _CHAPTER_RE.match(line)
        if m_chap:
            # "第一章 Title" -> "第一章:Title"; if there was no space to
            # replace, put the colon right after "章".
            title = m_chap.group(1).replace(' ', ':', 1)
            if ':' not in title:
                title = title.replace('章', '章:')
            chapter_idx += 1
            section_idx = 0
            file_to_context[m_chap.group(2)] = {
                'level': 1,
                'title': title,
                'chap_num': chapter_idx,
                'is_app': False,
            }
            continue

        m_sec = _SECTION_RE.match(line)
        if m_sec:
            title = m_sec.group(1)
            filepath = m_sec.group(2)
            section_idx += 1
            if is_appendix or 'appendix' in filepath:
                file_to_context[filepath] = {
                    'level': 2,
                    'title': title,
                    'is_app': True,
                }
            else:
                file_to_context[filepath] = {
                    'level': 2,
                    'title': title,
                    'chap_num': chapter_idx,
                    'sec_num': section_idx,
                    'is_app': False,
                }
            # An indented line can never match the top-level appendix
            # pattern below, so stop here.
            continue

        m_app = _APPENDIX_RE.match(line)
        if m_app:
            file_to_context[m_app.group(2)] = {
                'level': 1,
                'title': m_app.group(1),
                'is_app': True,
            }

    return file_to_context


def check_english(title):
    """Print a notice for every Latin word in *title* not in ENG_ALLOWLIST.

    Words are compared case-insensitively. Trailing dots (sentence
    punctuation) are ignored, and pure numbering such as ``1.2`` is not
    considered a word, so numbered headings do not trigger notices.
    """
    for raw in _ENGLISH_WORD_RE.findall(title):
        word = raw.rstrip('.')
        if word.upper() not in ENG_ALLOWLIST:
            print(f" [!] Notice: English word '{word}' in title: {title}")


def _collect_headings(lines):
    """Return the ATX headings found in *lines*, skipping fenced code."""
    headings = []
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.strip().startswith('```'):
            in_code_block = not in_code_block
        if in_code_block:
            continue
        match = _HEADING_RE.match(line)
        if match:
            headings.append({
                'level': len(match.group(1)),
                'title': match.group(2).strip(),
                'line_idx': idx,
                'children': [],
            })
    return headings


def _link_children(headings):
    """Fill each heading's ``children`` with indices of its direct children."""
    for idx, parent in enumerate(headings):
        level = parent['level']
        for j in range(idx + 1, len(headings)):
            child_level = headings[j]['level']
            if child_level <= level:
                break  # left the parent's subtree
            if child_level == level + 1:
                parent['children'].append(j)


def _has_text_between(lines, start_idx, end_idx):
    """Return True if a non-blank, non-``#`` line lies strictly between."""
    for text_ln in range(start_idx + 1, end_idx):
        content = lines[text_ln].strip()
        if content and not content.startswith('#'):
            return True
    return False


def _normalize_title(level, title, is_app, chap_num, h2_counter, h3_counter):
    """Return the normalised text for one heading (without the ``#`` marks)."""
    if level == 1:
        if not is_app:
            return title
        title = _NUM_PREFIX_RE.sub('', title)
        m = _APPENDIX_TITLE_RE.match(title)
        if m:
            label = m.group(1).strip()
            rest = m.group(2).strip()
            # Drop an existing separator so re-running never stacks colons.
            if rest.startswith(_TITLE_COLONS):
                rest = rest[1:].strip()
            title = f"{label}:{rest}" if rest else label
        return title

    if level == 2:
        clean_title = _NUM_PREFIX_RE.sub('', title)
        if not is_app and h2_counter > 0:
            return f"{chap_num}.{h2_counter} {clean_title}"
        return clean_title

    if level == 3:
        if is_app:
            return _NUM_PREFIX_RE.sub('', title)
        if h2_counter > 0:
            clean_title = _NUM_PREFIX_RE.sub('', title)
            return f"{chap_num}.{h2_counter}.{h3_counter} {clean_title}"
        # NOTE(review): non-appendix files without a section number keep
        # their level-3 titles untouched, unlike level 2 which strips the
        # numeric prefix. Confirm whether that asymmetry is intended.
        return title

    # Levels 4-6: strip only multi-part numbering such as "1.2.3 Title";
    # a single number ("3 Title" / "3. Title") is left alone.
    m = _DEEP_NUM_RE.match(title)
    if m and '.' in m.group(1).strip('.'):
        return m.group(2)
    return title


def process_file(filepath, context):
    """Rewrite the headings of one Markdown file in place.

    Args:
        filepath: Path of the Markdown file to process.
        context: Dict as produced by ``parse_summary``; the keys read are
            ``is_app``, ``chap_num`` and ``sec_num`` (all optional).

    Returns:
        True if the file was rewritten, False if nothing needed changing or
        the file could not be read (an error line is printed in that case).
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {filepath}: {e}")
        return False

    headings = _collect_headings(lines)
    _link_children(headings)

    is_app = context.get('is_app', False)
    chap_num = context.get('chap_num', 0)
    sec_num = context.get('sec_num', 0)

    # NOTE(review): h2_counter is never incremented, so every level-2
    # heading in a file receives the same "<chap>.<sec>" prefix; this looks
    # like it assumes one level-2 heading per file -- confirm.
    h2_counter = sec_num if sec_num > 0 else 0
    h3_counter = 0

    actions = {}      # line index -> replacement text
    demoted = set()   # line indices of headings to be rendered as bold text

    for h in headings:
        level = h['level']
        ln = h['line_idx']
        original_title = h['title']
        check_english(original_title)

        if level == 2:
            h3_counter = 0
        elif level == 3:
            h3_counter += 1

        title = _normalize_title(
            level, original_title, is_app, chap_num, h2_counter, h3_counter
        )
        h['title'] = title

        # A parent is always processed before its children, so the demotion
        # decision is already known here. Applying it now (instead of when
        # the parent was visited) keeps it from being overwritten by the
        # child's own renumbering and makes a single run converge.
        if ln in demoted:
            actions[ln] = f"**{title}**\n\n"
        elif title != original_title:
            actions[ln] = f"{'#' * level} {title}\n"

        children_indices = h['children']
        if len(children_indices) == 1:
            demoted.add(headings[children_indices[0]]['line_idx'])
        elif len(children_indices) >= 2 and level < 4:
            first_child_ln = headings[children_indices[0]]['line_idx']
            if not _has_text_between(lines, ln, first_child_ln):
                current = actions.get(ln, lines[ln])
                actions[ln] = current.rstrip() + _LEAD_IN

    if not actions:
        return False

    new_lines = [actions.get(idx, line) for idx, line in enumerate(lines)]
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)
    return True


def _normalize_path(path):
    """Return *path* normalised and with forward slashes, for comparisons."""
    return os.path.normpath(path).replace(os.sep, '/')


def main():
    """Process every file listed in SUMMARY.md, then all other ``.md`` files.

    Files not listed in ``SUMMARY.md`` are processed with appendix rules
    (numeric prefixes stripped, no chapter numbering).
    """
    file_contexts = parse_summary()
    modified = 0

    for filepath, context in file_contexts.items():
        if os.path.exists(filepath):
            if process_file(filepath, context):
                modified += 1
                print(f" -> MODIFIED: {filepath}")

    # Compare on normalised paths so "./a/b.md" in SUMMARY.md and the
    # walked "a/b.md" are recognised as the same file.
    known_paths = {_normalize_path(p) for p in file_contexts}

    for root, dirs, files in os.walk('.'):
        # Prune skipped directories in place so os.walk never descends into
        # them (same substring rule as matching against the whole path).
        dirs[:] = [
            d for d in dirs
            if not any(marker in d for marker in _SKIP_DIR_MARKERS)
        ]
        for file in files:
            if not file.endswith('.md') or file in _SKIP_FILES:
                continue
            clean_path = _normalize_path(os.path.join(root, file))
            if clean_path in known_paths:
                continue
            if process_file(clean_path, {'is_app': True}):
                modified += 1
                print(f" -> MODIFIED: {clean_path}")

    print(f"\nTotal Modified {modified} files")


if __name__ == "__main__":
    main()