Files
docker_practice/fix_project_rules.py
2026-02-21 16:43:31 -08:00

196 lines
6.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
def fix_bold_spaces(line):
    """Trim whitespace just inside balanced ``**bold**`` markers on one line.

    The line is split on ``**``. When the split yields an odd number of
    segments (at least three), the odd-indexed segments are the spans between
    an opening and a closing marker and are stripped of surrounding
    whitespace. Spans that contain only whitespace are left untouched, and a
    line whose markers do not pair up is returned unchanged.
    """
    segments = line.split("**")
    # An even segment count means an unmatched marker; fewer than three
    # segments means there is no complete bold span at all.
    if len(segments) < 3 or len(segments) % 2 == 0:
        return line
    rebuilt = [
        seg.strip() if idx % 2 == 1 and seg.strip() else seg
        for idx, seg in enumerate(segments)
    ]
    return "**".join(rebuilt)
def fix_trailing_newline(content):
    """Return *content* ending in exactly one newline.

    Falsy input (such as the empty string) is returned as is, so an empty
    file stays empty instead of gaining a lone newline.
    """
    if content:
        body = content.rstrip('\n')
        return f"{body}\n"
    return content
# Markdown ATX header: 1-6 '#' characters, whitespace, then the header text.
_HEADER_RE = re.compile(r'^(#{1,6})\s+(.*)')

# Parenthetical ASCII annotations in header text, in full-width and ASCII
# brackets. The full-width brackets are written as escapes (U+FF08 / U+FF09)
# so that tools which flag or strip "ambiguous" Unicode cannot silently turn
# the first pattern into one that deletes all ASCII text.
_PAREN_NOTE_RES = (
    re.compile(r'\uFF08[A-Za-z\s0-9]+\uFF09'),
    re.compile(r'\([A-Za-z\s0-9]+\)'),
)

# Upper bound on level-normalisation sweeps, as a guard against looping.
_MAX_LEVEL_SWEEPS = 100


def _collect_headers(lines):
    """Return ``{'line', 'level', 'text'}`` dicts for headers outside fenced code."""
    headers = []
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith('```'):
            in_code_block = not in_code_block
        if not in_code_block:
            m = _HEADER_RE.match(line)
            if m:
                headers.append(
                    {'line': idx, 'level': len(m.group(1)), 'text': m.group(2)}
                )
    return headers


def _fix_bold_outside_code(lines):
    """Apply ``fix_bold_spaces`` in place to every line outside fenced code."""
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue
        # Inside a fence, '**' is code (exponent, glob, ...), not emphasis.
        if not in_code_block:
            lines[idx] = fix_bold_spaces(line)


def _normalize_header_levels(lines, expected_first_level):
    """Force the first header's level and close gaps in the header hierarchy.

    Sweeps repeat until one makes no change, because each sweep compares the
    levels captured at its start and a rewrite can expose a new gap.
    """
    for _ in range(_MAX_LEVEL_SWEEPS):
        headers = _collect_headers(lines)
        changed = False
        if headers:
            first = headers[0]
            if expected_first_level and first['level'] != expected_first_level:
                lines[first['line']] = '#' * expected_first_level + ' ' + first['text']
                changed = True
            for prev, nxt in zip(headers, headers[1:]):
                # A header may be at most one level deeper than its predecessor.
                if nxt['level'] > prev['level'] + 1:
                    lines[nxt['line']] = '#' * (prev['level'] + 1) + ' ' + nxt['text']
                    changed = True
        if not changed:
            break


def _strip_parenthetical_notes(lines):
    """Remove bracketed ASCII annotations from header text, in place."""
    for header in _collect_headers(lines):
        original = header['text']
        cleaned = original
        for pattern in _PAREN_NOTE_RES:
            cleaned = pattern.sub('', cleaned)
        cleaned = cleaned.strip()
        # Leave the header alone if removing the annotation would empty it.
        if cleaned and cleaned != original:
            lines[header['line']] = '#' * header['level'] + ' ' + cleaned


def _add_overview_before_single_children(lines):
    """Insert an overview sub-header before any header that is an only child."""
    headers = _collect_headers(lines)
    insert_points = set()
    for pos, parent in enumerate(headers):
        children = []
        for candidate in headers[pos + 1:]:
            if candidate['level'] <= parent['level']:
                break
            if candidate['level'] == parent['level'] + 1:
                children.append(candidate)
        if len(children) == 1:
            insert_points.add((children[0]['line'], parent['level'] + 1))
    # Insert bottom-up so earlier line indices stay valid.
    for line_idx, lvl in sorted(insert_points, key=lambda p: p[0], reverse=True):
        lines[line_idx:line_idx] = ['#' * lvl + ' 概述', '', '总体概述了以下内容。', '']


def _add_header_spacing_and_intros(lines):
    """Return new lines with one blank line after each header plus intro text.

    An intro sentence is added when the first non-blank line after a header
    is a deeper header, a code fence, or an image.
    """
    # NOTE(review): the three intro sentences below carry no punctuation. If
    # full-width punctuation was lost from them the same way the brackets
    # were lost from the Pass 3 pattern, restore it from the canonical file.
    out_lines = []
    in_code_block = False
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i]
        if line.startswith('```'):
            in_code_block = not in_code_block
        header = None if in_code_block else _HEADER_RE.match(line)
        out_lines.append(line)
        if header:
            curr_level = len(header.group(1))
            k = i + 1
            while k < total and lines[k].strip() == '':
                k += 1
            out_lines.append('')  # exactly one blank line follows the header
            if k < total:
                next_content = lines[k].strip()
                next_header = _HEADER_RE.match(next_content)
                if next_header and len(next_header.group(1)) > curr_level:
                    out_lines.append('本节涵盖了相关内容与详细描述主要探讨以下几个方面')
                    out_lines.append('')
                elif next_content.startswith('```'):
                    out_lines.append('如下代码块所示展示了相关示例')
                    out_lines.append('')
                elif "![" in next_content and "](" in next_content:
                    out_lines.append('下图直观地展示了本节内容')
                    out_lines.append('')
            # Resume at the first non-blank line after the header.
            i = k - 1
        i += 1
    return out_lines


def process_file(filepath):
    """Rewrite one Markdown file in place so it follows the project's rules.

    Passes, in order (all but the first operate on headers outside fenced
    code blocks):

    0. Trim whitespace inside ``**bold**`` markers, skipping fenced code.
    1-2. Force the first header's level (1 for ``README.md``, 2 for
       ``SUMMARY.md`` and ``N.N_*.md`` section files) and make sure no header
       is more than one level deeper than the one before it.
    3. Remove bracketed ASCII annotations from header text.
    4. Insert an overview sub-header before a header's only child.
    5. Put exactly one blank line after each header and add an intro
       sentence before a following sub-header, code block or image.

    The file is then written back as UTF-8 with a single trailing newline.

    Args:
        filepath: Path of the Markdown file to rewrite.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    lines = content.split('\n')

    filename = os.path.basename(filepath)
    lowered = filename.lower()
    if lowered == 'readme.md':
        expected_first_level = 1
    elif lowered == 'summary.md' or re.match(r'^\d+\.\d+_.*\.md$', filename):
        expected_first_level = 2
    else:
        expected_first_level = None

    _fix_bold_outside_code(lines)
    _normalize_header_levels(lines, expected_first_level)
    _strip_parenthetical_notes(lines)
    _add_overview_before_single_children(lines)
    out_lines = _add_header_spacing_and_intros(lines)

    content = fix_trailing_newline('\n'.join(out_lines))
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
def main():
    """Apply ``process_file`` to every Markdown file under the current directory.

    Directories whose name contains ``node_modules``, ``.git`` or
    ``.vuepress`` are skipped together with everything below them. The match
    is a substring test, so ``.github`` is skipped as well. Files named
    ``book_rule.md`` are left alone. A failure on one file is printed and the
    run continues with the next file.
    """
    excluded_markers = ('node_modules', '.git', '.vuepress')
    md_files = []
    for root, dirs, files in os.walk('.'):
        # Prune in place so os.walk never descends into excluded trees; this
        # selects the same files as testing each root path, without the walk.
        dirs[:] = [
            d for d in dirs
            if not any(marker in d for marker in excluded_markers)
        ]
        md_files.extend(
            os.path.join(root, name)
            for name in files
            if name.endswith('.md') and name != 'book_rule.md'
        )
    for path in md_files:
        try:
            process_file(path)
        except Exception as e:
            # Top-level boundary: report the failure and keep going.
            print(f"Error processing {path}: {e}")
# Run the fixer only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()