# Mirror of https://github.com/yeasy/docker_practice.git
# Synced 2026-03-10 20:04:36 +00:00
# 196 lines, 6.5 KiB (the mirror page labels this file "Go"; the content is Python)
import os
|
||
import re
|
||
|
||
# Inline code span: a run of backticks, some content, then the same run again.
_INLINE_CODE = re.compile(r'(`+).+?\1')


def fix_bold_spaces(line):
    """Trim whitespace just inside Markdown ``**bold**`` markers on one line.

    ``"a ** bold ** b"`` becomes ``"a **bold** b"``.  The line is returned
    unchanged unless it contains an even, non-zero number of ``**`` markers
    (i.e. every opener has a closer).  A bold span that holds only
    whitespace is left alone.

    ``**`` sequences inside inline code spans (text between matching
    backtick runs) are not treated as bold markers, so code such as
    ```2 ** 3 ** 4``` is never rewritten.

    Args:
        line: A single line of Markdown text, without its newline.

    Returns:
        The line with padding removed from inside each bold span.
    """
    # Mask code spans with a same-length filler so marker offsets found in
    # the masked copy can be applied directly to the original text.
    masked = _INLINE_CODE.sub(lambda m: 'x' * len(m.group(0)), line)

    # Split the ORIGINAL line at every '**' that is visible in the masked copy.
    parts = []
    start = 0
    while True:
        pos = masked.find('**', start)
        if pos == -1:
            break
        parts.append(line[start:pos])
        start = pos + 2
    parts.append(line[start:])

    # Balanced markers always yield an odd number of pieces, at least three.
    if len(parts) < 3 or len(parts) % 2 == 0:
        return line

    # Odd-indexed pieces are the text between an opening and closing marker.
    for i in range(1, len(parts), 2):
        trimmed = parts[i].strip()
        if trimmed:
            parts[i] = trimmed
    return '**'.join(parts)
|
||
|
||
def fix_trailing_newline(content):
    """Return *content* terminated by exactly one newline character.

    Any run of trailing ``\\n`` characters is collapsed to a single one, and
    one is added when none is present.  Falsy input (such as the empty
    string) is handed back untouched.
    """
    if content:
        body = content.rstrip('\n')
        return body + '\n'
    return content
|
||
|
||
# ATX header: one to six '#', whitespace, then the header text.
_HEADER_RE = re.compile(r'^(#{1,6})\s+(.*)')


def _collect_headers(lines):
    """Return the headers found outside ``` fenced code blocks.

    Each header is a dict with its index in *lines* (``'line'``), its depth
    (``'level'``, the number of ``#``) and its text (``'text'``).
    """
    headers = []
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith('```'):
            in_code_block = not in_code_block
        if not in_code_block:
            m = _HEADER_RE.match(line)
            if m:
                headers.append({'line': idx, 'level': len(m.group(1)), 'text': m.group(2)})
    return headers


def process_file(filepath):
    """Normalize the Markdown file at *filepath* and rewrite it in place.

    The file is read as UTF-8, transformed by the passes below, and written
    back (always, even when nothing changed):

    0. Trim padding inside ``**bold**`` markers via ``fix_bold_spaces`` on
       every line outside a ``` fenced code block.
    1-2. Force the first header to level 1 for ``readme.md`` and to level 2
       for ``summary.md`` and files named ``<digits>.<digits>_*.md``, then
       pull up any header that is more than one level deeper than the
       header before it.  Repeated until stable (at most 100 rounds).
    3. Remove parenthesized annotations made only of ASCII letters, digits
       and whitespace from header text, in both full-width and ASCII
       parentheses.
    4. Where a header has exactly one direct child header, insert a
       "概述" header and a one-sentence paragraph before that child.
    5. Leave exactly one blank line after every header, and add an intro
       sentence when the next content is a deeper header or a code fence.

    The output always ends with a single newline (``fix_trailing_newline``).

    Args:
        filepath: Path of the Markdown file to rewrite.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    filename = os.path.basename(filepath)
    is_readme = filename.lower() == 'readme.md'
    is_summary = filename.lower() == 'summary.md'
    is_section = bool(re.match(r'^\d+\.\d+_.*\.md$', filename))

    # Pass 0: bold markers.  Fenced code is skipped: '**' there is code
    # (shell globs, exponent operators), not emphasis, and must stay intact.
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith('```'):
            in_code_block = not in_code_block
            continue
        if not in_code_block:
            lines[idx] = fix_bold_spaces(line)

    # Pass 1 & 2: First Header Level & Hierarchy
    if is_readme:
        expected = 1
    elif is_summary or is_section:
        expected = 2
    else:
        expected = None

    # Each round works from a fresh scan; a fix can expose a new violation
    # further down, so rescan until a round makes no change.  The cap only
    # guards against an endless loop.
    for _ in range(100):
        changed = False
        headers = _collect_headers(lines)

        if headers and expected and headers[0]['level'] != expected:
            first_h = headers[0]
            lines[first_h['line']] = '#' * expected + ' ' + first_h['text']
            changed = True

        for j in range(len(headers) - 1):
            curr_level = headers[j]['level']
            nxt = headers[j + 1]
            if nxt['level'] > curr_level + 1:
                lines[nxt['line']] = '#' * (curr_level + 1) + ' ' + nxt['text']
                changed = True

        if not changed:
            break

    # Pass 3: Parentheses
    for h in _collect_headers(lines):
        text = h['text']
        # The full-width pattern needs literal （ ） characters.  Written with
        # unescaped ASCII parentheses it becomes a plain regex group and the
        # substitution deletes every ASCII letter, digit and space in the
        # header.
        new_text = re.sub(r'（[A-Za-z\s0-9]+）', '', text)
        new_text = re.sub(r'\([A-Za-z\s0-9]+\)', '', new_text)
        stripped = new_text.strip()
        # Keep the header as-is if removing the annotation would leave it empty.
        if new_text != text and stripped:
            lines[h['line']] = '#' * h['level'] + ' ' + stripped

    # Pass 4: Single Child Headers Loop
    headers = _collect_headers(lines)
    inserts = []
    for j, parent in enumerate(headers):
        level = parent['level']
        children = []
        for later in headers[j + 1:]:
            if later['level'] <= level:
                break  # left the parent's section
            if later['level'] == level + 1:
                children.append(later)
        if len(children) == 1:
            inserts.append((children[0]['line'], level + 1))

    # Insert from the bottom up so earlier line indexes stay valid.
    for line_idx, lvl in sorted(set(inserts), key=lambda x: x[0], reverse=True):
        # We must insert BEFORE the ONLY child
        lines[line_idx:line_idx] = [
            '#' * lvl + ' 概述',
            '',
            '总体概述了以下内容。',
            '',
        ]

    # Pass 5: Output structure (Bridge text & Content Intro)
    out_lines = []
    in_code_block = False

    i = 0
    while i < len(lines):
        line = lines[i]

        if line.startswith('```'):
            in_code_block = not in_code_block

        header_m = None if in_code_block else _HEADER_RE.match(line)

        out_lines.append(line)

        if not header_m:
            i += 1
            continue

        curr_level = len(header_m.group(1))

        # Find the first non-blank line after the header.
        k = i + 1
        while k < len(lines) and lines[k].strip() == '':
            k += 1

        out_lines.append('')  # Ensure ONE blank line follows the header

        if k < len(lines):
            next_content = lines[k].strip()
            next_m = _HEADER_RE.match(next_content)

            if next_m and len(next_m.group(1)) > curr_level:
                # Bridge text
                out_lines.append('本节涵盖了相关内容与详细描述,主要探讨以下几个方面:')
                out_lines.append('')
            elif next_content.startswith('```'):
                # codeblock intro
                out_lines.append('如下代码块所示,展示了相关示例:')
                out_lines.append('')
            # NOTE(review): the reviewed copy of this script is truncated at
            # a third `elif "...` branch at this point; its condition and
            # intro text are missing and have not been reconstructed.
            # Restore that branch from the upstream repository.

        # Resume at the next real content line; the blank lines between it
        # and the header were replaced by the single blank added above.
        i = k

    content = '\n'.join(out_lines)
    content = fix_trailing_newline(content)

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)
|
||
|
||
def main():
    """Run ``process_file`` on every Markdown file under the current directory.

    Directories whose path contains ``node_modules``, ``.git`` or
    ``.vuepress`` (substring match) are skipped, as is any file named
    ``book_rule.md``.  All paths are collected before processing starts.
    A failure on one file is printed and does not stop the remaining files.
    """
    skipped_markers = ('node_modules', '.git', '.vuepress')

    targets = []
    for root, _dirs, files in os.walk('.'):
        if any(marker in root for marker in skipped_markers):
            continue
        targets.extend(
            os.path.join(root, name)
            for name in files
            if name.endswith('.md') and name != 'book_rule.md'
        )

    for path in targets:
        try:
            process_file(path)
        except Exception as e:
            # Best-effort batch run: report the failure and carry on.
            print(f"Error processing {path}: {e}")
|
||
|
||
# Script entry point: run the formatter only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    main()
|