# docker_practice/fix_project_rules.py

import os
import re

def fix_bold_spaces(line):
    """Trim whitespace just inside balanced ``**bold**`` markers on one line.

    The line is split on ``**``. Only when the markers pair up (an odd number
    of segments, at least three) are the odd-indexed segments -- the text
    between an opening and a closing marker -- stripped of leading and
    trailing whitespace. Segments that consist solely of whitespace (or are
    empty) are left exactly as they were.

    Args:
        line: A single line of text without its line terminator.

    Returns:
        The line with padded bold spans tightened, or the original line when
        the ``**`` markers are absent or unbalanced.
    """
    segments = line.split("**")
    segment_count = len(segments)

    # An even count means a dangling "**"; fewer than three means no pair.
    if segment_count < 3 or segment_count % 2 == 0:
        return line

    rebuilt = []
    for position, segment in enumerate(segments):
        if position % 2 == 1:
            trimmed = segment.strip()
            # Keep whitespace-only spans untouched rather than emptying them.
            rebuilt.append(trimmed if trimmed else segment)
        else:
            rebuilt.append(segment)

    return "**".join(rebuilt)

def fix_trailing_newline(content):
    """Make non-empty text end with exactly one newline character.

    Args:
        content: The full text of a file.

    Returns:
        ``content`` unchanged when it is empty (or otherwise falsy);
        otherwise the text with all trailing ``\\n`` characters collapsed
        into a single one.
    """
    if content:
        body = content.rstrip('\n')
        return f"{body}\n"
    return content

# ATX header: one to six '#', whitespace, then the header text.
_HEADER_RE = re.compile(r'^(#{1,6})\s+(.*)')
# A line starting with this toggles "inside fenced code block" state.
_FENCE = '```'
# Upper bound on header-level fixing rounds, guarding against oscillation.
_MAX_LEVEL_PASSES = 100


def _collect_headers(lines):
    """Return ATX headers found outside fenced code blocks.

    Each entry is a dict with ``line`` (index into ``lines``), ``level``
    (number of ``#``) and ``text`` (everything after the marker).
    """
    headers = []
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith(_FENCE):
            in_code_block = not in_code_block
        if in_code_block:
            continue
        m = _HEADER_RE.match(line)
        if m:
            headers.append({'line': idx, 'level': len(m.group(1)), 'text': m.group(2)})
    return headers


def _expected_first_level(filename):
    """Return the required level of a file's first header, or None if unconstrained."""
    lowered = filename.lower()
    if lowered == 'readme.md':
        return 1
    if lowered == 'summary.md':
        return 2
    # Section files are named like "1.2_title.md".
    if re.match(r'^\d+\.\d+_.*\.md$', filename):
        return 2
    return None


def _fix_bold_outside_code(lines):
    """Apply fix_bold_spaces in place to every line that is not fenced code."""
    in_code_block = False
    for idx, line in enumerate(lines):
        if line.startswith(_FENCE):
            in_code_block = not in_code_block
            continue
        # Inside a fence "**" is code (exponent, kwargs, glob), not bold markup.
        if not in_code_block:
            lines[idx] = fix_bold_spaces(line)


def _normalize_header_levels(lines, expected_first):
    """Fix the first header's level and close gaps in the header hierarchy, in place."""
    for _ in range(_MAX_LEVEL_PASSES):
        changed = False
        headers = _collect_headers(lines)

        if headers and expected_first:
            first = headers[0]
            if first['level'] != expected_first:
                lines[first['line']] = '#' * expected_first + ' ' + first['text']
                changed = True

        # A header may be at most one level deeper than its predecessor.
        # Levels are read from this round's snapshot; cascading corrections
        # are picked up by the next round.
        for curr, nxt in zip(headers, headers[1:]):
            if nxt['level'] > curr['level'] + 1:
                lines[nxt['line']] = '#' * (curr['level'] + 1) + ' ' + nxt['text']
                changed = True

        if not changed:
            break


def _strip_header_parentheticals(lines):
    """Remove ASCII-only parenthesised notes (full- or half-width) from headers, in place."""
    for header in _collect_headers(lines):
        text = header['text']
        new_text = re.sub(r'（[A-Za-z\s0-9]+）', '', text)
        new_text = re.sub(r'\([A-Za-z\s0-9]+\)', '', new_text)
        if new_text != text:
            lines[header['line']] = '#' * header['level'] + ' ' + new_text.strip()


def _insert_overview_for_single_children(lines):
    """Insert an overview sibling before any header that is its parent's only child, in place."""
    headers = _collect_headers(lines)
    inserts = set()
    for j, parent in enumerate(headers):
        level = parent['level']
        children = []
        for candidate in headers[j + 1:]:
            if candidate['level'] <= level:
                break  # left the parent's section
            if candidate['level'] == level + 1:
                children.append(candidate)
        if len(children) == 1:
            inserts.add((children[0]['line'], level + 1))

    # Insert bottom-up so earlier line indices stay valid.
    for line_idx, lvl in sorted(inserts, key=lambda item: item[0], reverse=True):
        lines[line_idx:line_idx] = ['#' * lvl + ' 概述', '', '总体概述了以下内容。', '']


def _add_intro_text(lines):
    """Return new lines with one blank line after each header plus any needed lead-in sentence."""
    out_lines = []
    in_code_block = False
    total = len(lines)

    i = 0
    while i < total:
        line = lines[i]

        if line.startswith(_FENCE):
            in_code_block = not in_code_block

        header_match = None if in_code_block else _HEADER_RE.match(line)
        out_lines.append(line)

        if header_match:
            curr_level = len(header_match.group(1))

            # Skip the blank lines that currently follow the header.
            k = i + 1
            while k < total and lines[k].strip() == '':
                k += 1

            out_lines.append('')  # exactly one blank line after the header

            if k < total:
                next_content = lines[k].strip()
                next_m = _HEADER_RE.match(next_content)

                if next_m and len(next_m.group(1)) > curr_level:
                    # Header directly followed by a deeper header: bridge text.
                    out_lines.append('本节涵盖了相关内容与详细描述，主要探讨以下几个方面：')
                    out_lines.append('')
                elif next_content.startswith(_FENCE):
                    # Header directly followed by a code block: code intro.
                    out_lines.append('如下代码块所示，展示了相关示例：')
                    out_lines.append('')
                elif "![" in next_content and "](" in next_content:
                    # Header directly followed by an image: image intro.
                    out_lines.append('下图直观地展示了本节内容：')
                    out_lines.append('')

            # Resume at the first non-blank line after the header.
            i = k - 1

        i += 1

    return out_lines


def process_file(filepath):
    """Rewrite one Markdown file in place so it follows the project rules.

    The passes run in this order:

    1. Trim whitespace inside ``**bold**`` markers. Lines inside fenced code
       blocks are skipped so code such as ``a ** 2 + b ** 2`` is not altered.
    2. Force the first header level (1 for ``README.md``, 2 for ``SUMMARY.md``
       and ``N.N_*.md`` section files) and make sure no header is more than
       one level deeper than the header before it.
    3. Drop parenthesised ASCII-only notes from header text.
    4. Insert an overview header and sentence before a header that is the
       only child of its parent.
    5. Put exactly one blank line after every header and add a lead-in
       sentence when a header is immediately followed by a deeper header,
       a code fence, or an image.

    Finally the text is written back ending with a single newline.

    Args:
        filepath: Path of the Markdown file to read and overwrite (UTF-8).

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    lines = content.split('\n')

    _fix_bold_outside_code(lines)
    _normalize_header_levels(lines, _expected_first_level(os.path.basename(filepath)))
    _strip_header_parentheticals(lines)
    _insert_overview_for_single_children(lines)
    out_lines = _add_intro_text(lines)

    content = fix_trailing_newline('\n'.join(out_lines))

    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(content)

def main():
    """Apply process_file to every Markdown file under the current directory.

    Directories whose name contains ``node_modules``, ``.git`` or
    ``.vuepress`` are excluded together with everything beneath them, and
    ``book_rule.md`` is never touched. A failure on one file is reported on
    standard output and does not stop the remaining files from being
    processed.
    """
    # Substring match on purpose: this also excludes e.g. ".github".
    skip_markers = ('node_modules', '.git', '.vuepress')

    md_files = []
    for root, dirs, files in os.walk('.'):
        # Prune in place so os.walk never descends into excluded trees.
        dirs[:] = [
            d for d in dirs
            if not any(marker in d for marker in skip_markers)
        ]
        md_files.extend(
            os.path.join(root, name)
            for name in files
            if name.endswith('.md') and name != 'book_rule.md'
        )

    for path in md_files:
        try:
            process_file(path)
        except Exception as e:
            # Best-effort batch run: report and carry on with the next file.
            print(f"Error processing {path}: {e}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()