diff --git a/.gitignore b/.gitignore
index 94eaf02..7ced625 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,4 +16,4 @@ docker-compose.override.yml
.obsidian/
.vscode/
-.agent/
\ No newline at end of file
+.agent/combine.py
diff --git a/combine.py b/combine.py
deleted file mode 100644
index 14075b0..0000000
--- a/combine.py
+++ /dev/null
@@ -1,1001 +0,0 @@
-#!/usr/bin/env python3
-"""
-通用书籍合并工具 (Generic Book Combiner)
-
-功能:
-1. 自动扫描当前或指定目录。
-2. 解析 SUMMARY.md 获取章节结构。
-3. 解析 README.md 获取书籍标题和简介信息。
-4. 生成 single-page.md 和 single-page.html。
-"""
-
-import re
-import html
-import argparse
-import sys
-from pathlib import Path
-from datetime import datetime
-
-# HTML 模板
-HTML_TEMPLATE = """
-
-
-
-
- {title}
-
-
-
-
-
-
-
-
-
-
-
-
{title}
-
{subtitle}
-
-
-{content}
-
-
-
-
-
-
-
-"""
-
-
-def extract_book_info(project_dir: Path) -> tuple[str, str]:
- """
- 从 README.md 或 SUMMARY.md 中提取书籍标题和副标题。
-
- Returns:
- (title, subtitle)
- """
- title = "Untitled Book"
- subtitle = "Generated Book"
-
- # 优先尝试 README.md
- readme_path = project_dir / 'README.md'
- if readme_path.exists():
- try:
- content = readme_path.read_text(encoding='utf-8')
- lines = content.split('\n')
- for line in lines[:10]: # 只看前10行
- match = re.match(r'^#\s+(.+)$', line)
- if match:
- title = match.group(1).strip()
- break
-
- # 尝试查找引用块作为副标题
- for line in lines[:20]:
- match = re.match(r'^>\s+(.+)$', line)
- if match:
- subtitle = match.group(1).strip()
- break
- return title, subtitle
- except Exception:
- pass
-
- # 其次尝试 SUMMARY.md
- summary_path = project_dir / 'SUMMARY.md'
- if summary_path.exists():
- try:
- content = summary_path.read_text(encoding='utf-8')
- lines = content.split('\n')
- for line in lines[:5]:
- match = re.match(r'^#\s+(.+)$', line)
- if match:
- title = match.group(1).strip()
- return title, subtitle
- except Exception:
- pass
-
- return title, subtitle
-
-
-def parse_summary(summary_path: Path) -> list[tuple[str, str, int]]:
- """
- 解析 SUMMARY.md,提取所有章节链接。
-
- Returns:
- list of (title, file_path, indent_level)
- """
- entries = []
- if not summary_path.exists():
- return entries
-
- content = summary_path.read_text(encoding='utf-8')
-
- # 匹配 Markdown 链接格式: * [标题](文件路径) 或 - [标题](文件路径)
- # 支持多级缩进
- pattern = r'^(\s*)[\*\-]\s*\[([^\]]+)\]\(([^)]+)\)'
-
- for line in content.split('\n'):
- match = re.match(pattern, line)
- if match:
- indent = len(match.group(1))
- title = match.group(2)
- file_path = match.group(3)
-
- # 跳过外部链接
- if file_path.startswith('http'):
- continue
-
- entries.append((title, file_path, indent))
-
- return entries
-
-
-def convert_internal_links_to_anchors(content: str, file_to_anchor_map: dict[str, str]) -> str:
- """
- Convert internal markdown file links to anchor links for single-page output.
-
- Examples:
- [Title](1.2_xxx.md) -> [Title](#anchor-id)
- [Title](../04_mcp/README.md) -> [Title](#anchor-id)
- [Title](file.md#section) -> [Title](#section)
-
- Args:
- content: The markdown content to process
- file_to_anchor_map: Mapping from file paths to their anchor IDs
-
- Returns:
- Content with internal links converted to anchors
- """
- def replace_link(match):
- link_text = match.group(1)
- link_target = match.group(2)
-
- # Skip external URLs and mailto links
- if link_target.startswith('http://') or link_target.startswith('https://') or link_target.startswith('mailto:'):
- return match.group(0)
-
- # Skip image links (they start with !)
- # Check the character before the match - this is handled by the regex not matching ![]()
-
- # Handle anchor-only links
- if link_target.startswith('#'):
- return match.group(0)
-
- # Split target into file path and anchor
- if '#' in link_target:
- file_path, anchor = link_target.split('#', 1)
- # If there's a specific anchor, use it directly
- return f'[{link_text}](#{anchor})'
- else:
- file_path = link_target
-
- # Normalize the file path (remove ./, ../ prefixes and get the basename for matching)
- # Extract just the filename for simple matching
- normalized_path = file_path.replace('\\', '/').strip()
-
- # Try to find a matching anchor in the map
- # First try exact match
- if normalized_path in file_to_anchor_map:
- return f'[{link_text}](#{file_to_anchor_map[normalized_path]})'
-
- # Try matching by filename only (for links like ../04_mcp/README.md)
- from pathlib import PurePosixPath
- filename = PurePosixPath(normalized_path).name
-
- # Search for matching file in the map
- for path, anchor in file_to_anchor_map.items():
- if PurePosixPath(path).name == filename:
- # For README.md, we need to be more specific - check parent directory
- if filename == 'README.md':
- # Try to match by parent directory
- parts = normalized_path.replace('../', '').replace('./', '').split('/')
- if len(parts) >= 2:
- parent_dir = parts[-2]
- path_parts = path.split('/')
- if len(path_parts) >= 2 and path_parts[-2] == parent_dir:
- return f'[{link_text}](#{anchor})'
- continue
- return f'[{link_text}](#{anchor})'
-
- # If no match found, generate an anchor from the link text
- # This handles cases where the file might not be in the map
- fallback_anchor = re.sub(r'[^\w\u4e00-\u9fff]+', '-', link_text.lower()).strip('-')
- return f'[{link_text}](#{fallback_anchor})'
-
- # Match markdown links: [text](target) but not image links 
- # Use negative lookbehind for !
- pattern = r'(? str:
- """
- Fix relative image paths for single-page output.
-
- When combining files from different directories, relative image paths like
- `_images/xxx.png` need to be prefixed with the source file's directory.
-
- Examples:
- If file is from 07_coding/7.4_ide.md:
-  -> 
-
- Args:
- content: The markdown content to process
- file_path: The relative path of the source file (e.g., "07_coding/7.4_ide.md")
-
- Returns:
- Content with fixed image paths
- """
- from pathlib import PurePosixPath
-
- # Get the directory of the source file
- source_dir = str(PurePosixPath(file_path).parent)
-
- # If the file is in the root directory, no path fixing needed
- if source_dir == '.':
- return content
-
- def replace_image(match):
- alt_text = match.group(1)
- image_path = match.group(2)
-
- # Skip external URLs
- if image_path.startswith('http://') or image_path.startswith('https://'):
- return match.group(0)
-
- # Skip absolute paths
- if image_path.startswith('/'):
- return match.group(0)
-
- # Skip paths that already have a directory prefix (not starting with _images/)
- if not image_path.startswith('_images/') and not image_path.startswith('./_images/'):
- # Check if it's already a full path like 07_coding/_images/
- if '/_images/' in image_path or image_path.startswith('../'):
- return match.group(0)
-
- # Remove leading ./ if present
- clean_path = image_path.lstrip('./')
-
- # Prepend the source directory
- new_path = f"{source_dir}/{clean_path}"
-
- return f''
-
- # Match markdown image syntax: 
- pattern = r'!\[([^\]]*)\]\(([^)]+)\)'
- return re.sub(pattern, replace_image, content)
-
-
-def clean_navigation_links(content: str) -> str:
- """
- Remove navigation links (Next/Previous, arrows) from the end of the content.
- """
- lines = content.rstrip().split('\n')
-
- # Navigation line patterns
- nav_patterns = [
- r'^\s*[-=]{3,}\s*$', # Separator lines
- r'^\s*(\*\*|__)?(Next|Previous|下一[节章页]|上一[节章页])(\*\*|__)?.*$', # Text based
- r'^\s*(➡️|→|=>|==>|Example|Download)\s*.*$', # Arrow/Indicator based
- r'^\s*\[(Next|Previous|下一[节章]|上一[节章]).*?\]\(.*?\)\s*$', # Link with nav text
- ]
-
- # Also catch "Arrow [Link](Url)" specifically if not caught above
- # And purely link lines that look like nav " [Title](Url) " relative short
-
- while lines:
- last_line = lines[-1].strip()
- if not last_line:
- lines.pop()
- continue
-
- is_nav = False
-
- # Check explicit patterns
- for pattern in nav_patterns:
- if re.match(pattern, last_line, re.IGNORECASE):
- is_nav = True
- break
-
- # Check "Arrow + Link" specifically (common in this book)
- if not is_nav:
- # Pattern: Arrow (optional) + Link
- # e.g. "➡️ [Title](Link)"
- if re.match(r'^\s*(➡️|→|=>|==>)\s*\[.+?\]\(.+?\)\s*$', last_line):
- is_nav = True
-
- if is_nav:
- # print(f"DEBUG: Removing nav line: {last_line}")
- lines.pop()
- else:
- # Found a non-nav line, stop checking
- break
-
- return '\n'.join(lines)
-
-
-def clean_redundant_header(content: str, title: str, subtitle: str) -> str:
- """
- Remove the title and subtitle from the beginning of the content if they match the book info.
- """
- lines = content.split('\n')
-
- # Remove leading blank lines
- while lines and not lines[0].strip():
- lines.pop(0)
-
- if not lines:
- return content
-
- # Check for Title (H1)
- # Case 1: Exact match "# Title"
- # Case 2: Match with some whitespace flexibility
- if re.match(r'^#\s+' + re.escape(title) + r'\s*$', lines[0].strip(), re.IGNORECASE):
- lines.pop(0)
- # Remove blank lines after title
- while lines and not lines[0].strip():
- lines.pop(0)
-
- # Check for Subtitle (Blockquote)
- if subtitle and lines and lines[0].strip().startswith(">"):
- # Clean punctuation for comparison just in case
- line_text = lines[0].strip().lstrip('>').strip()
- if subtitle in line_text or line_text in subtitle:
- lines.pop(0)
- # Remove blank lines after subtitle
- while lines and not lines[0].strip():
- lines.pop(0)
-
- # Also remove common separator lines like "---" that often follow the header
- if lines and lines[0].strip().replace(' ', '') == '---':
- lines.pop(0)
- while lines and not lines[0].strip():
- lines.pop(0)
-
- return '\n'.join(lines)
-
-
-def markdown_to_html(md_content: str) -> str:
- """
- 将 Markdown 转换为 HTML。
- 简单实现,不依赖外部库。
- """
- lines = md_content.split('\n')
- html_lines = []
- in_code_block = False
- code_lang = ''
- code_content = []
- in_list = False
- in_table = False
- table_rows = []
-
- def process_inline(text: str) -> str:
- """处理行内格式"""
- # 代码
- text = re.sub(r'`([^`]+)`', r'\1', text)
- # 粗体
- text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
- # 斜体
- text = re.sub(r'\*([^*]+)\*', r'\1', text)
- # 链接
- text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1', text)
- return text
-
- i = 0
- while i < len(lines):
- line = lines[i]
-
- # 代码块
- if line.startswith('```'):
- if in_code_block:
- html_lines.append(f'
{html.escape(chr(10).join(code_content))}
')
- code_content = []
- in_code_block = False
- else:
- code_lang = line[3:].strip() or 'text'
- in_code_block = True
- i += 1
- continue
-
- if in_code_block:
- code_content.append(line)
- i += 1
- continue
-
- # 表格
- if '|' in line and not line.strip().startswith('```'):
- cells = [c.strip() for c in line.split('|')]
- cells = [c for c in cells if c] # 移除空单元格
-
- if cells and not all(re.match(r'^[-:]+$', c) for c in cells):
- if not in_table:
- in_table = True
- table_rows = []
- table_rows.append(cells)
- elif in_table and all(re.match(r'^[-:]+$', c) for c in cells):
- pass # 跳过分隔行
-
- # 检查下一行是否还是表格
- if i + 1 >= len(lines) or '|' not in lines[i + 1]:
- if in_table and table_rows:
- html_lines.append('
')
- for j, row in enumerate(table_rows):
- tag = 'th' if j == 0 else 'td'
- html_lines.append('
')
- for cell in row:
- html_lines.append(f'<{tag}>{process_inline(cell)}{tag}>')
- html_lines.append('