import os import re def fix_file(filepath): try: with open(filepath, 'r', encoding='utf-8') as f: lines = f.readlines() except Exception as e: print(f"Could not read file {filepath}: {e}") return False if not lines: return False new_lines = [] in_code_block = False for i, line in enumerate(lines): line_stripped = line.strip() # Code block tracking if line_stripped.startswith('```'): in_code_block = not in_code_block # 1. Full-width parentheses `(` `)` # We replace them with space + half-width parenthesis except if it's already spaced if not in_code_block and ('(' in line or ')' in line): # Replace left parenthesis line = re.sub(r'([^\s])(', r'\1 (', line) line = re.sub(r'\s*(', r' (', line) # Replace right parenthesis line = re.sub(r')([^\s.,;?!:,。;?!:])', r') \1', line) line = re.sub(r')\s*', r')', line) # Also a quick hack to replace any leftover ( ) line = line.replace('(', '(').replace(')', ')') # 3. Missing blank line before list item is_list_item = re.match(r'^(\s*[-*+]\s|\s*\d+\.\s)', line) if not in_code_block and is_list_item and i > 0: prev_line = lines[i-1] prev_line_stripped = prev_line.strip() # If prev line is not empty, and not already a list item, header, quote, HTML, or table if prev_line_stripped and not prev_line_stripped.startswith('#') and not prev_line_stripped.startswith('>'): if not re.match(r'^(\s*[-*+]\s|\s*\d+\.\s)', prev_line) and not prev_line_stripped.startswith('