import os import re import argparse def check_file(filepath, verbose=False): violations = [] with open(filepath, 'r', encoding='utf-8') as f: content = f.read() lines = content.split('\n') filename = os.path.basename(filepath) is_readme = filename.lower() == 'readme.md' is_summary = filename.lower() == 'summary.md' is_section = re.match(r'^\d+\.\d+_.*\.md$', filename) # 1.1 Bold Text: No spaces inside the bold markers for i, line in enumerate(lines): if '** ' in line or ' **' in line: # We must be careful: '** ' might be a bold start with space inside, but it could also be regular text. # Let's use a simpler, line-bound regex line by line if re.search(r'\*\*\s+[^*]+\*\*|\*\*[^*]+\s+\*\*', line): violations.append(f"1.1 Bold Text: Space inside bold markers at line {i+1}") # 1.4 Trailing Newline if not content.endswith('\n') or content.endswith('\n\n'): if content: # ignore empty files violations.append("1.4 Trailing Newline: File must end with exactly one newline character") headers = [] for i, line in enumerate(lines): m = re.match(r'^(#{1,6})\s+(.*)', line) if m: level = len(m.group(1)) text = m.group(2) headers.append({'line': i, 'level': level, 'text': text}) # 1.2 Header Spacing if i + 1 < len(lines): next_line = lines[i+1].strip() if next_line != '': violations.append(f"1.2 Header Spacing: Header at line {i+1} not followed by a blank line") if i + 2 < len(lines): if lines[i+1].strip() == '' and lines[i+2].strip() == '': violations.append(f"1.2 Header Spacing: Header at line {i+1} followed by multiple blank lines") # 1.3 Header Hierarchy for j in range(len(headers) - 1): curr_level = headers[j]['level'] next_level = headers[j+1]['level'] if next_level > curr_level + 1: violations.append(f"1.3 Header Hierarchy: Skipped header level from H{curr_level} to H{next_level} at line {headers[j+1]['line']+1}") # 2.2 File Header Levels if headers: first_header_level = headers[0]['level'] if is_readme and first_header_level != 1: violations.append("2.2 File Header Levels: README.md first header must be level 1") if is_summary and first_header_level != 2: violations.append("2.2 File Header Levels: SUMMARY.md first header must be level 2") if is_section and first_header_level != 2: violations.append("2.2 File Header Levels: Section file first header must be level 2") # 2.2 No English Parentheses in Headers unless very common terminologies for h in headers: text = h['text'] i = h['line'] if re.search(r'([A-Za-z\s]+)', text): violations.append(f"2.2 No English Parentheses: Header at line {i+1} contains English in parentheses: {text}") # 2.3 Single Child Headers for j in range(len(headers)): level = headers[j]['level'] children = 0 for k in range(j+1, len(headers)): if headers[k]['level'] <= level: break if headers[k]['level'] == level + 1: children += 1 if children == 1: violations.append(f"2.3 Single Child Headers: Header at line {headers[j]['line']+1} has exactly 1 child") # 2.5 Bridge Text for j in range(len(headers)): level = headers[j]['level'] child_line = -1 for k in range(j+1, len(headers)): if headers[k]['level'] <= level: break if headers[k]['level'] == level + 1: child_line = headers[k]['line'] break if child_line != -1: text_between = '\n'.join([l.strip() for l in lines[headers[j]['line']+1:child_line] if l.strip()]) if not text_between: violations.append(f"2.5 Bridge Text: Header at line {headers[j]['line']+1} is followed by a sub-header without introductory text") # 3.2 Content Introduction in_code_block = False for j, line in enumerate(lines): if line.startswith('```'): if not in_code_block: for k in range(j-1, -1, -1): if lines[k].strip(): if lines[k].startswith('#'): violations.append(f"3.2 Content Introduction: Code block at line {j+1} immediately follows a header") break in_code_block = not in_code_block elif "![" in line and "](" in line: for k in range(j-1, -1, -1): if lines[k].strip(): if lines[k].startswith('#'): violations.append(f"3.2 Content Introduction: Image at line {j+1} immediately follows a header") break return violations def main(): parser = argparse.ArgumentParser() parser.add_argument('-v', '--verbose', action='store_true') args = parser.parse_args() md_files = [] for root, dirs, files in os.walk('.'): for f in files: if f.endswith('.md') and '.git' not in root and 'node_modules' not in root and '.vuepress' not in root and 'book_rule.md' not in f and 'rules_result.txt' not in f: md_files.append(os.path.join(root, f)) total_violations = 0 for f in md_files: try: violations = check_file(f, args.verbose) if args.verbose: print(f"Scanned {f}") if violations: print(f"\\nViolations in {f}:") for v in violations: print(f" - {v}") total_violations += len(violations) except Exception as e: print(f"Error reading {f}: {e}") if total_violations == 0: print("No violations found!") else: print(f"\\nTotal violations: {total_violations}") if __name__ == '__main__': main()