Files
docker_practice/check_project_rules.py
2026-02-21 16:43:31 -08:00

149 lines
6.0 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import argparse
# Patterns are compiled once at import time; check_file runs once per file.
_HEADER_RE = re.compile(r'^(#{1,6})\s+(.*)')
_SECTION_FILE_RE = re.compile(r'^\d+\.\d+_.*\.md$')
# Non-greedy so that markers are paired left to right: in "**a** and **b**"
# the spans are "a" and "b", never " and ".
_BOLD_SPAN_RE = re.compile(r'\*\*(.+?)\*\*')
# Latin-only text wrapped in full-width (U+FF08/U+FF09) or ASCII parentheses.
# Written with \u escapes so the source stays pure ASCII.
_ENGLISH_PAREN_RE = re.compile(r'[\uff08(]\s*[A-Za-z][A-Za-z\s]*[\uff09)]')
_FENCE = '```'


def _scan_fences(lines):
    """Locate fenced code blocks.

    Returns a pair ``(in_code, openers)``: ``in_code[i]`` is True when line
    ``i`` is a fence delimiter or lies between an opening and a closing
    fence, and ``openers`` is the set of indices of opening fence lines.
    A fence is any line that starts with three backticks.
    """
    in_code = []
    openers = set()
    inside = False
    for idx, line in enumerate(lines):
        if line.startswith(_FENCE):
            if not inside:
                openers.add(idx)
            inside = not inside
            in_code.append(True)
        else:
            in_code.append(inside)
    return in_code, openers


def _direct_children(headers, index):
    """Return the headers one level below ``headers[index]`` inside its section."""
    parent_level = headers[index]['level']
    children = []
    for candidate in headers[index + 1:]:
        if candidate['level'] <= parent_level:
            break  # the parent's section ends at the next sibling/ancestor
        if candidate['level'] == parent_level + 1:
            children.append(candidate)
    return children


def _follows_header(lines, index):
    """Return True when the closest non-blank line above ``index`` starts with '#'."""
    for previous in reversed(lines[:index]):
        if previous.strip():
            return previous.startswith('#')
    return False


def check_file(filepath, verbose=False):
    """Check one Markdown file against the project's formatting rules.

    The file is read as UTF-8 and split on newlines. Lines inside fenced
    code blocks are not treated as Markdown, so shell comments such as
    ``# install deps`` are not mistaken for headers and ``**`` inside code
    is not mistaken for bold markers.

    Rules checked, in reporting order:
      1.1 no whitespace just inside ``**bold**`` markers
      1.4 a non-empty file ends with exactly one newline
      1.2 every header is followed by exactly one blank line
      1.3 header levels never jump by more than one
      2.2 first header level for README.md (1), SUMMARY.md (2) and
          ``N.N_*.md`` section files (2)
      2.2 no Latin-only text in parentheses inside a header
      2.3 no header with exactly one direct child
      2.5 text exists between a header and its first direct child
      3.2 code blocks and images do not directly follow a header

    Args:
        filepath: Path of the Markdown file to check.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        A list of human-readable violation messages (empty when clean).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    lines = content.split('\n')
    in_code, fence_openers = _scan_fences(lines)

    filename = os.path.basename(filepath)
    lowered = filename.lower()
    is_readme = lowered == 'readme.md'
    is_summary = lowered == 'summary.md'
    is_section = _SECTION_FILE_RE.match(filename) is not None

    violations = []

    # 1.1 Bold Text: no spaces inside the bold markers (one report per line).
    for idx, line in enumerate(lines):
        if in_code[idx]:
            continue
        if any(span.group(1) != span.group(1).strip()
               for span in _BOLD_SPAN_RE.finditer(line)):
            violations.append(f"1.1 Bold Text: Space inside bold markers at line {idx+1}")

    # 1.4 Trailing Newline (empty files are exempt).
    if content and (not content.endswith('\n') or content.endswith('\n\n')):
        violations.append("1.4 Trailing Newline: File must end with exactly one newline character")

    # Collect headers and check 1.2 Header Spacing as each one is found.
    headers = []
    for idx, line in enumerate(lines):
        if in_code[idx]:
            continue
        m = _HEADER_RE.match(line)
        if not m:
            continue
        headers.append({'line': idx, 'level': len(m.group(1)), 'text': m.group(2)})
        following = lines[idx + 1:idx + 3]
        if following and following[0].strip():
            violations.append(f"1.2 Header Spacing: Header at line {idx+1} not followed by a blank line")
        if len(following) == 2 and not following[0].strip() and not following[1].strip():
            violations.append(f"1.2 Header Spacing: Header at line {idx+1} followed by multiple blank lines")

    # 1.3 Header Hierarchy: going deeper may only happen one level at a time.
    for current, following_header in zip(headers, headers[1:]):
        curr_level = current['level']
        next_level = following_header['level']
        if next_level > curr_level + 1:
            violations.append(f"1.3 Header Hierarchy: Skipped header level from H{curr_level} to H{next_level} at line {following_header['line']+1}")

    # 2.2 File Header Levels
    if headers:
        first_header_level = headers[0]['level']
        if is_readme and first_header_level != 1:
            violations.append("2.2 File Header Levels: README.md first header must be level 1")
        if is_summary and first_header_level != 2:
            violations.append("2.2 File Header Levels: SUMMARY.md first header must be level 2")
        if is_section and first_header_level != 2:
            violations.append("2.2 File Header Levels: Section file first header must be level 2")

    # 2.2 No English Parentheses in Headers. The pattern requires the
    # parentheses themselves; matching bare letters would flag every header.
    for h in headers:
        text = h['text']
        if _ENGLISH_PAREN_RE.search(text):
            violations.append(f"2.2 No English Parentheses: Header at line {h['line']+1} contains English in parentheses: {text}")

    # 2.3 Single Child Headers
    for j, h in enumerate(headers):
        if len(_direct_children(headers, j)) == 1:
            violations.append(f"2.3 Single Child Headers: Header at line {h['line']+1} has exactly 1 child")

    # 2.5 Bridge Text: something non-blank must sit between a header and its
    # first direct child.
    for j, h in enumerate(headers):
        children = _direct_children(headers, j)
        if not children:
            continue
        between = lines[h['line'] + 1:children[0]['line']]
        if not any(l.strip() for l in between):
            violations.append(f"2.5 Bridge Text: Header at line {h['line']+1} is followed by a sub-header without introductory text")

    # 3.2 Content Introduction: code blocks and images need a lead-in sentence.
    for idx, line in enumerate(lines):
        if idx in fence_openers:
            if _follows_header(lines, idx):
                violations.append(f"3.2 Content Introduction: Code block at line {idx+1} immediately follows a header")
        elif not in_code[idx] and "![" in line and "](" in line:
            if _follows_header(lines, idx):
                violations.append(f"3.2 Content Introduction: Image at line {idx+1} immediately follows a header")

    return violations
def main():
    """Check every Markdown file under the current directory and print a report.

    Walks ``.`` recursively and runs ``check_file`` on each ``*.md`` file,
    skipping any directory whose path contains ``.git``, ``node_modules`` or
    ``.vuepress`` and any file whose name contains ``book_rule.md`` or
    ``rules_result.txt``. With ``-v``/``--verbose`` each scanned path is
    printed as well. A file that cannot be processed is reported and
    skipped; it does not abort the run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    # Substring matches, kept as-is so the set of scanned files is unchanged.
    skipped_dir_markers = ('.git', 'node_modules', '.vuepress')
    skipped_name_markers = ('book_rule.md', 'rules_result.txt')

    md_files = []
    for root, _dirs, files in os.walk('.'):
        if any(marker in root for marker in skipped_dir_markers):
            continue
        for name in files:
            if name.endswith('.md') and not any(marker in name for marker in skipped_name_markers):
                md_files.append(os.path.join(root, name))

    total_violations = 0
    for path in md_files:
        # Top-level boundary: one unreadable or undecodable file must not
        # stop the remaining files from being checked.
        try:
            violations = check_file(path, args.verbose)
            if args.verbose:
                print(f"Scanned {path}")
            if violations:
                # A real newline separates per-file sections; the doubled
                # backslash previously printed the two characters "\n".
                print(f"\nViolations in {path}:")
                for v in violations:
                    print(f" - {v}")
                total_violations += len(violations)
        except Exception as e:
            print(f"Error reading {path}: {e}")

    if total_violations == 0:
        print("No violations found!")
    else:
        print(f"\nTotal violations: {total_violations}")
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()