-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
131 lines (112 loc) · 5.1 KB
/
main.py
File metadata and controls
131 lines (112 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import dotenv
import os
import argparse
# Import the function that creates the flow
from flow import create_tutorial_flow
dotenv.load_dotenv()
# Default file patterns
# Fallback glob sets: main() uses them only when the corresponding
# command-line option (include / exclude) is not supplied.
DEFAULT_INCLUDE_PATTERNS = {
    # Python sources and stubs
    "*.py",
    "*.pyi",
    "*.pyx",
    # JavaScript / TypeScript
    "*.js",
    "*.jsx",
    "*.ts",
    "*.tsx",
    # Other compiled languages
    "*.go",
    "*.java",
    "*.c",
    "*.cc",
    "*.cpp",
    "*.h",
    # Documentation
    "*.md",
    "*.rst",
    # Build and configuration files
    "*Dockerfile",
    "*Makefile",
    "*.yaml",
    "*.yml",
}

DEFAULT_EXCLUDE_PATTERNS = {
    # Asset and data directories anchored at the project root
    "assets/*",
    "data/*",
    "images/*",
    "public/*",
    "static/*",
    "temp/*",
    "v1/*",
    # Tooling / VCS metadata directories
    ".git/*",
    ".github/*",
    ".next/*",
    ".vscode/*",
    # Virtual environments and third-party dependencies
    "*venv/*",
    "*.venv/*",
    "*node_modules/*",
    # Tests, docs and examples
    "*test*",
    "*tests/*",
    "*docs/*",
    "*examples/*",
    # Build output
    "*dist/*",
    "*build/*",
    "*obj/*",
    "*bin/*",
    # Code that is not part of the main line
    "*experimental/*",
    "*deprecated/*",
    "*misc/*",
    "*legacy/*",
    # Log files
    "*.log",
}

# Language used for the generated tutorial when --language is not given.
DEFAULT_TUTORIAL_LANGUAGE = "Chinese"
# --- Main Function ---
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the tutorial generator."""
    parser = argparse.ArgumentParser(description="Generate a tutorial for a GitHub codebase or local directory.")

    # Exactly one source must be given: a remote repository or a local directory.
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--repo", help="URL of the public GitHub repository.")
    source_group.add_argument("--dir", help="Path to local directory.")

    parser.add_argument("-n", "--name", help="Project name (optional, derived from repo/directory if omitted).")
    parser.add_argument("-t", "--token", help="GitHub personal access token (optional, reads from GITHUB_TOKEN env var if not provided).")
    parser.add_argument("-o", "--output", default="output", help="Base directory for output (default: ./output).")
    parser.add_argument("-i", "--include", nargs="+", help="Include file patterns (e.g. '*.py' '*.js'). Defaults to common code files if not specified.")
    parser.add_argument("-e", "--exclude", nargs="+", help="Exclude file patterns (e.g. 'tests/*' 'docs/*'). Defaults to test/build directories if not specified.")
    parser.add_argument("-s", "--max-size", type=int, default=100000, help="Maximum file size in bytes (default: 100000, about 100KB).")
    # Language of the generated tutorial (multi-language support).
    parser.add_argument(
        "--language",
        default=DEFAULT_TUTORIAL_LANGUAGE,
        help=f"Language for the generated tutorial (default: {DEFAULT_TUTORIAL_LANGUAGE})",
    )
    # Caching is on by default; this flag switches it off.
    parser.add_argument("--no-cache", action="store_true", help="Disable LLM response caching (default: caching enabled)")
    # Upper bound on the number of abstractions to identify.
    parser.add_argument("--max-abstractions", type=int, default=10, help="Maximum number of abstractions to identify (default: 10)")
    # The next two default to None so that downstream code can apply its own
    # fallback (per the help text: an environment variable, then a constant).
    parser.add_argument(
        "--max-extraction-batches",
        type=int,
        default=None,
        help="Maximum LLM extraction batches during abstraction analysis (default: env LLM_MAX_EXTRACTION_BATCHES or 40)",
    )
    parser.add_argument(
        "--llm-extraction-concurrency",
        type=int,
        default=None,
        help="Concurrent LLM workers for abstraction extraction (default: env LLM_EXTRACTION_CONCURRENCY or 1)",
    )
    return parser


def _build_shared(args: argparse.Namespace) -> dict:
    """Turn parsed arguments into the shared dictionary handed to the flow."""
    # A GitHub token only matters for --repo; fall back to the environment
    # and warn (without failing) when none is available.
    github_token = None
    if args.repo:
        github_token = args.token or os.environ.get('GITHUB_TOKEN')
        if not github_token:
            print("Warning: No GitHub token provided. You might hit rate limits for public repositories.")

    return {
        # Inputs
        "repo_url": args.repo,
        "local_dir": args.dir,
        "project_name": args.name,  # Can be None, FetchRepo will derive it
        "github_token": github_token,
        "output_dir": args.output,  # Base directory for CombineTutorial output
        # File selection. The defaults are copied so that whatever consumes
        # `shared` cannot mutate the module-level constants.
        "include_patterns": set(args.include) if args.include else set(DEFAULT_INCLUDE_PATTERNS),
        "exclude_patterns": set(args.exclude) if args.exclude else set(DEFAULT_EXCLUDE_PATTERNS),
        "max_file_size": args.max_size,
        # Tutorial language
        "language": args.language,
        # use_cache is the inverse of the --no-cache flag
        "use_cache": not args.no_cache,
        # Abstraction-analysis tuning
        "max_abstraction_num": args.max_abstractions,
        "max_extraction_batches": args.max_extraction_batches,
        "llm_extraction_concurrency": args.llm_extraction_concurrency,
        # Outputs will be populated by the nodes
        "files": [],
        "abstractions": [],
        "relationships": {},
        "chapter_order": [],
        "chapters": [],
        "final_output_dir": None,
    }


def main(argv=None):
    """Parse command-line options and run the tutorial-generation flow.

    Args:
        argv: Optional sequence of argument strings to parse. When None
            (the default) argparse reads ``sys.argv[1:]``, so calling
            ``main()`` behaves as before; passing a list allows the tool
            to be driven programmatically.
    """
    args = _build_parser().parse_args(argv)
    shared = _build_shared(args)

    # Display starting message with repository/directory and language
    print(f"Starting tutorial generation for: {args.repo or args.dir} in {args.language.capitalize()} language")
    print(f"LLM caching: {'Disabled' if args.no_cache else 'Enabled'}")

    # Create the flow instance and run it against the shared state
    tutorial_flow = create_tutorial_flow()
    tutorial_flow.run(shared)


if __name__ == "__main__":
    main()