Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 79 additions & 5 deletions nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3427,7 +3427,11 @@ def prep(self, shared):

def _get_core_files_for_component(self, component, file_summaries):
"""Get file summaries for component's CRUD-relevant files."""
core_kinds = {'handler', 'service', 'repository', 'model', 'crud', 'controller', 'api', 'route', 'endpoint'}
# Expanded core_kinds to include more file types that may reference tables
core_kinds = {
'handler', 'service', 'repository', 'model', 'crud', 'controller', 'api', 'route', 'endpoint',
'migration', 'schema', 'config', 'seed', 'factory', 'type', 'interface', 'middleware'
}

core_files = []
for file_idx in component.get("files", []):
Expand All @@ -3440,7 +3444,7 @@ def _get_core_files_for_component(self, component, file_summaries):
core_files = [file_summaries.get(idx) for idx in component.get("files", [])
if file_summaries.get(idx)]

return core_files[:25] # Increased limit - context is just file summaries
return core_files[:50] # Raised limit from 25 to 50

def _extract_crud_entities(self, component, core_files, use_cache):
"""Extract CRUD entities from a component's core files."""
Expand All @@ -3467,7 +3471,7 @@ def _extract_crud_entities(self, component, core_files, use_cache):
COMPONENT TYPE: {component.get("type", "unknown")}

FILES:
{json.dumps(file_context, indent=2)[:30000]}
{json.dumps(file_context, indent=2)[:80000]}

TASK 1 - CRUD ENTITIES:
Identify entities with Create, Read, Update, or Delete operations.
Expand Down Expand Up @@ -3586,6 +3590,9 @@ def _cleanup_extracted_tables(self, tables, use_cache=True):
- API endpoints or URLs
- Configuration stores

IMPORTANT: When in doubt, KEEP the table. Only filter entries that are clearly NOT data storage identifiers.
Be CONSERVATIVE - it's better to include a questionable table than to lose a valid one.

OUTPUT FORMAT (JSON array):
[
{{"name": "cleaned_table_name", "type": "database_type"}}
Expand Down Expand Up @@ -3780,6 +3787,7 @@ def _classify_entities(self, merged_entities, use_cache):
- Usually have high CRUD coverage (create, read, update, delete)
- Used by many components
- Represent the primary data this system manages
- You may read the file summary, look at relationships and entities it interacts with. If any of them appear as system entities then you MUST immediately classify this as core

2. "supporting": Important but not central entities
- Support core workflows
Expand Down Expand Up @@ -4021,8 +4029,74 @@ def extract_for_component(comp):
all_entities.extend(result.get("entities", []))
all_tables.extend(result.get("all_tables", []))

# Dedupe tables by name, then filter noise
unique_tables = list({t["name"]: t for t in all_tables if t.get("name")}.values())
        # CHANGE 1: Harvest tables from ALL files' touches_data (not just core-kind files)
        # This catches tables from migrations, schemas, configs, etc. that core_kinds may miss
        print(" - Step 1b: Harvesting tables from ALL files' touches_data...")
        for file_idx, summary in file_summaries.items():
            # Each file summary may carry a "touches_data" mapping describing the
            # data stores that file reads/writes; files without it are skipped.
            touches_data = summary.get("touches_data", {})
            if not touches_data:
                continue

            # Extract tables/stores from touches_data.
            # Entries may be plain strings (name only) or dicts ({"name", "type",
            # "operations", ...}); both shapes are normalized into all_tables rows.
            for store_type in ["reads", "writes", "tables", "collections", "stores"]:
                stores = touches_data.get(store_type, [])
                if isinstance(stores, list):
                    for store in stores:
                        if isinstance(store, str) and store.strip():
                            # String entry: infer operations from which bucket it
                            # came from ("reads" -> read, "writes" -> write,
                            # anything else -> both).
                            all_tables.append({
                                "name": store.strip(),
                                "type": "database",
                                "operations": ["read"] if store_type == "reads" else ["write"] if store_type == "writes" else ["read", "write"],
                                "from_component": summary.get("component_id", "unknown"),
                                "source_file": summary.get("path", "unknown")
                            })
                        elif isinstance(store, dict) and store.get("name"):
                            # Dict entry: trust its own type/operations fields,
                            # falling back to "database" / read+write defaults.
                            all_tables.append({
                                "name": store["name"],
                                "type": store.get("type", "database"),
                                "operations": store.get("operations", ["read", "write"]),
                                "from_component": summary.get("component_id", "unknown"),
                                "source_file": summary.get("path", "unknown")
                            })

        print(f" Total tables after touches_data harvest: {len(all_tables)}")

        # CHANGE 2: Better dedup that merges operations instead of last-wins
        # NOTE(review): the dedup key is the raw name — case or whitespace
        # variants of the same table are treated as distinct entries; confirm
        # whether names are normalized upstream.
        table_map = {}
        for t in all_tables:
            name = t.get("name")
            if not name:
                continue

            if name not in table_map:
                # First sighting of this name establishes the record;
                # first-seen "type" wins for all later duplicates.
                table_map[name] = {
                    "name": name,
                    "type": t.get("type", "database"),
                    "operations": set(t.get("operations", [])),
                    "from_components": [t.get("from_component")] if t.get("from_component") else [],
                    "source_files": [t.get("source_file")] if t.get("source_file") else []
                }
            else:
                # Merge operations
                table_map[name]["operations"].update(t.get("operations", []))
                # Merge components
                comp = t.get("from_component")
                if comp and comp not in table_map[name]["from_components"]:
                    table_map[name]["from_components"].append(comp)
                # Merge source files
                src = t.get("source_file")
                if src and src not in table_map[name]["source_files"]:
                    table_map[name]["source_files"].append(src)

        # Convert back to list format
        # NOTE(review): "source_files" and all but the first "from_components"
        # entry are collected above but dropped here — confirm downstream does
        # not need them before removing the collection, or emit them too.
        # NOTE(review): operations come out of a set, so their list order is
        # nondeterministic — confirm no downstream consumer depends on order.
        unique_tables = []
        for name, data in table_map.items():
            unique_tables.append({
                "name": data["name"],
                "type": data["type"],
                "operations": list(data["operations"]),
                "from_component": data["from_components"][0] if data["from_components"] else "unknown"
            })

# Post-filter: ONLY formatting cleanup - no semantic filtering (that's the LLM's job)
def is_valid_table(table):
Expand Down