Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 79 additions & 5 deletions nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3427,7 +3427,11 @@ def prep(self, shared):

def _get_core_files_for_component(self, component, file_summaries):
"""Get file summaries for component's CRUD-relevant files."""
core_kinds = {'handler', 'service', 'repository', 'model', 'crud', 'controller', 'api', 'route', 'endpoint'}
# Expanded core_kinds to include more file types that may reference tables
core_kinds = {
'handler', 'service', 'repository', 'model', 'crud', 'controller', 'api', 'route', 'endpoint',
'migration', 'schema', 'config', 'seed', 'factory', 'type', 'interface', 'middleware'
}

core_files = []
for file_idx in component.get("files", []):
Expand All @@ -3440,7 +3444,7 @@ def _get_core_files_for_component(self, component, file_summaries):
core_files = [file_summaries.get(idx) for idx in component.get("files", [])
if file_summaries.get(idx)]

return core_files[:25] # Increased limit - context is just file summaries
return core_files[:50] # Raised limit from 25 to 50

def _extract_crud_entities(self, component, core_files, use_cache):
"""Extract CRUD entities from a component's core files."""
Expand All @@ -3467,7 +3471,7 @@ def _extract_crud_entities(self, component, core_files, use_cache):
COMPONENT TYPE: {component.get("type", "unknown")}

FILES:
{json.dumps(file_context, indent=2)[:30000]}
{json.dumps(file_context, indent=2)[:80000]}

TASK 1 - CRUD ENTITIES:
Identify entities with Create, Read, Update, or Delete operations.
Expand Down Expand Up @@ -3586,6 +3590,9 @@ def _cleanup_extracted_tables(self, tables, use_cache=True):
- API endpoints or URLs
- Configuration stores

IMPORTANT: When in doubt, KEEP the table. Only filter entries that are clearly NOT data storage identifiers.
Be CONSERVATIVE - it's better to include a questionable table than to lose a valid one.

OUTPUT FORMAT (JSON array):
[
{{"name": "cleaned_table_name", "type": "database_type"}}
Expand Down Expand Up @@ -3780,6 +3787,7 @@ def _classify_entities(self, merged_entities, use_cache):
- Usually have high CRUD coverage (create, read, update, delete)
- Used by many components
- Represent the primary data this system manages
- You may read the file summary, look at relationships and entities it interacts with. If any of them appear as system entities then you MUST immediately classify this as core

2. "supporting": Important but not central entities
- Support core workflows
Expand Down Expand Up @@ -4021,8 +4029,74 @@ def extract_for_component(comp):
all_entities.extend(result.get("entities", []))
all_tables.extend(result.get("all_tables", []))

# Dedupe tables by name, then filter noise
unique_tables = list({t["name"]: t for t in all_tables if t.get("name")}.values())
        # CHANGE 1: Harvest tables from ALL files' touches_data (not just core-kind files)
        # This catches tables from migrations, schemas, configs, etc. that core_kinds may miss
        print(" - Step 1b: Harvesting tables from ALL files' touches_data...")
        for file_idx, summary in file_summaries.items():
            # Each file summary may carry a "touches_data" mapping describing the
            # data stores that file reads/writes; files without it are skipped.
            touches_data = summary.get("touches_data", {})
            if not touches_data:
                continue

            # Extract tables/stores from touches_data.
            # Entries may be plain strings (name only) or dicts ({"name", "type",
            # "operations", ...}); both shapes are normalized into all_tables rows.
            for store_type in ["reads", "writes", "tables", "collections", "stores"]:
                stores = touches_data.get(store_type, [])
                if isinstance(stores, list):
                    for store in stores:
                        if isinstance(store, str) and store.strip():
                            # String entry: infer operations from which bucket it
                            # came from ("reads" -> read, "writes" -> write,
                            # anything else -> both).
                            all_tables.append({
                                "name": store.strip(),
                                "type": "database",
                                "operations": ["read"] if store_type == "reads" else ["write"] if store_type == "writes" else ["read", "write"],
                                "from_component": summary.get("component_id", "unknown"),
                                "source_file": summary.get("path", "unknown")
                            })
                        elif isinstance(store, dict) and store.get("name"):
                            # Dict entry: trust its own type/operations fields,
                            # falling back to "database" / read+write defaults.
                            all_tables.append({
                                "name": store["name"],
                                "type": store.get("type", "database"),
                                "operations": store.get("operations", ["read", "write"]),
                                "from_component": summary.get("component_id", "unknown"),
                                "source_file": summary.get("path", "unknown")
                            })

        print(f" Total tables after touches_data harvest: {len(all_tables)}")

        # CHANGE 2: Better dedup that merges operations instead of last-wins
        # NOTE(review): the dedup key is the raw name — case or whitespace
        # variants of the same table are treated as distinct entries; confirm
        # whether names are normalized upstream.
        table_map = {}
        for t in all_tables:
            name = t.get("name")
            if not name:
                continue

            if name not in table_map:
                # First sighting of this name establishes the record;
                # first-seen "type" wins for all later duplicates.
                table_map[name] = {
                    "name": name,
                    "type": t.get("type", "database"),
                    "operations": set(t.get("operations", [])),
                    "from_components": [t.get("from_component")] if t.get("from_component") else [],
                    "source_files": [t.get("source_file")] if t.get("source_file") else []
                }
            else:
                # Merge operations
                table_map[name]["operations"].update(t.get("operations", []))
                # Merge components
                comp = t.get("from_component")
                if comp and comp not in table_map[name]["from_components"]:
                    table_map[name]["from_components"].append(comp)
                # Merge source files
                src = t.get("source_file")
                if src and src not in table_map[name]["source_files"]:
                    table_map[name]["source_files"].append(src)

        # Convert back to list format
        # NOTE(review): "source_files" and all but the first "from_components"
        # entry are collected above but dropped here — confirm downstream does
        # not need them before removing the collection, or emit them too.
        # NOTE(review): operations come out of a set, so their list order is
        # nondeterministic — confirm no downstream consumer depends on order.
        unique_tables = []
        for name, data in table_map.items():
            unique_tables.append({
                "name": data["name"],
                "type": data["type"],
                "operations": list(data["operations"]),
                "from_component": data["from_components"][0] if data["from_components"] else "unknown"
            })

# Post-filter: ONLY formatting cleanup - no semantic filtering (that's the LLM's job)
def is_valid_table(table):
Expand Down