Skip to content

[Self-Host] Waterfalling into a permanent loop when blocked by anti-bot #2350

@krim404

Description

@krim404

Over the past few weeks, my Firecrawl process has frequently hit a critical failure in which it becomes completely unresponsive, stuck in an infinite loop. The log file repeats the same entries (excerpt below) without making any progress and without ever hitting a timeout.

Any idea why this happens?

nuq-worker-4   {"level":"info","message":"Waterfalling to next engine...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"a3dde25ece9ea351","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":120000}
nuq-worker-4   {"level":"info","message":"Scraping via document...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"a3dde25ece9ea351","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":15000}
nuq-worker-4   {"level":"debug","message":"Document was blocked by anti-bot, prefetching with chrome-cdp","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"e76c496ea97e95c3","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Scraping URL \"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films\"...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Selected engines","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","selectedEngines":[{"engine":"pdf","supportScore":20,"unsupportedFeatures":{}},{"engine":"document","supportScore":20,"unsupportedFeatures":{}}],"span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Scraping via pdf...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":120000}
nuq-worker-4   {"error":{"engine":"pdf","error":{"message":"Engine pdf was unsuccessful","name":"EngineUnsuccessfulError","stack":"EngineUnsuccessfulError: Engine pdf was unsuccessful\n    at scrapePDF (/app/dist/src/scraper/scrapeURL/engines/pdf/index.js:262:31)\n    at process.processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at async scrapeURLWithEngine (/app/dist/src/scraper/scrapeURL/engines/index.js:443:12)\n    at async scrapeURLLoopIter (/app/dist/src/scraper/scrapeURL/index.js:201:26)\n    at async /app/dist/src/scraper/scrapeURL/index.js:315:37\n    at async /app/dist/src/scraper/scrapeURL/index.js:325:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async /app/dist/src/scraper/scrapeURL/index.js:671:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async runWebScraper (/app/dist/src/main/runWebScraper.js:59:24)"},"message":"WrappedEngineError","name":"WrappedEngineError","stack":"WrappedEngineError: WrappedEngineError\n    at /app/dist/src/scraper/scrapeURL/index.js:319:31\n    at process.processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at async /app/dist/src/scraper/scrapeURL/index.js:325:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async /app/dist/src/scraper/scrapeURL/index.js:671:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async runWebScraper (/app/dist/src/main/runWebScraper.js:59:24)\n    at async startWebScraperPipeline (/app/dist/src/main/runWebScraper.js:9:12)\n    at async processJob (/app/dist/src/services/worker/scrape-worker.js:145:26)\n    at async processJobWithTracing (/app/dist/src/services/worker/scrape-worker.js:832:36)"},"level":"warn","message":"An unexpected error happened while scraping with 
pdf.","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Waterfalling to next engine...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":120000}
nuq-worker-4   {"level":"info","message":"Scraping via document...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"10c2bcfb3c2d17c4","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":15000}
nuq-worker-4   {"level":"debug","message":"Document was blocked by anti-bot, prefetching with chrome-cdp","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"e76c496ea97e95c3","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Scraping URL \"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films\"...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"04aee25049a838e2","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Selected engines","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","selectedEngines":[{"engine":"pdf","supportScore":20,"unsupportedFeatures":{}},{"engine":"document","supportScore":20,"unsupportedFeatures":{}}],"span_id":"04aee25049a838e2","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}
nuq-worker-4   {"level":"info","message":"Scraping via pdf...","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"04aee25049a838e2","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e","waitUntilWaterfall":120000}
nuq-worker-4   {"error":{"engine":"pdf","error":{"message":"Engine pdf was unsuccessful","name":"EngineUnsuccessfulError","stack":"EngineUnsuccessfulError: Engine pdf was unsuccessful\n    at scrapePDF (/app/dist/src/scraper/scrapeURL/engines/pdf/index.js:262:31)\n    at process.processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at async scrapeURLWithEngine (/app/dist/src/scraper/scrapeURL/engines/index.js:443:12)\n    at async scrapeURLLoopIter (/app/dist/src/scraper/scrapeURL/index.js:201:26)\n    at async /app/dist/src/scraper/scrapeURL/index.js:315:37\n    at async /app/dist/src/scraper/scrapeURL/index.js:325:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async /app/dist/src/scraper/scrapeURL/index.js:671:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async runWebScraper (/app/dist/src/main/runWebScraper.js:59:24)"},"message":"WrappedEngineError","name":"WrappedEngineError","stack":"WrappedEngineError: WrappedEngineError\n    at /app/dist/src/scraper/scrapeURL/index.js:319:31\n    at process.processTicksAndRejections (node:internal/process/task_queues:105:5)\n    at async /app/dist/src/scraper/scrapeURL/index.js:325:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async /app/dist/src/scraper/scrapeURL/index.js:671:30\n    at async withSpan (/app/dist/src/lib/otel-tracer.js:49:24)\n    at async runWebScraper (/app/dist/src/main/runWebScraper.js:59:24)\n    at async startWebScraperPipeline (/app/dist/src/main/runWebScraper.js:9:12)\n    at async processJob (/app/dist/src/services/worker/scrape-worker.js:145:26)\n    at async processJobWithTracing (/app/dist/src/services/worker/scrape-worker.js:832:36)"},"level":"warn","message":"An unexpected error happened while scraping with 
pdf.","module":"ScrapeURL","scrapeId":"a25e91fc-9064-4983-ab5e-1f4a03926841","scrapeURL":"https://en.wikipedia.org/wiki/List_of_artificial_intelligence_films","span_id":"04aee25049a838e2","teamId":"bypass","team_id":"bypass","trace_flags":"01","trace_id":"67b4e0f540d9e665ea44119a25d6c91e"}

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions