Skip to content

Deadlock in FieldLevelTrackingApproach #1233

@tinnou

Description

@tinnou

Description

When using the AsyncExecutionStategy with DataLoader support, I'm getting rare instances of deadlocks when resolving queries. The thread dump extract below shows multiple threads waiting for the lock on the callstack object here.
I attached the full thread dump to the issue.

Found one Java-level deadlock:
=============================
"resolver-chain-thread-15":
  waiting to lock monitor 0x00007f8a93c32958 (object 0x000000076cf39598, a graphql.execution.instrumentation.dataloader.FieldLevelTrackingApproach$CallStack),
  which is held by "resolver-chain-thread-12"
"resolver-chain-thread-12":
  waiting to lock monitor 0x00007f8a94a34bb8 (object 0x000000076cfc1cf8, a graphql.execution.instrumentation.dataloader.FieldLevelTrackingApproach$CallStack),
  which is held by "main"
"main":
  waiting to lock monitor 0x00007f8a93c32958 (object 0x000000076cf39598, a graphql.execution.instrumentation.dataloader.FieldLevelTrackingApproach$CallStack),
  which is held by "resolver-chain-thread-12"

Steps to reproduce

I wrote a test that attempts to reproduce the issue consistently but you might have to run it multiple times to get into a deadlock.
Note: I passed my own executor to the supplyAsync method inside the batch functions instead of using the default ForkJoinPool, but I think that shouldn't be a problem.

The test below hangs when there is a deadlock:

 def "deadlock attempt"() {
        setup:
        def sdl = """
        type Album {
            id: ID!
            title: String!
            artist: Artist
            songs(
                limit: Int,
                nextToken: String
            ): ModelSongConnection
        }

        type Artist {
            id: ID!
            name: String!
            albums(
                limit: Int,
                nextToken: String
            ): ModelAlbumConnection
            songs(
                limit: Int,
                nextToken: String
            ): ModelSongConnection
        }

        type ModelAlbumConnection {
            items: [Album]
            nextToken: String
        }

        type ModelArtistConnection {
            items: [Artist]
            nextToken: String
        }

        type ModelSongConnection {
            items: [Song]
            nextToken: String
        }

        type Query {
            listArtists(limit: Int, nextToken: String): ModelArtistConnection
        }

        type Song {
            id: ID!
            title: String!
            artist: Artist
            album: Album
        }
        """

        ThreadFactory threadFactory = new BasicThreadFactory.Builder()
                .namingPattern("resolver-chain-thread-%d").build()
        def executor = new ThreadPoolExecutor(15, 15, 0L,
                TimeUnit.MILLISECONDS, new SynchronousQueue<>(), threadFactory,
                new ThreadPoolExecutor.CallerRunsPolicy())

        def dataLoaderAlbums = new DataLoader<Object, Object>(new BatchLoader<DataFetchingEnvironment, List<Object>>() {
            @Override
            CompletionStage<List<List<Object>>> load(List<DataFetchingEnvironment> keys) {
                return CompletableFuture.supplyAsync({
                    def limit = keys.first().getArgument("limit") as Integer
                    return keys.collect({ k ->
                        def albums = []
                        for (int i = 1; i <= limit; i++) {
                            albums.add(['id': "artist-$k.source.id-$i", 'title': "album-$i"])
                        }
                        def albumsConnection = ['nextToken': 'album-next', 'items': albums]
                        return albumsConnection
                    })
                }, executor)
            }
        }, DataLoaderOptions.newOptions().setMaxBatchSize(5))

        def dataLoaderSongs = new DataLoader<Object, Object>(new BatchLoader<DataFetchingEnvironment, List<Object>>() {
            @Override
            CompletionStage<List<List<Object>>> load(List<DataFetchingEnvironment> keys) {
                return CompletableFuture.supplyAsync({
                    def limit = keys.first().getArgument("limit") as Integer
                    return keys.collect({ k ->
                        def songs = []
                        for (int i = 1; i <= limit; i++) {
                            songs.add(['id': "album-$k.source.id-$i", 'title': "song-$i"])
                        }
                        def songsConnection = ['nextToken': 'song-next', 'items': songs]
                        return songsConnection
                    })
                }, executor)
            }
        }, DataLoaderOptions.newOptions().setMaxBatchSize(5))

        def dataLoaderRegistry = new DataLoaderRegistry()
        dataLoaderRegistry.register("artist.albums", dataLoaderAlbums)
        dataLoaderRegistry.register("album.songs", dataLoaderSongs)

        def albumsDf = new MyForwardingDataFetcher(dataLoaderAlbums)
        def songsDf = new MyForwardingDataFetcher(dataLoaderSongs)

        def dataFetcherArtists = new DataFetcher() {
            @Override
            Object get(DataFetchingEnvironment environment) {
                def limit = environment.getArgument("limit") as Integer
                def artists = []
                for (int i = 1; i <= limit; i++) {
                    artists.add(['id': "artist-$i", 'name': "artist-$i"])
                }
                return ['nextToken': 'artist-next', 'items': artists]
            }
        }

        def wiring = RuntimeWiring.newRuntimeWiring()
                .type(newTypeWiring("Query")
                    .dataFetcher("listArtists", dataFetcherArtists))
                .type(newTypeWiring("Artist")
                    .dataFetcher("albums", albumsDf))
                .type(newTypeWiring("Album")
                    .dataFetcher("songs", songsDf))
                .build()

        def schema = TestUtil.schema(sdl, wiring)

        when:
        def graphql = GraphQL.newGraphQL(schema)
                .instrumentation(new DataLoaderDispatcherInstrumentation(dataLoaderRegistry))
                .build()

        then: "execution shouldn't hang"
        List<CompletableFuture<ExecutionResult>> futures = []
        for (int i = 0; i < NUM_OF_REPS; i++) {
            def result = graphql.executeAsync(ExecutionInput.newExecutionInput()
                    .query("""
                    query getArtistsWithData {
                      listArtists(limit: 1) {
                        items {
                          name
                          albums(limit: 200) {
                            items {
                              title
                              # Uncommenting the following causes query to timeout
                              songs(limit: 5) {
                                 nextToken
                                 items {
                                   title
                                 }
                              }
                            }
                          }
                        }
                      }
                    }
                        """)
                    .build())
            result.whenComplete({ res, error ->
                if (error) {
                    throw error
                }
                assert res.errors.empty
            })
            // add all futures
            futures.add(result)
        }
        // wait for each future to complete and grab the results
        Async.each(futures)
                .whenComplete({ results, error ->
                    if (error) {
                        throw error
                    }
                    results.each { assert it.errors.empty }
                })
                .join()
    }

thread-dump-graphql-java.log

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions