2

I am getting a 'sort exceeded memory limit...' error, which dictates using allowDiskUse(true) in my aggregate. My problem is that I can't quite figure out where to add that to my code. I have tried adding it as an object within the pipeline and as a property in the aggregate() method call, and I get an error for both at runtime.

Code is below:

// GET /cheap-flight-by-route
// Runs an aggregation over the Flights model and sends the aggregated
// documents as the response body.
server.get('/cheap-flight-by-route', function (req, res, next) {

    // The pipeline stages are passed as individual arguments, followed by the
    // result callback as the last argument.
    Flights.aggregate(
        // Stage 1: order all flights by ascending total fare.
        {$sort: {'fare.total_price': 1}},
        // Stage 2: join each flight with 'travelroutes' documents whose _id
        // equals the flight's `route`; matches are stored in the `routes` array.
        {$lookup: {
            from: 'travelroutes',
            localField: 'route',
            foreignField: '_id',
            as: 'routes'
        }},
        // Stage 3: keep only flights for which the join produced a route.
        {$match: {
            'routes._id': {$exists: true}
        }},
        // Stage 4: group by departure/arrival airport pair. Per group: minimum
        // and average total fare, plus `created` and the whole document taken
        // from the first flight seen in the group.
        {$group: {
                _id: {
                    departureAirport: '$routes.departureAirport',
                    arrivalAirport: '$routes.arrivalAirport',
                },
                total_price: {$min: '$fare.total_price'},
                avg_price: {$avg: '$fare.total_price'},
                created: {$first: '$created'},
                doc: {$first: '$$ROOT'}
            }
        },
        // Stage 5: reshape the output. The airport fields in the group key are
        // read with $arrayElemAt index 0, and the internal _id is dropped.
        {$project: {
            departureAirport: {$arrayElemAt: ['$_id.departureAirport', 0]},
            arrivalAirport: {$arrayElemAt: ['$_id.arrivalAirport', 0]},
            created : '$created',
            price: '$total_price',
            averagePrice: '$avg_price',
            'doc': 1,
            '_id': 0
        }},
        // Stage 6: final ordering by creation date, then by airports.
        {$sort: {
            'created': 1,
            'departureAirport': 1,
            'arrivalAirport': 1
            },
        },
        // Result callback: logs and forwards an error, otherwise sends the
        // aggregated documents and continues the handler chain.
        function(err, cheapFlights){
            if (err) {
                log.error(err)
                // NOTE(review): this reads err.errors.name.message, which assumes
                // the error carries an `errors.name` entry -- confirm that
                // aggregation errors have that shape, otherwise this line throws.
                return next(new errors.InvalidContentError(err.errors.name.message))
            }
            res.send(cheapFlights)
            next()
        }
    )  // <-- if I add a .allowDiskUse(true) here it throws a 'bad property' error
})

3 Answers 3

4

I made some changes to your code; try this:

/**
 * GET /cheap-flight-by-route
 *
 * Aggregates flights per departure/arrival airport pair and responds with one
 * entry per pair containing the minimum price, the average price, the
 * `created` value and the full document of the first flight in the group.
 *
 * The pipeline is passed as a single array and disk use is enabled through the
 * chainable Aggregate#allowDiskUse(), so the sort stages may spill to
 * temporary files instead of failing with "sort exceeded memory limit".
 */
server.get('/cheap-flight-by-route', function (req, res, next) {
    Flights.aggregate([
        // Order by ascending fare before grouping; the $first accumulators in
        // the $group stage below take their values from the first document of
        // each group in this order.
        {$sort: {
            'fare.total_price': 1
        } },
        // Join each flight with its route document(s) from 'travelroutes'.
        {$lookup: {
            from: 'travelroutes',
            localField: 'route',
            foreignField: '_id',
            as: 'routes'
        } },
        // Drop flights for which the join found no route.
        {$match: {
            'routes._id': {$exists: true}
        } },
        // One group per departure/arrival airport pair.
        {$group: {
            _id: {
                departureAirport: '$routes.departureAirport',
                arrivalAirport: '$routes.arrivalAirport',
            },
            total_price: {$min: '$fare.total_price'},
            avg_price: {$avg: '$fare.total_price'},
            created: {$first: '$created'},
            doc: {$first: '$$ROOT'}
        } },
        // Flatten the group key (the airport fields are read at array index 0)
        // and rename the computed fields for the response.
        {$project: {
            departureAirport: {$arrayElemAt: ['$_id.departureAirport', 0]},
            arrivalAirport: {$arrayElemAt: ['$_id.arrivalAirport', 0]},
            created : '$created',
            price: '$total_price',
            averagePrice: '$avg_price',
            'doc': 1,
            '_id': 0
        } },
        // Final ordering of the response.
        {$sort: {
            'created': 1,
            'departureAirport': 1,
            'arrivalAirport': 1
        } }
    ])
    // Use the chainable option instead of an options argument: whether
    // Model.aggregate() accepts an options object between the pipeline and the
    // callback depends on the Mongoose version, and where it does not, the call
    // fails with "Arguments must be aggregate pipeline operators".
    .allowDiskUse(true)
    .exec(function (err, cheapFlights) {
        if (err) {
            log.error(err);
            // Not every error carries an `errors.name` entry; reading it
            // unconditionally would throw a TypeError inside this callback.
            // Prefer that message when present, else use the error's own.
            var detail = (err.errors && err.errors.name && err.errors.name.message) || err.message;
            return next(new errors.InvalidContentError(detail));
        }
        res.send(cheapFlights);
        next();
    });
});

Or you can try pipelines:

const JSONStream = require('JSONStream');

/**
 * GET /cheap-flight-by-route (streaming variant)
 *
 * Runs the same per-route aggregation but, instead of buffering the whole
 * result, obtains an aggregation cursor and pipes it through
 * JSONStream.stringify() into the HTTP response.
 *
 * allowDiskUse(true) is set before requesting the cursor so the $sort stages
 * may spill to temporary files rather than failing with
 * "sort exceeded memory limit".
 */
server.get('/cheap-flight-by-route', function (req, res) {
    const stream = Flights.aggregate([
        // Order by ascending fare before grouping; the $first accumulators in
        // the $group stage take their values from the first document of each
        // group in this order.
        {$sort: {
            'fare.total_price': 1
        } },
        // Join each flight with its route document(s) from 'travelroutes'.
        {$lookup: {
            from: 'travelroutes',
            localField: 'route',
            foreignField: '_id',
            as: 'routes'
        } },
        // Drop flights for which the join found no route.
        {$match: {
            'routes._id': {$exists: true}
        } },
        // One group per departure/arrival airport pair.
        {$group: {
            _id: {
                departureAirport: '$routes.departureAirport',
                arrivalAirport: '$routes.arrivalAirport',
            },
            total_price: {$min: '$fare.total_price'},
            avg_price: {$avg: '$fare.total_price'},
            created: {$first: '$created'},
            doc: {$first: '$$ROOT'}
        } },
        // Flatten the group key and rename the computed fields.
        {$project: {
            departureAirport: {$arrayElemAt: ['$_id.departureAirport', 0]},
            arrivalAirport: {$arrayElemAt: ['$_id.arrivalAirport', 0]},
            created : '$created',
            price: '$total_price',
            averagePrice: '$avg_price',
            'doc': 1,
            '_id': 0
        } },
        // Final ordering of the streamed documents.
        {$sort: {
            'created': 1,
            'departureAirport': 1,
            'arrivalAirport': 1
        } }
    ])
    // Must be set on the aggregate before the cursor is created.
    .allowDiskUse(true)
    .cursor()
    .exec();

    // NOTE(review): no 'error' listener is attached to the cursor stream, so a
    // failure during aggregation is not reported to the client -- consider
    // adding one that fits the server's error-handling conventions.
    res.set('Content-Type', 'application/json');
    stream.pipe(JSONStream.stringify()).pipe(res);
});
Sign up to request clarification or add additional context in comments.

1 Comment

Thanks Yuriy, the pipeline example worked when I added .allowDiskUse(true) before the .cursor() - I think you just forgot to add that. Thanks for your help!
2
// Connects to the local "test" database, runs an aggregation on the 'data'
// collection with disk use enabled, and consumes the resulting cursor as a
// stream, printing every document.
MongoClient.connect("mongodb://localhost:27017/test", function(err, db) {
    // Without this guard a failed connection would surface as a TypeError on
    // `db.collection` below instead of the real connection error.
    if (err) {
        console.error(err);
        return;
    }

    // Get an aggregation cursor; options go in the second argument.
    const cursor = db.collection('data').aggregate([
            {$match: {}}
        ], {
        allowDiskUse: true
      , cursor: {batchSize: 1000}
        });

    // Use cursor as stream
    cursor.on('data', function(data) {
        console.dir(data);
    });

    // Report a failed aggregation and still release the connection; without
    // this listener the 'error' event would be unhandled and the connection
    // left open.
    cursor.on('error', function(streamErr) {
        console.error(streamErr);
        db.close();
    });

    cursor.on('end', function() {
        db.close();
    });
});

Comments

0

Try adding it after your function, as the second aggregation parameter.

`
    function(err, cheapFlights){
        if (err) {
            log.error(err)
            return next(new errors.InvalidContentError(err.errors.name.message));
        }
        res.send(cheapFlights);
        next();
     }, 
     { allowDiskUse: true }
)
`

2 Comments

I tried what you suggested and it gave me the following error: "Arguments must be aggregate pipeline operators"
Is there a more standard or more manageable way to write this so that I can use it against larger datasets?

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.