MongoDB медленный фасетный запрос с использованием нескольких групп по - PullRequest
0 голосов
/ 18 сентября 2018

Я работаю над проектом, в котором у нас будет почти 5 миллионов документов в коллекции.И размер каждого документа будет около 18571 байта с полями от 120 до 150.

Я должен вернуть свой ответ менее чем за 1 секунду.И монго-запрос будет выполнять почти 10–15 группировок в многогранном запросе для максимум 3 000 000 документов.

Я впервые обрабатываю большую часть данных, когда мне приходится возвращать ответы в режиме реального времени.

Я реализовал индексы, и они сократили время отклика до 5–6 секунд, но мне все еще нужно это менее чем за 1 секунду.

Ниже приведен пример запроса:

db.sample.aggregation(
    "$match":{
        "$and":[
            {"is_new": <true/false>},
            {"brand":<some-brand>},
            {"year":{"$gte":<some-year>,"$lte":<some-year>}},
            {"seller_id":{"$in":[<array-of-seller-ids-may-have-40,000-seller-ids>]}}
        ]
    },
    {
        "$facet":{
            "data":[{
                "$project":{
                    "_id":"_id",
                    "brand":"$brand_name",
                    "model":"$model_name",
                    <will have almost 20 keys with lookup>
                }
            }],
            "count":[{"$group":{"_id":"$_id"}},{"$count":"vin_count"}],
            "price":[{"$bucketAuto":{"groupBy":"$price", "buckets":1}}],
            <will have 12-15 group by>
        }
    }
)

Ниже приведен пример документа:

{
    "_id" : "KNDMC5C11J6394584",
    "brand_id" : 22,
    "brand_name" : "XYZ",
    "abc_id" : 1234567890,
    "city" : "Gurgaon, IN",
    "fluctuation" : 18,
    "created_at" : ISODate("2018-08-17T06:08:12.940Z"),
    "release_data" : "2018-06-29",
    "seller_name" : "Seller name",
    "seller_price" : 34890,
    "seller_rating" : 4,
    "seller_zip" : "12550",
    "feature1" : "ABC",
    "feature2" : 3300,
    "feature3" : "AB",
    "expected_price" : -1,
    "exterior_color" : "Unknown",
    "registered_dealer" : true,
    "registered_brand" : "ABC",
    "fluctuation_rate" : 20.700000000000003,
    "fluctuation_type" : 2,
    "fluc_type_name" : "Something",
    "has_patents" : false,
    "tested_frequency" : 24,
    "interior_color" : "---",
    "is_certified" : false,
    "is_certified_iso" : false,
    "is_featured" : false,
    "is_new" : true,
    "is_certified_bhel" : false,
    "location" : {
        "type" : "Point",
        "coordinates" : [
            -24.08180236816406,
            31.507198333740234
        ]
    },
    "max_input" : 8,
    "feature4" : 3,
    "feature5" : 206,
    "feature6" : "Something",
    "monthly_payment" : 649,
    "msrp" : 34890,
    "feature7" : false,
    "seller_id" : 123567890,
    "product_family_name" : "abc",
    "product_id" : 15,
    "product_name" : "Something",
    "reflection" : "Something",
    "fluc_id" : 2312,
    "fluc_name" : "something something (abc) ac",
    "updated_at" : ISODate("2018-09-11T17:59:36.889Z"),
    "product_damage_category" : "None",
    "year" : 2018,
    "damage_check" : "-",
    "team_size" : "-",
    "Technology" : {
        "camera_unit" : true
    }
}

Ниже приведен вывод объяснения

{
    "stages" : [
        {
            "$cursor" : {
                "query" : {
                    "$and" : [
                        {
                            "is_new" : true
                        },
                        {
                            "year" : {
                                "$gte" : 2018,
                                "$lte" : 2018
                            }
                        },
                        {
                            "sp_id" : {
                                "$in" : [<list of 40,000 seller ids>]
                            }
                        }
                    ]
                },
                "queryPlanner" : {
                    "plannerVersion" : 1,
                    "namespace" : "test_collection.col",
                    "indexFilterSet" : false,
                    "parsedQuery" : {
                        "$and" : [
                            {
                                "is_new" : {
                                    "$eq" : true
                                }
                            },
                            {
                                "year" : {
                                    "$lte" : 2018
                                }
                            },
                            {
                                "year" : {
                                    "$gte" : 2018
                                }
                            },
                            {
                                "sp_id" : {
                                    "$in" : [<list of 40,000 seller ids>]
                                }
                            }
                        ]
                    },
                    "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "IXSCAN",
                            "keyPattern" : {
                                "is_new" : 1,
                                "year" : 1,
                                "sp_id" : 1
                            },
                            "indexName" : "is_new_1_year_1_sp_id_1",
                            "isMultiKey" : false,
                            "multiKeyPaths" : {
                                "is_new" : [ ],
                                "year" : [ ],
                                "sp_id" : [ ]
                            },
                            "isUnique" : false,
                            "isSparse" : false,
                            "isPartial" : false,
                            "indexVersion" : 2,
                            "direction" : "forward",
                            "indexBounds" : {
                                "is_new" : [
                                    "[true, true]"
                                ],
                                "year" : [
                                    "[2018.0, 2018.0]"
                                ],
                                "sp_id" : [
                                    "[47590.0, 47590.0]",
                                    "[48333.0, 48333.0]",
                                    "[51333.0, 51333.0]",
                                    <range of 40,000 seller_ids>
                                ]
                            }
                        }
                    },
                    "rejectedPlans" : [ ]
                }
            }
        },
        {
            "$facet" : {
                "data" : [
                    {
                        "$project" : {
                            "_id" : "$_id",
                            "brand_name" : "$brand_name",
                            "feature1" : "$feature1",
                            "feature2" : "$feature2",
                            "feature3" : "$feature3",
                            "feature4" : "$feature4",
                            "feature5" : "$feature5",
                            "feature6" : "$feature6",
                            "feature7" : "$feature7",
                            "feature8" : "$feature8",
                            "feature9" : "$feature9",
                            "feature10" : "$feature10",
                            "feature11" : "$feature11",
                            "feature12" : "$feature12",
                            "feature13" : "$feature13",
                            "feature14" : "$feature14",
                            "feature15" : "$feature15",
                            "feature16" : "$feature16",
                            "feature17" : "$feature17",
                            "feature18" : "$feature18",
                            "feature19" : "$feature19",
                            "feature20" : "$feature20"
                        }
                    }
                ],
                "count" : [
                    {
                        "$group" : {
                            "_id" : "$_id"
                        }
                    },
                    {
                        "$group" : {
                            "_id" : {
                                "$const" : null
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$project" : {
                            "_id" : false,
                            "count" : true
                        }
                    }
                ],
                "feature1" : [
                    {
                        "$match" : {
                            "feature1" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature1",
                            "name" : {
                                "$first" : "$feature1"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature2" : [
                    {
                        "$match" : {
                            "feature2" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature2",
                            "name" : {
                                "$first" : "$feature2"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature3" : [
                    {
                        "$match" : {
                            "feature3" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature3",
                            "name" : {
                                "$first" : "$feature3"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature4" : [
                    {
                        "$match" : {
                            "feature4" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature4",
                            "name" : {
                                "$first" : "$feature4"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature5" : [
                    {
                        "$match" : {
                            "feature5" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature5",
                            "name" : {
                                "$first" : "$fuel"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature6" : [
                    {
                        "$match" : {
                            "feature6" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature6",
                            "name" : {
                                "$first" : "$feature6"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature7" : [
                    {
                        "$match" : {
                            "feature7" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature7",
                            "name" : {
                                "$first" : "$feature7"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature8" : [
                    {
                        "$match" : {
                            "feature8" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature8",
                            "name" : {
                                "$first" : "$feature8"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature9" : [
                    {
                        "$match" : {
                            "feature9" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature9",
                            "name" : {
                                "$first" : "$feature9"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "count" : -1
                            }
                        }
                    }
                ],
                "feature10" : [
                    {
                        "$match" : {
                            "feature10" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$group" : {
                            "_id" : "$feature10",
                            "name" : {
                                "$first" : "$feature10"
                            },
                            "count" : {
                                "$sum" : {
                                    "$const" : 1
                                }
                            }
                        }
                    },
                    {
                        "$sort" : {
                            "sortKey" : {
                                "_id" : -1
                            }
                        }
                    }
                ],
                "feature11" : [
                    {
                        "$match" : {
                            "feature11" : {
                                "$exists" : true
                            }
                        }
                    },
                    {
                        "$bucketAuto" : {
                            "groupBy" : "$feature11",
                            "buckets" : 1,
                            "output" : {
                                "count" : {
                                    "$sum" : {
                                        "$const" : 1
                                    }
                                }
                            }
                        }
                    }
                ],
                "feature12" : [
                    {
                        "$bucketAuto" : {
                            "groupBy" : "$feature11",
                            "buckets" : 1,
                            "output" : {
                                "count" : {
                                    "$sum" : {
                                        "$const" : 1
                                    }
                                }
                            }
                        }
                    }
                ]
            }
        }
    ],
    "ok" : 1
}

Если эта информация не полнаядля решения.Я предоставлю больше.

Я застрял на этом с прошлого месяца.

Буду признателен за любую помощь.

...