- Например, у меня есть данные сайта электронной коммерции.
# Website identifier used as the ``web_id`` argument in the read_mongo example.
webGuid = 'website_name'
В словаре dbDict ключом служит имя коллекции (или кортеж имён коллекций), а значением — имя базы данных.
# Maps collections to the database that stores them.
# A key is either a single collection name (str) or a tuple of collection
# names that all live in the same database; the value is the database name.
dbDict = {
    'Users': 'beta-user',
    'Orders': 'beta-order',
    (
        'Products',
        'Categories',
        'UserRecommendations',
        'GenericRecommendation',
    ): 'beta-product',
    (
        'PageHitsRecommendation',
        'Analytics',
    ): "beta-analytics",
}
В словаре _filter ключом служит имя коллекции, а значением — фильтр, который применяется к этой коллекции по умолчанию.
_filter = {'Products':{"IsDeleted": False, "IsActive": True,'CollectionGuids.CollectionType':{'$in':['Category']}},
'Users': {"IsDeleted": False, "IsActive": True},
'Categories':{"IsActive": True, 'CollectionType':"Category"}
}
Здесь мы создаём подключение с учётными данными и возвращаем объект базы данных «db».
def mongo_connection(collection):
    """Return the pymongo database object that stores ``collection``.

    The database name is looked up in the module-level ``dbDict``, whose keys
    are either a single collection name (str) or a tuple of collection names.

    Arguments:
    -------------
    1. collection: string,
        exact name of the collection whose database is needed.

    Returns:
    -------------
    db: the ``MongoClient`` database handle for the resolved database name.

    Raises:
    -------------
    KeyError: if ``collection`` is not listed in ``dbDict``.
    '''
    """
    from pymongo import MongoClient

    # Normalise every key to a tuple before testing membership. On a plain
    # string key, ``collection in key`` is a substring test, so 'User' would
    # wrongly resolve through the 'Users' entry.
    dbName = None
    for key, value in dbDict.items():
        names = (key,) if isinstance(key, str) else key
        if collection in names:
            dbName = value
            break
    if dbName is None:
        # Explicit error instead of a bare StopIteration escaping from next().
        raise KeyError(
            'No database configured for collection {!r}'.format(collection))

    # NOTE: the URI below still contains the {username}/{password}/... template
    # placeholders; nothing in this function substitutes them.
    conn = MongoClient('mongodb://{username}:{password}@{IP Addr}:{port No}/'
                       + dbName)
    return conn[dbName]
Для Flask нужно использовать следующее определение:
def mongo_flask_connection(collection):
    """Return the Flask-PyMongo database object that stores ``collection``.

    The database name is looked up in the module-level ``dbDict``, whose keys
    are either a single collection name (str) or a tuple of collection names.
    The resulting URI is written to ``app.config['MONGO_URI']``; ``app`` is a
    module-level name that is not defined in this section of the file.

    Arguments:
    -------------
    1. collection: string,
        exact name of the collection whose database is needed.

    Returns:
    -------------
    db: the ``db`` attribute of a new ``PyMongo(app)`` instance.

    Raises:
    -------------
    KeyError: if ``collection`` is not listed in ``dbDict``.
    """
    from flask_pymongo import PyMongo

    # Normalise every key to a tuple before testing membership. On a plain
    # string key, ``collection in key`` is a substring test, so 'Order' would
    # wrongly resolve through the 'Orders' entry.
    dbName = None
    for key, value in dbDict.items():
        names = (key,) if isinstance(key, str) else key
        if collection in names:
            dbName = value
            break
    if dbName is None:
        # Explicit error instead of a bare StopIteration escaping from next().
        raise KeyError(
            'No database configured for collection {!r}'.format(collection))

    # NOTE: the URI below still contains the {user name}/{password}/...
    # template placeholders; nothing in this function substitutes them.
    app.config['MONGO_URI'] = \
        'mongodb://{user name}:{password}@{IP Addr}:{Port No}/' + dbName
    mongo = PyMongo(app)
    return mongo.db
Здесь определена функция, которая по переданным аргументам возвращает нужный кадр данных (DataFrame).
def read_mongo(collection, web_id=None, projection=None,
               nestedArray=None, nested=None, myFilter=None, level=None):
    '''
    Query a MongoDB collection and return the result as a pandas data frame.

    Arguments:
    -------------
    1. collection: string,
        collection name whose data need to read.
    2. web_id: string,
        it is the website value to filter the collection data; all data is
        considered by default. The filter key is 'WebSiteGuid' for the
        'Products' collection and 'WebsiteGuid' for every other collection.
    3. projection: list,
        It is the fields that need from collection. fields syntax is same as
        in mongo db. '_id' is always excluded.
    4. nestedArray: string,
        It is the name of field whose data is nested inside list format.
    5. nested: string,
        It is the name of field whose data is nested inside dictionary format.
        The name is interpolated into a regular expression as-is.
    6. myFilter: dict,
        If we need additional filtering apart from website name. Its entries
        override the website filter and the collection default in `_filter`.
    7. level: int,
        It is the level of nested that need to dig down, by default it
        considers the last value. Only used when `nested` is given without
        `nestedArray`.
    Returns:
    -------------
    dataFrame: pandas data frame,
        This the final pandas frame who has passed all the criteria placed in
        arguments. An empty frame is returned when no document matches.
    '''
    import re

    import pandas as pd

    # pandas >= 1.0 exposes json_normalize at top level; the pandas.io.json
    # import path is deprecated there, so use it only as a fallback.
    try:
        jnz = pd.json_normalize
    except AttributeError:
        from pandas.io.json import json_normalize as jnz

    db = mongo_connection(collection)

    # 'Products' spells the website key differently from other collections.
    if web_id is None:
        web_filter = {}
    elif collection == 'Products':
        web_filter = {'WebSiteGuid': web_id}
    else:
        web_filter = {'WebsiteGuid': web_id}

    # Later entries win: website filter < collection default < caller filter.
    mongo_filter = {**web_filter,
                    **_filter.get(collection, web_filter),
                    **(myFilter or {})}

    mongo_projection = dict.fromkeys(projection or (), 1)
    mongo_projection['_id'] = 0

    # Materialise the cursor and drop empty documents up front, so pandas
    # receives a plain list rather than a one-shot iterator.
    records = [doc for doc in
               db[collection].find(mongo_filter, mongo_projection) if doc]
    if not records:
        # Without this guard the nested + nestedArray branch raises KeyError
        # on the missing nestedArray column.
        return pd.DataFrame()

    has_nested = nested is not None
    has_array = nestedArray is not None

    def strip_nested_prefix(column):
        # Remove every "<nested>." occurrence from a flattened column name.
        return re.sub(r'({})\.'.format(nested), '', column)

    if has_nested and has_array:
        from cytoolz.dicttoolz import merge

        flat = jnz(records)
        # Collapse each row's list of dicts into a single dict and spread its
        # keys into columns.
        expanded = flat[nestedArray].apply(
            lambda items: pd.Series(merge(items)))
        dataFrame = flat.drop(nestedArray, axis=1).join(expanded) \
            .rename(columns=strip_nested_prefix)
    elif has_nested:
        dataFrame = jnz(records, max_level=level) \
            .rename(columns=strip_nested_prefix)
    elif has_array:
        # Only top-level projected fields without '.' or '_' are carried along
        # as meta columns; with no projection there are none.
        meta = None
        if projection:
            meta = [s for s in projection
                    if not any(xs in s for xs in ['.', '_'])]
        dataFrame = jnz(records, nestedArray, meta, errors='ignore')
    else:
        dataFrame = pd.DataFrame(records)
    return dataFrame
Вызовите функцию read_mongo с нужными значениями аргументов. Коллекция Orders содержит данные в формате JSON, как показано ниже.
{
"ItemsList" : [
{
"ProductGuid" : "a70ca7b7-ee5d-4c53-a84b-b05afab756f0",
"SkuGuid" : "5937beee-6170-4104-8ada-d52840e4f00f",
"Quantity" : 50,
"Price" : "10.99",
"TotalPrice" : "549.50"
}],
"shippingInformation" : {
"ShippingName" : null,
"ShippingZip" : "10007",
"ShippingPhone" : "987456321"
},
"WebsiteGuid" : "string",
"UserGuid" : "jasld213234"
}
# Read the Orders collection for the example website: ItemsList is the
# list-valued field and shippingInformation is the dict-valued field.
orderDataRaw = read_mongo(
    'Orders',
    web_id=webGuid,
    projection=[
        'UserGuid',
        'ItemsList.ProductGuid',
        'shippingInformation.ShippingZip',
    ],
    nestedArray='ItemsList',
    nested='shippingInformation',
)