pymongo
可能会帮助您,以下是我正在使用的一些代码:
import pandas as pd
from pymongo import MongoClient


def _connect_mongo(host, port, username, password, db):
    """Connect to MongoDB and return a handle to database *db*.

    If both ``username`` and ``password`` are provided, they are embedded
    in a ``mongodb://`` URI so the connection authenticates against *db*;
    otherwise an unauthenticated connection is made.
    """
    if username and password:
        # Authenticated connection: credentials go into the URI.
        mongo_uri = 'mongodb://%s:%s@%s:%s/%s' % (username, password, host, port, db)
        conn = MongoClient(mongo_uri)
    else:
        conn = MongoClient(host, port)
    return conn[db]


def read_mongo(db, collection, query=None, host='localhost', port=27017,
               username=None, password=None, no_id=True):
    """Read documents from a Mongo collection into a pandas DataFrame.

    Parameters
    ----------
    db : str
        Name of the database to read from.
    collection : str
        Name of the collection to query.
    query : dict, optional
        MongoDB filter document; defaults to ``{}`` (all documents).
    host, port : connection target (default ``localhost:27017``).
    username, password : optional credentials; both must be set to
        trigger an authenticated connection.
    no_id : bool
        When True (default), drop Mongo's ``_id`` column from the result.

    Returns
    -------
    pandas.DataFrame
        One row per document; empty (no columns) if nothing matched.
    """
    # Fix: the original used a mutable default argument (query={}),
    # which is shared across calls. None is the safe sentinel.
    if query is None:
        query = {}

    # Connect to MongoDB. Use a distinct local name instead of
    # rebinding the `db` parameter (the original shadowed it).
    database = _connect_mongo(host=host, port=port, username=username,
                              password=password, db=db)

    # Run the query against the requested collection.
    cursor = database[collection].find(query)

    # Materialize the cursor and build the DataFrame.
    df = pd.DataFrame(list(cursor))

    # Fix: guard the column drop — on an empty result set the DataFrame
    # has no '_id' column and the original `del df['_id']` raised KeyError.
    if no_id and '_id' in df.columns:
        del df['_id']

    return df
解决方法
我需要分析 mongodb 集合中的大量数据。如何将这些数据导入 pandas?
我是pandas和numpy的新手。
编辑:mongodb集合包含带有日期和时间标记的传感器值。传感器值是float数据类型。
样本数据:
{
    "_cls" : "SensorReport",
    "_id" : ObjectId("515a963b78f6a035d9fa531b"),
    "_types" : [ "SensorReport" ],
    "Readings" : [
        {
            "a" : 0.958069536790466,
            "_types" : [ "Reading" ],
            "ReadingUpdatedDate" : ISODate("2013-04-02T08:26:35.297Z"),
            "b" : 6.296118156595,
            "_cls" : "Reading"
        },
        {
            "a" : 0.95574014778624,
            "_types" : [ "Reading" ],
            "ReadingUpdatedDate" : ISODate("2013-04-02T08:27:09.963Z"),
            "b" : 6.29651468650064,
            "_cls" : "Reading"
        },
        {
            "a" : 0.953648289182713,
            "_types" : [ "Reading" ],
            "ReadingUpdatedDate" : ISODate("2013-04-02T08:27:37.545Z"),
            "b" : 7.29679823731148,
            "_cls" : "Reading"
        }
    ]
}
我需要分析 mongodb 集合中的大量数据。如何将这些数据导入 pandas?
我是pandas和numpy的新手。
编辑:mongodb集合包含带有日期和时间标记的传感器值。传感器值是float数据类型。
样本数据: