目录:聚合;Map-reduce 函数;示例:按客户统计;示例:按日期统计。聚合操作处理多个文档并返回计算结果。您可以使用聚合操作来:将多个文档中的值分组在一起;对分组数据执行操作以返回单个结果;分析数据随时间的变化。
聚合操作处理多个文档并返回计算结果。您可以使用聚合操作来:将多个文档中的值分组在一起;对分组数据执行操作以返回单个结果;分析数据随时间的变化。
要执行聚合操作,您可以使用:聚合管道(执行聚合的首选方法)、单一用途聚合方法,以及 map-reduce 函数。
在 mongo shell 中,db.collection.mapReduce() 方法是 mapReduce 命令的包装器。下面的例子使用 db.collection.mapReduce() 方法。
定义: db.collection.mapReduce(map,reduce, { <options> })
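map 函数有以下要求:在函数内用 this 引用当前文档;不应以任何理由访问数据库;应为纯函数,即不对函数外部产生影响(无副作用);可以调用任意次 emit(key, value),以创建将 key 与 value 关联的输出文档。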
# 原型如下:
function() {
...
emit(key, value);
}
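reduce 函数表现出以下行为:不应访问数据库(即使只是读操作);不应影响外部系统;MongoDB 可能对同一个键多次调用 reduce 函数,此时上一次 reduce 的输出会成为下一次调用的输入值之一;reduce 函数可以访问 scope 参数中定义的变量。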
# 该reduce函数具有以下原型:
function(key, values) {
...
return result;
}
插入测试数据。如下:
sit_rs1:PRIMARY> db.orders.insertMany([
... { _id: 1, cust_id: "A", ord_date: new Date("2023-06-01"), price: 15, items: [ { sku: "apple", qty: 5, price: 2.5 }, { sku: "apples", qty: 5, price: 2.5 } ], status: "1" },
... { _id: 2, cust_id: "A", ord_date: new Date("2023-06-08"), price: 60, items: [ { sku: "apple", qty: 8, price: 2.5 }, { sku: "banana", qty: 5, price: 10 } ], status: "1" },
... { _id: 3, cust_id: "B", ord_date: new Date("2023-06-08"), price: 55, items: [ { sku: "apple", qty: 10, price: 2.5 }, { sku: "pears", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 4, cust_id: "B", ord_date: new Date("2023-06-18"), price: 26, items: [ { sku: "apple", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 5, cust_id: "B", ord_date: new Date("2023-06-19"), price: 40, items: [ { sku: "banana", qty: 5, price: 10 } ], status: "1"},
... { _id: 6, cust_id: "C", ord_date: new Date("2023-06-19"), price: 38, items: [ { sku: "carrots", qty: 10, price: 1.0 }, { sku: "apples", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 7, cust_id: "C", ord_date: new Date("2023-06-20"), price: 21, items: [ { sku: "apple", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 8, cust_id: "D", ord_date: new Date("2023-06-20"), price: 76, items: [ { sku: "banana", qty: 5, price: 10 }, { sku: "apples", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 9, cust_id: "D", ord_date: new Date("2023-06-20"), price: 51, items: [ { sku: "carrots", qty: 5, price: 1.0 }, { sku: "apples", qty: 10, price: 2.5 }, { sku: "apple", qty: 10, price: 2.5 } ], status: "1" },
... { _id: 10, cust_id: "D", ord_date: new Date("2023-06-23"), price: 23, items: [ { sku: "apple", qty: 10, price: 2.5 } ], status: "1" }
... ])
{
"acknowledged" : true,
"insertedIds" : [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10
]
}
sit_rs1:PRIMARY> db.orders.find()
{ "_id" : 4, "cust_id" : "B", "ord_date" : ISODate("2023-06-18T00:00:00Z"), "price" : 26, "items" : [ { "sku" : "apple", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 6, "cust_id" : "C", "ord_date" : ISODate("2023-06-19T00:00:00Z"), "price" : 38, "items" : [ { "sku" : "carrots", "qty" : 10, "price" : 1 }, { "sku" : "apples", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 1, "cust_id" : "A", "ord_date" : ISODate("2023-06-01T00:00:00Z"), "price" : 15, "items" : [ { "sku" : "apple", "qty" : 5, "price" : 2.5 }, { "sku" : "apples", "qty" : 5, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 2, "cust_id" : "A", "ord_date" : ISODate("2023-06-08T00:00:00Z"), "price" : 60, "items" : [ { "sku" : "apple", "qty" : 8, "price" : 2.5 }, { "sku" : "banana", "qty" : 5, "price" : 10 } ], "status" : "1" }
{ "_id" : 9, "cust_id" : "D", "ord_date" : ISODate("2023-06-20T00:00:00Z"), "price" : 51, "items" : [ { "sku" : "carrots", "qty" : 5, "price" : 1 }, { "sku" : "apples", "qty" : 10, "price" : 2.5 }, { "sku" : "apple", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 3, "cust_id" : "B", "ord_date" : ISODate("2023-06-08T00:00:00Z"), "price" : 55, "items" : [ { "sku" : "apple", "qty" : 10, "price" : 2.5 }, { "sku" : "pears", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 5, "cust_id" : "B", "ord_date" : ISODate("2023-06-19T00:00:00Z"), "price" : 40, "items" : [ { "sku" : "banana", "qty" : 5, "price" : 10 } ], "status" : "1" }
{ "_id" : 7, "cust_id" : "C", "ord_date" : ISODate("2023-06-20T00:00:00Z"), "price" : 21, "items" : [ { "sku" : "apple", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 8, "cust_id" : "D", "ord_date" : ISODate("2023-06-20T00:00:00Z"), "price" : 76, "items" : [ { "sku" : "banana", "qty" : 5, "price" : 10 }, { "sku" : "apples", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
{ "_id" : 10, "cust_id" : "D", "ord_date" : ISODate("2023-06-23T00:00:00Z"), "price" : 23, "items" : [ { "sku" : "apple", "qty" : 10, "price" : 2.5 } ], "status" : "1" }
对集合 orders 执行 map-reduce 操作:按 cust_id 进行分组,然后计算每个客户的 price 总和,如下:
首先,我们需要定义 map 函数来处理每个输入文档:
sit_rs1:PRIMARY> var myMapFun = function() {
... emit(this.cust_id, this.price);
... };
sit_rs1:PRIMARY> print(myMapFun)
function() {
emit(this.cust_id, this.price);
}
然后,用两个参数 keyCustId 和 valuesPrices 定义相应的reduce函数。 这里需要调用数组的 sum 方法计算客户订单总价。
# 计算数组元素总和
sit_rs1:PRIMARY> Array.sum([2,2,6,8])
18
# 计算数组平均值
sit_rs1:PRIMARY> Array.avg([1,2,3])
2
sit_rs1:PRIMARY> var myReduceFun = function(keyCustId, valuesPrices) {
... return Array.sum(valuesPrices);
... };
sit_rs1:PRIMARY> print(myReduceFun)
function(keyCustId, valuesPrices) {
return Array.sum(valuesPrices);
}
最后,使用 myMapFun 函数和 myReduceFun 函数对集合 orders 中的所有文档执行map-reduce统计:
sit_rs1:PRIMARY> db.orders.mapReduce(
... myMapFun,
... myReduceFun,
... { out: "map_reduce_out" }
... )
{
"result" : "map_reduce_out",
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1690259241, 6),
"signature" : {
"hash" : BinData(0,"Kur+ueslJYcT5oExd8ujPIC/J3Q="),
"keyId" : NumberLong("7205479298910650370")
}
},
"operationTime" : Timestamp(1690259241, 6)
}
查询 map_reduce_out 集合以验证结果是否正确:
sit_rs1:PRIMARY> db.map_reduce_out.find().sort( { _id: 1 } )
{ "_id" : "A", "value" : 75 }
{ "_id" : "B", "value" : 121 }
{ "_id" : "C", "value" : 59 }
{ "_id" : "D", "value" : 150 }
# 检查 cust_id 为 A 的客户, 总和是 75 正确
sit_rs1:PRIMARY> db.orders.find({ "cust_id" : "A"}, {"price": 1})
{ "_id" : 1, "price" : 15 }
{ "_id" : 2, "price" : 60 }
# 检查 cust_id 为 B 的客户,总和是 121 正确
sit_rs1:PRIMARY> db.orders.find({ "cust_id" : "B"}, {"price": 1})
{ "_id" : 4, "price" : 26 }
{ "_id" : 3, "price" : 55 }
{ "_id" : 5, "price" : 40 }
按日期统计,和上面的示例一样,只需要把 map 函数重新定义如下:以每个文档的 ord_date 作为键、price 作为值,发出(emit)ord_date 和 price 键值对。
sit_rs1:PRIMARY> var myMapFun2 = function() {
... emit(this.ord_date, this.price);
... };
sit_rs1:PRIMARY> print(myMapFun2)
function() {
emit(this.ord_date, this.price);
}
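然后,用两个参数 keyOrdDate 和 valuesPrices 定义相应的 reduce 函数。这里需要调用数组的 avg 方法计算平均客单价。注意:MongoDB 可能对同一个键多次调用 reduce 函数,并把上一次的输出作为下一次的输入值之一,而“对平均值再求平均”并不等于整体平均值;本示例数据量很小,结果是正确的,但在数据量较大时,更严谨的做法是在 reduce 中累加 count 与 sum,再在 finalize 函数中计算平均值。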
sit_rs1:PRIMARY> var myReduceFun2 = function(keyOrdDate, valuesPrices) {
... return Array.avg(valuesPrices);
... };
sit_rs1:PRIMARY> print(myReduceFun2)
function(keyOrdDate, valuesPrices) {
return Array.avg(valuesPrices);
}
最后,使用 myMapFun2 函数和 myReduceFun2 函数对集合 orders 中的所有文档执行map-reduce统计:
sit_rs1:PRIMARY> db.orders.mapReduce(
... myMapFun2,
... myReduceFun2,
... { out: "map_reduce_out2" }
... )
{
"result" : "map_reduce_out2",
"ok" : 1,
"$clusterTime" : {
"clusterTime" : Timestamp(1690265083, 8),
"signature" : {
"hash" : BinData(0,"pCWskY3HjLGEjsk00ARYdzKECDE="),
"keyId" : NumberLong("7205479298910650370")
}
},
"operationTime" : Timestamp(1690265083, 8)
}
查询 map_reduce_out2 集合以验证结果是否正确:
sit_rs1:PRIMARY> db.map_reduce_out2.find()
{ "_id" : ISODate("2023-06-08T00:00:00Z"), "value" : 57.5 }
{ "_id" : ISODate("2023-06-01T00:00:00Z"), "value" : 15 }
{ "_id" : ISODate("2023-06-18T00:00:00Z"), "value" : 26 }
{ "_id" : ISODate("2023-06-20T00:00:00Z"), "value" : 49.333333333333336 }
{ "_id" : ISODate("2023-06-23T00:00:00Z"), "value" : 23 }
{ "_id" : ISODate("2023-06-19T00:00:00Z"), "value" : 39 }
# 检查日期2023-06-08的订单平均值
sit_rs1:PRIMARY> db.orders.find({ "ord_date" : ISODate("2023-06-08T00:00:00Z")}, {"price": 1})
{ "_id" : 2, "price" : 60 }
{ "_id" : 3, "price" : 55 }
sit_rs1:PRIMARY> print((60+55)/2)
57.5
# 检查日期2023-06-20的订单平均值
sit_rs1:PRIMARY> db.orders.find({ "ord_date" : ISODate("2023-06-20T00:00:00Z")}, {"price": 1})
{ "_id" : 9, "price" : 51 }
{ "_id" : 7, "price" : 21 }
{ "_id" : 8, "price" : 76 }
sit_rs1:PRIMARY> print((51+21+76)/3)
49.333333333333336
对于需要自定义功能的 map-reduce 操作,MongoDB 从 4.4 版本开始提供 $accumulator 和 $function 聚合运算符,可以使用这些运算符以 JavaScript 定义自定义聚合表达式。
到此,这篇关于 MongoDB 多文档聚合操作处理方法(Map-reduce 函数)的文章就介绍到这了。更多相关 MongoDB 聚合操作的内容,请搜索编程网(www.cppcns.com)以前的文章,或继续浏览下面的相关文章。希望大家以后多多支持编程网(www.cppcns.com)!
--结束END--
本文标题: 详解Mongodb 多文档聚合操作处理方法(Map-reduce 函数)
本文链接: https://www.lsjlt.com/news/366461.html(转载时请注明来源链接)
有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341
下载Word文档到电脑,方便收藏和打印~
2024-05-10
2024-05-10
2024-05-10
2024-05-10
2024-05-10
2024-05-10
2024-05-09
2024-05-09
2024-05-09
2024-05-09
回答
回答
回答
回答
回答
回答
回答
回答
回答
回答
0