使用两个条件在 JavaScript 中聚合数据

问题描述 投票:0回答:1

我一直在尝试汇总数据以满足两个标准。

假设我的初始数组(rawData)如下:

// Raw daily sales used throughout the question: one row per (date, fruit).
// salePct is the fruit's share of that day's sales, so the rows of each date add up to 1.
// Not every fruit is sold on every date, and 2023-11-04 has no rows at all.
// Declared with const so the snippet also runs in strict mode / ES modules,
// where assigning to an undeclared name throws a ReferenceError.
const rawData = [
  { date: '2023-11-01', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-01', fruit: 'apples', state: 'NY', salePct: 0.25 },
  { date: '2023-11-01', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-01', fruit: 'figs', state: 'NY', salePct: 0.01 },
  { date: '2023-11-01', fruit: 'strawberries', state: 'NY', salePct: 0.15 },
  { date: '2023-11-01', fruit: 'raspberries', state: 'NY', salePct: 0.15 },
  { date: '2023-11-01', fruit: 'blueberries', state: 'NY', salePct: 0.15 },
  { date: '2023-11-01', fruit: 'cranberries', state: 'NY', salePct: 0.02 },
  { date: '2023-11-01', fruit: 'pineapples', state: 'NY', salePct: 0.02 },
  { date: '2023-11-01', fruit: 'peaches', state: 'NY', salePct: 0.05 },
  { date: '2023-11-02', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-02', fruit: 'figs', state: 'NY', salePct: 0.10 },
  { date: '2023-11-02', fruit: 'strawberries', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'raspberries', state: 'NY', salePct: 0.10 },
  { date: '2023-11-02', fruit: 'blueberries', state: 'NY', salePct: 0.03 },
  { date: '2023-11-02', fruit: 'cranberries', state: 'NY', salePct: 0.03 },
  { date: '2023-11-02', fruit: 'pineapples', state: 'NY', salePct: 0.04 },
  { date: '2023-11-03', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-03', fruit: 'figs', state: 'NY', salePct: 0.10 },
  { date: '2023-11-03', fruit: 'strawberries', state: 'NY', salePct: 0.30 },
  { date: '2023-11-03', fruit: 'raspberries', state: 'NY', salePct: 0.05 },
  { date: '2023-11-03', fruit: 'blueberries', state: 'NY', salePct: 0.05 },
  { date: '2023-11-05', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'apples', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'figs', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'strawberries', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'raspberries', state: 'NY', salePct: 0.25 },
  { date: '2023-11-05', fruit: 'blueberries', state: 'NY', salePct: 0.25 },
];

我希望把在任一可用日期的销售百分比低于最小值(minSalePct)的水果,在所有可用日期上都聚合为 others。因此,当 minSalePct = 0.10 时,aggregatedByThreshold 数组如下:

// Expected result when only the threshold is applied (minSalePct = 0.10):
// every fruit whose salePct drops below the threshold on at least one date is
// folded into a single 'others' row on every date.
// Each 'others' value is the sum of the folded fruit in rawData for that date,
// so the rows of a date still add up to 1
// (e.g. 2023-11-03: figs 0.10 + raspberries 0.05 + blueberries 0.05 = 0.20).
const aggregatedByThreshold = [
  { date: '2023-11-01', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-01', fruit: 'apples', state: 'NY', salePct: 0.25 },
  { date: '2023-11-01', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-01', fruit: 'strawberries', state: 'NY', salePct: 0.15 },
  { date: '2023-11-01', fruit: 'others', state: 'NY', salePct: 0.40 },
  { date: '2023-11-02', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-02', fruit: 'strawberries', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'others', state: 'NY', salePct: 0.30 },
  { date: '2023-11-03', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-03', fruit: 'strawberries', state: 'NY', salePct: 0.30 },
  { date: '2023-11-03', fruit: 'others', state: 'NY', salePct: 0.20 },
  { date: '2023-11-05', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'apples', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'pears', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'strawberries', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'others', state: 'NY', salePct: 0.60 },
];

但是,还需要限制不被聚合的水果的最大数量(maxFruit)。各日期销售百分比总和较低的水果将依次并入 others,直到未聚合的水果数量不超过 maxFruit。如果 maxFruit = 3,则 aggregatedByThresholdAndQuantity 数组将如下所示:

// Expected result when both criteria are applied (minSalePct = 0.10, maxFruit = 3):
// of the fruit that pass the threshold, only the three with the highest salePct
// summed over all dates keep their own rows; everything else goes into 'others'.
// Each 'others' value is the sum of the folded fruit in rawData for that date,
// so the rows of a date still add up to 1
// (e.g. 2023-11-05: pears 0.10 + figs 0.10 + raspberries 0.25 + blueberries 0.25 = 0.70).
const aggregatedByThresholdAndQuantity = [
  { date: '2023-11-01', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-01', fruit: 'apples', state: 'NY', salePct: 0.25 },
  { date: '2023-11-01', fruit: 'strawberries', state: 'NY', salePct: 0.15 },
  { date: '2023-11-01', fruit: 'others', state: 'NY', salePct: 0.50 },
  { date: '2023-11-02', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'strawberries', state: 'NY', salePct: 0.20 },
  { date: '2023-11-02', fruit: 'others', state: 'NY', salePct: 0.40 },
  { date: '2023-11-03', fruit: 'oranges', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'apples', state: 'NY', salePct: 0.20 },
  { date: '2023-11-03', fruit: 'strawberries', state: 'NY', salePct: 0.30 },
  { date: '2023-11-03', fruit: 'others', state: 'NY', salePct: 0.30 },
  { date: '2023-11-05', fruit: 'oranges', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'apples', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'strawberries', state: 'NY', salePct: 0.10 },
  { date: '2023-11-05', fruit: 'others', state: 'NY', salePct: 0.70 },
];

此外,我想要一个数组,其中包含与其他水果一起聚合的水果名称:

// Expected companion output: the names of the fruit that were folded into the
// 'others' rows for minSalePct = 0.10 and maxFruit = 3.
otherFruit = ['pears', 'figs', 'raspberries', 'cranberries', 'blueberries', 'pineapples', 'peaches'];

这些解决方案解决了部分问题,但我无法将它们结合起来生成所需的输出:

尽管我使用了两个数组(aggregatedByThreshold 和 aggregatedByThresholdAndQuantity)来解释场景,但解决方案不需要中间步骤。只要能得到 aggregatedByThresholdAndQuantity 和 otherFruit,并且可以设置两个条件(minSalePct 和 maxFruit)就足够了。我也接受使用其他库(例如 lodash)的解决方案。

谢谢!

丹妮拉

javascript aggregate data-wrangling
1个回答
0
投票

如果效率不重要,您可以简单地遍历列表两次:第一次确定总销售百分比最高的前 maxFruit 种水果,第二次再进行实际聚合:

const maxFruit = 3;
const minSalePct = 0.10;

/**
 * Folds low-selling fruit into one "others" row per date.
 *
 * A fruit keeps its own rows only if
 *   1. its salePct is >= minSalePct on every date it appears on, and
 *   2. it is among the maxFruit such fruit with the highest salePct summed over all dates.
 * Every other fruit is added to the "others" row of each date it appears on, so a fruit
 * is never named on one date and hidden on another.
 *
 * @param {Array<{date: string, fruit: string, state: string, salePct: number}>} rows
 *   Input rows; the array is not modified.
 * @param {object} [options]
 * @param {number} [options.minSalePct=0] Minimum salePct a fruit must reach on each of its dates.
 * @param {number} [options.maxFruit=Infinity] Maximum number of fruit that keep their own rows.
 * @param {number} [options.decimals=2] Decimals used to round the summed salePct.
 * @returns {{resultList: Array<{date: string, fruit: string, state: string, salePct: number}>, otherFruit: string[]}}
 *   resultList: aggregated rows, dates in first-seen order, "others" last within each date.
 *   otherFruit: names of the fruit that were folded into "others", in first-seen order.
 */
function aggregateSales(rows, { minSalePct = 0, maxFruit = Infinity, decimals = 2 } = {}) {
  const OTHERS = "others";

  // Pass 1: per-fruit total and minimum over all dates.
  // A Map is used instead of a plain object so that names such as "constructor"
  // are not mistaken for already-present keys.
  const statsByFruit = new Map();
  for (const { fruit, salePct } of rows) {
    const stats = statsByFruit.get(fruit);
    if (stats === undefined) {
      statsByFruit.set(fruit, { total: salePct, min: salePct });
    } else {
      stats.total += salePct;
      stats.min = Math.min(stats.min, salePct);
    }
  }

  // Apply the threshold first, then the quantity limit, so a fruit that fails the
  // threshold never occupies one of the maxFruit slots.
  const keptFruit = new Set(
    [...statsByFruit]
      .filter(([, stats]) => stats.min >= minSalePct)
      .sort(([, a], [, b]) => b.total - a.total)
      .slice(0, maxFruit)
      .map(([fruit]) => fruit),
  );

  // Pass 2: sum salePct per date and per output name.
  // NOTE: rows are grouped by date only; the state of the first row seen for a
  // (date, name) pair is reported, which assumes one state per date.
  const totalsByDate = new Map();
  for (const { date, fruit, state, salePct } of rows) {
    const name = keptFruit.has(fruit) ? fruit : OTHERS;
    let totalsByName = totalsByDate.get(date);
    if (totalsByName === undefined) {
      totalsByName = new Map();
      totalsByDate.set(date, totalsByName);
    }
    const entry = totalsByName.get(name);
    if (entry === undefined) {
      totalsByName.set(name, { state, salePct });
    } else {
      entry.salePct += salePct;
    }
  }

  // Flatten back to the input row shape, with "others" last within each date.
  const resultList = [];
  for (const [date, totalsByName] of totalsByDate) {
    const names = [...totalsByName.keys()].filter((name) => name !== OTHERS);
    if (totalsByName.has(OTHERS)) {
      names.push(OTHERS);
    }
    for (const name of names) {
      const { state, salePct } = totalsByName.get(name);
      resultList.push({
        date,
        fruit: name,
        state,
        // Round away floating-point noise from the summation but keep a number,
        // like the input rows (toFixed alone would yield a string).
        salePct: Number(salePct.toFixed(decimals)),
      });
    }
  }

  const otherFruit = [...statsByFruit.keys()].filter((fruit) => !keptFruit.has(fruit));
  return { resultList, otherFruit };
}

// rawData is the array from the question. When this snippet is loaded without it,
// an empty list is used so that aggregateSales can still be reused on its own.
const { resultList, otherFruit } = aggregateSales(
  typeof rawData === "undefined" ? [] : rawData,
  { minSalePct, maxFruit },
);

console.log(resultList);
console.log(otherFruit);
© www.soinside.com 2019 - 2024. All rights reserved.