背景
前段时间,接到一个sku摆放的需求,要求sku划分到指定的位置。因此,需要我们对每一条原始的sku数据按位置进行分组展示(见originSku)。其中位置是多层次划分,大致划分为:区域 -> 货架 -> 分区 -> 具体位置 -> 商品明细(如下图所示)。
sku摆放.png
本次需求涉及多层嵌套分组,为了保证执行效率,本次采用方案是按需查询所有的sku,在内存中进行分组处理。对于内存处理分组,我们很自然想到了JDK 8的新特性Stream,利用Stream下的Group By可以很好完成本次功能。
因为涉及的嵌套分组过多,达到4次,当sku数量增多时,这里很快就会出现性能问题,因此不得不考虑分组效率的问题。本次列举了3种分组的实现方式,并通过JMH工具进行性能测试,得到多层嵌套分组的最佳实践。
Stream 多字段 group by
多层嵌套 group by
group by 后遍历再 group by
测试类
/**
 * JMH harness for comparing different ways of grouping a flat SKU list by
 * area, shelf, partition and location.
 */
@BenchmarkMode(Mode.SingleShotTime) // measure the elapsed time of a single invocation
@Warmup(iterations = 2) // warm up twice so JIT compilation does not skew the result
@Threads(2) // 2 threads; choose according to the CPU count of the host (the test machine had 8 cores)
@Fork(2) // fork two processes
@OutputTimeUnit(TimeUnit.MILLISECONDS) // report in ms
public class MultiGroupByTest {

    /** Shared input data, built once when the class is initialized. */
    private static List<OriginSku> originSkuList = getOriginSku();

    /**
     * Builds the test data: 10 areas, 20 shelves per area, 30 partitions per shelf,
     * 30 locations per partition and 10 SKUs per location.
     *
     * @return a newly built list containing one {@code OriginSku} per combination
     */
    public static List<OriginSku> getOriginSku() {
        List<OriginSku> skus = new ArrayList<>();
        for (int area = 0; area < 10; area++) {
            for (int shelf = 0; shelf < 20; shelf++) {
                for (int partition = 0; partition < 30; partition++) {
                    for (int location = 0; location < 30; location++) {
                        for (int item = 0; item < 10; item++) {
                            skus.add(newSku(area, shelf, partition, location, item));
                        }
                    }
                }
            }
        }
        return skus;
    }

    /** Creates a single SKU record for the given position indexes and item index. */
    private static OriginSku newSku(int area, int shelf, int partition, int location, int item) {
        OriginSku sku = new OriginSku();
        sku.setAreaNo("AreaNo-" + area);
        sku.setShelfNo("ShelfNo-" + shelf);
        sku.setPartitionNo("Partition-" + partition);
        sku.setLocationNo("LocationNo-" + location);
        sku.setSkuCode(area + "-SkuCode-" + item);
        sku.setSkuName(area + "-SkuName-" + item);
        sku.setSkuQty(new BigDecimal(item));
        sku.setOrder(item);
        return sku;
    }

    /**
     * Runs every benchmark whose name matches this class's simple name.
     *
     * @param args unused
     * @throws RunnerException if the JMH runner fails
     */
    public static void main(String[] args) throws RunnerException {
        String benchmarkPattern = MultiGroupByTest.class.getSimpleName();
        Options jmhOptions = new OptionsBuilder()
                .include(benchmarkPattern)
                .build();
        new Runner(jmhOptions).run();
    }
}
Stream 多字段 group by
先多字段group by,之后遍历组装数据。
/**
 * Groups the shared SKU list in one pass with a four-level nested
 * {@code groupingBy} collector (area, shelf, partition, location) and then walks
 * the nested maps to assemble the view-object tree.
 *
 * @return one {@link AreaVo} per distinct area number, each carrying its shelves,
 *         partitions, locations and SKU details
 */
@Benchmark
public List<AreaVo> buildSkuLocation3() {
    // Read the pre-built shared list, like the other two benchmarks. Calling
    // getOriginSku() here would rebuild the whole data set inside the measured
    // method and add that cost to this benchmark's score.
    Map<String, Map<String, Map<String, Map<String, List<OriginSku>>>>> map = originSkuList
            .stream()
            .collect(
                    Collectors.groupingBy(OriginSku::getAreaNo,
                            Collectors.groupingBy(OriginSku::getShelfNo,
                                    Collectors.groupingBy(OriginSku::getPartitionNo,
                                            Collectors.groupingBy(OriginSku::getLocationNo,
                                                    Collectors.toList()
                                            )
                                    )
                            )
                    )
            );
    // Area level
    List<AreaVo> resultList = new ArrayList<>();
    map.forEach((k1, v1) -> {
        AreaVo skuAreaVo = new AreaVo();
        skuAreaVo.setAreaNo(k1);
        // Shelf level
        List<ShelfVo> shelfVoList = new ArrayList<>();
        v1.forEach((k2, v2) -> {
            ShelfVo skuShelfVo = new ShelfVo();
            skuShelfVo.setShelfNo(k2);
            // Partition level
            List<PartitionVo> partitionVoList = new ArrayList<>();
            v2.forEach((k3, v3) -> {
                PartitionVo partitionVo = new PartitionVo();
                partitionVo.setPartitionNo(k3);
                // Location level
                List<LocationVo> locationVoList = new ArrayList<>();
                v3.forEach((k4, v4) -> {
                    LocationVo locationVo = new LocationVo();
                    locationVo.setLocationNo(k4);
                    // SKU details, copied in the order they appear in the group
                    // NOTE(review): not sorted by order, unlike buildSkuLocation2 - confirm intended
                    List<SkuDetailVo> skuDetailVoList = new ArrayList<>();
                    for (OriginSku sku : v4) {
                        SkuDetailVo skuDetailVo = new SkuDetailVo();
                        skuDetailVo.setSkuCode(sku.getSkuCode());
                        skuDetailVo.setSkuName(sku.getSkuName());
                        skuDetailVo.setSkuQty(sku.getSkuQty());
                        skuDetailVo.setOrder(sku.getOrder());
                        skuDetailVoList.add(skuDetailVo);
                    }
                    locationVo.setSkuDetailVoList(skuDetailVoList);
                    locationVoList.add(locationVo);
                });
                partitionVo.setLocationVoList(locationVoList);
                partitionVoList.add(partitionVo);
            });
            skuShelfVo.setPartitionVoList(partitionVoList);
            shelfVoList.add(skuShelfVo);
        });
        skuAreaVo.setShelfVoList(shelfVoList);
        resultList.add(skuAreaVo);
    });
    return resultList;
}
多层嵌套 group by
层层嵌套group by,并在group by后遍历组装数据。
/**
 * Groups level by level with nested stream pipelines: every group produced by one
 * {@code groupingBy} is streamed again and grouped by the next field
 * (area, shelf, partition, location). The SKU details of each location are
 * sorted by their order value.
 *
 * @return one {@link AreaVo} per distinct area number
 */
@Benchmark
public List<AreaVo> buildSkuLocation2() {
    return originSkuList.stream()
            .collect(Collectors.groupingBy(OriginSku::getAreaNo))
            .entrySet().stream()
            .map(areaEntry -> {
                // Shelves of this area
                List<ShelfVo> shelves = areaEntry.getValue().stream()
                        .collect(Collectors.groupingBy(OriginSku::getShelfNo))
                        .entrySet().stream()
                        .map(shelfEntry -> {
                            // Partitions of this shelf
                            List<PartitionVo> partitions = shelfEntry.getValue().stream()
                                    .collect(Collectors.groupingBy(OriginSku::getPartitionNo))
                                    .entrySet().stream()
                                    .map(partitionEntry -> {
                                        // Locations of this partition
                                        List<LocationVo> locations = partitionEntry.getValue().stream()
                                                .collect(Collectors.groupingBy(OriginSku::getLocationNo))
                                                .entrySet().stream()
                                                .map(locationEntry -> {
                                                    // SKU details of this location
                                                    List<SkuDetailVo> details = locationEntry.getValue().stream()
                                                            .map(sku -> {
                                                                SkuDetailVo detail = new SkuDetailVo();
                                                                detail.setSkuCode(sku.getSkuCode());
                                                                detail.setSkuName(sku.getSkuName());
                                                                detail.setSkuQty(sku.getSkuQty());
                                                                detail.setOrder(sku.getOrder());
                                                                return detail;
                                                            })
                                                            .sorted(Comparator.comparing(SkuDetailVo::getOrder))
                                                            .collect(Collectors.toList());
                                                    LocationVo locationVo = new LocationVo();
                                                    // The grouping key is the location number shared by the group
                                                    locationVo.setLocationNo(locationEntry.getKey());
                                                    locationVo.setSkuDetailVoList(details);
                                                    return locationVo;
                                                })
                                                .collect(Collectors.toList());
                                        PartitionVo partitionVo = new PartitionVo();
                                        partitionVo.setPartitionNo(partitionEntry.getKey());
                                        partitionVo.setLocationVoList(locations);
                                        return partitionVo;
                                    })
                                    .collect(Collectors.toList());
                            ShelfVo shelfVo = new ShelfVo();
                            shelfVo.setShelfNo(shelfEntry.getKey());
                            shelfVo.setPartitionVoList(partitions);
                            return shelfVo;
                        })
                        .collect(Collectors.toList());
                AreaVo areaVo = new AreaVo();
                areaVo.setAreaNo(areaEntry.getKey());
                areaVo.setShelfVoList(shelves);
                return areaVo;
            })
            .collect(Collectors.toList());
}
group by 后遍历再 group by
先使用Stream按区域进行group by之后,遍历list组装数据并进行group by。
/**
 * Groups the shared SKU list by area with a stream, then iterates the groups;
 * inside each iteration the group's list is grouped again by the next field
 * (shelf, partition, location) while the view objects are assembled.
 *
 * @return one {@link AreaVo} per distinct area number
 */
@Benchmark
public List<AreaVo> buildSkuLocation1() {
    List<AreaVo> areas = new ArrayList<>();
    Map<String, List<OriginSku>> skusByArea =
            originSkuList.stream().collect(Collectors.groupingBy(OriginSku::getAreaNo));
    // Area level
    for (Map.Entry<String, List<OriginSku>> areaEntry : skusByArea.entrySet()) {
        AreaVo areaVo = new AreaVo();
        areaVo.setAreaNo(areaEntry.getKey());

        // Shelf level
        List<ShelfVo> shelves = new ArrayList<>();
        Map<String, List<OriginSku>> skusByShelf =
                areaEntry.getValue().stream().collect(Collectors.groupingBy(OriginSku::getShelfNo));
        for (Map.Entry<String, List<OriginSku>> shelfEntry : skusByShelf.entrySet()) {
            ShelfVo shelfVo = new ShelfVo();
            shelfVo.setShelfNo(shelfEntry.getKey());

            // Partition level
            List<PartitionVo> partitions = new ArrayList<>();
            Map<String, List<OriginSku>> skusByPartition =
                    shelfEntry.getValue().stream().collect(Collectors.groupingBy(OriginSku::getPartitionNo));
            for (Map.Entry<String, List<OriginSku>> partitionEntry : skusByPartition.entrySet()) {
                PartitionVo partitionVo = new PartitionVo();
                partitionVo.setPartitionNo(partitionEntry.getKey());

                // Location level
                List<LocationVo> locations = new ArrayList<>();
                Map<String, List<OriginSku>> skusByLocation =
                        partitionEntry.getValue().stream().collect(Collectors.groupingBy(OriginSku::getLocationNo));
                for (Map.Entry<String, List<OriginSku>> locationEntry : skusByLocation.entrySet()) {
                    LocationVo locationVo = new LocationVo();
                    locationVo.setLocationNo(locationEntry.getKey());

                    // SKU details, copied in the order they appear in the group
                    List<SkuDetailVo> details = new ArrayList<>();
                    for (OriginSku originSku : locationEntry.getValue()) {
                        SkuDetailVo detail = new SkuDetailVo();
                        detail.setSkuCode(originSku.getSkuCode());
                        detail.setSkuName(originSku.getSkuName());
                        detail.setSkuQty(originSku.getSkuQty());
                        detail.setOrder(originSku.getOrder());
                        details.add(detail);
                    }
                    locationVo.setSkuDetailVoList(details);
                    locations.add(locationVo);
                }
                partitionVo.setLocationVoList(locations);
                partitions.add(partitionVo);
            }
            shelfVo.setPartitionVoList(partitions);
            shelves.add(shelfVo);
        }
        areaVo.setShelfVoList(shelves);
        areas.add(areaVo);
    }
    return areas;
}
测试结果
执行效率:Stream 多字段 group by
> 多层嵌套 group by
> group by 后遍历再 group by
Benchmark Mode Cnt Score Error Units
MultiGroupByTest.buildSkuLocation1 ss 2 535.147 ms/op
MultiGroupByTest.buildSkuLocation2 ss 2 622.658 ms/op
MultiGroupByTest.buildSkuLocation3 ss 2 5479.129 ms/op
当数据量不大时,上述三种方式的执行效率是差不多的。数据量上来之后,Stream 多字段 group by 的分组方式可以带来显著的效率提升。
(注:上表 Score 为单次调用耗时,单位 ms/op,数值越小越快;上述排序请对照该表核对。)
后续思考:从源码角度分析上述几种方式执行效率的差别,以及它们的时间复杂度分别是多少?
本次使用的vo
为了方便调试和重现,下面列举本次使用到的vo。
/**
 * View object for one area: the area number and the shelves grouped under it.
 */
@Getter
@Setter
public class AreaVo {

    /** Area number. */
    @ApiModelProperty("区域编号")
    private String areaNo;

    /** Shelves grouped under this area. */
    @ApiModelProperty("货架列表")
    private List<ShelfVo> shelfVoList;
}
/**
 * View object for one shelf: the shelf number and the partitions grouped under it.
 */
@Setter
@Getter
public class ShelfVo {

    /** Shelf number. */
    @ApiModelProperty("货架编号")
    private String shelfNo;

    /**
     * Partitions grouped under this shelf. Declared private like the fields of the
     * sibling view objects; access goes through the generated getter and setter.
     */
    @ApiModelProperty("区位")
    private List<PartitionVo> partitionVoList;
}
/**
 * View object for one partition: the partition code and the storage locations
 * grouped under it.
 */
@Setter
@Getter
public class PartitionVo {

    /** Partition code. */
    @ApiModelProperty("分区编码")
    private String partitionNo;

    /**
     * Storage locations grouped under this partition. Declared private like the
     * fields of the sibling view objects; access goes through the generated
     * getter and setter.
     */
    @ApiModelProperty("存放位置列表")
    private List<LocationVo> locationVoList;
}
/**
 * View object for one storage location: the location number and the SKU details
 * placed there.
 */
@Getter
@Setter
public class LocationVo {

    /** Location number. */
    @ApiModelProperty("位置编号")
    private String locationNo;

    /** SKU details stored at this location. */
    @ApiModelProperty("商品详情")
    private List<SkuDetailVo> skuDetailVoList;
}
/**
 * View object holding the detail fields of a single SKU.
 */
@Getter
@Setter
public class SkuDetailVo {

    /** SKU code. */
    private String skuCode;

    /** SKU name. */
    private String skuName;

    /** SKU quantity. */
    private BigDecimal skuQty;

    /** Order value of the SKU. */
    private Integer order;
}