/**
 * Pick one entry of $data at random, with probability proportional to its weight.
 *
 * Only weights that are numeric, finite and strictly positive take part in the
 * draw. Entries whose weight is missing, non-numeric, zero or negative are never
 * returned and do not influence the odds of the other entries.
 *
 * @param array  $data      Input map, e.g. ['a' => ['weight' => 10, ...], ...]
 * @param string $weightKey Name of the weight field inside each entry (default 'weight')
 * @param bool   $returnKey true: return the key of the selected entry; false: return the entry itself
 *
 * @return mixed The selected entry (or its key), or null when no entry has a usable weight
 */
function selectByProbability(array $data, string $weightKey = 'weight', bool $returnKey = false) {
    // Validate every weight exactly once so the total and the draw always agree
    // on which entries are eligible.
    $weights = [];
    $totalWeight = 0.0;
    foreach ($data as $key => $item) {
        $raw = $item[$weightKey] ?? 0;
        if (!is_numeric($raw)) {
            continue;
        }
        $weight = (float) $raw;
        if (!is_finite($weight) || $weight <= 0) {
            continue;
        }
        $weights[$key] = $weight;
        $totalWeight += $weight;
    }

    if ($totalWeight <= 0) {
        return null;
    }

    // Uniform draw in [0, $totalWeight): dividing by max + 1 excludes the upper
    // bound, and the strict "<" below means a draw of 0 cannot select an entry
    // that contributes nothing to the cumulative sum.
    $rand = mt_rand() / (mt_getrandmax() + 1) * $totalWeight;

    $cumulative = 0.0;
    $lastKey = null;
    foreach ($weights as $key => $weight) {
        $cumulative += $weight;
        $lastKey = $key;
        if ($rand < $cumulative) {
            return $returnKey ? $key : $data[$key];
        }
    }

    // Floating-point rounding may leave the draw marginally above the final
    // cumulative sum; fall back to the last eligible entry.
    return $returnKey ? $lastKey : $data[$lastKey];
}
// Sample input: each key maps to an entry carrying a 'weight' and a payload 'v'.
$data = [
    'a' => ['weight' => 90, 'v' => 'A'],
    'b' => ['weight' => 10, 'v' => 'B'],
    'c' => ['weight' => 20, 'v' => 'C'],
];

// Default mode: the whole selected entry is returned.
$selected = selectByProbability($data);
var_dump($selected);

// With $returnKey = true only the key of the selected entry is returned.
$key = selectByProbability($data, 'weight', true);
echo "选中键名: $key\n";
上面是新版,是让最新的 GPT 重新写的。下面是旧版本,仅用于记录。如果数据量大,还可以考虑使用二分查找进行优化。
// Demo input: three entries, each with a 'weight' and a payload 'v'.
$arr = [
    'a' => ['weight' => 90, 'v' => 'a'],
    'b' => ['weight' => 10, 'v' => 'b'],
    'c' => ['weight' => 20, 'v' => 'c'],
];
/**
 * Bucket-based weighted random pick (approximate).
 *
 * Each entry's share of the total weight is scaled to $precision buckets and
 * rounded; the entry's key is placed into a pool that many times and one pool
 * slot is drawn uniformly. Every entry with a positive weight gets at least one
 * bucket so that it stays selectable even when its share rounds down to zero.
 * Entries whose 'weight' is missing, non-numeric, zero or negative are skipped.
 *
 * @param array $arr       Map of key => ['weight' => number, ...]
 * @param int   $precision Number of buckets the total weight is scaled to (default 100).
 *                         Larger values track the weights more closely but build a larger pool.
 *
 * @return mixed The selected entry of $arr, or null when $arr is not an array,
 *               is empty, or contains no positive weight
 */
function selectValueByProbability($arr, $precision = 100){
    if (!is_array($arr) || $arr === []) {
        return null;
    }

    // Keep only usable weights; `?? 0` avoids a warning on a missing 'weight' key.
    $weights = [];
    foreach ($arr as $key => $val) {
        $weight = $val['weight'] ?? 0;
        if (is_numeric($weight) && $weight > 0) {
            $weights[$key] = $weight + 0;
        }
    }

    $weightsSum = array_sum($weights);
    if ($weightsSum <= 0) {
        // Nothing to draw from; also guards the division below.
        return null;
    }

    $precision = max(1, (int) $precision);

    $pool = [];
    foreach ($weights as $key => $weight) {
        // At least one bucket per positive weight, so the pool is never empty
        // and small weights are not silently dropped by the rounding.
        $slots = max(1, (int) round($weight / $weightsSum * $precision));
        for ($i = 0; $i < $slots; $i++) {
            $pool[] = $key;
        }
    }

    return $arr[$pool[array_rand($pool)]];
}
// Draw one entry from the demo input and dump it.
$picked = selectValueByProbability($arr);
var_dump($picked);
chatGPT问了一大圈,还是没弄出来,可能是我问的不对吧,最终我整合了下,再换了思路
- 先通过array_sum 、array_column获取数组中所有权重的总值
- 定义一个临时数组(空值)
- 遍历目标数组,并获取值的权重和权重总值的比例乘以100,再四舍五入
- 往前面的临时数组中循环插入该项的 key,循环次数为第 3 步得到的值
- 通过array_rand随机获取临时数组中的值
- 返回结果
咱不知道有没有更好的方案,虽然不是非常精确,但凑合着用吧。可以适当调大第 3 步中的 100 这个值,来实现更高的精度。