考虑下面代表经纪人的类:
public class Broker
{
public string Name = string.Empty;
public int Weight = 0;
public Broker(string n, int w)
{
this.Name = n;
this.Weight = w;
}
}
我想从阵列中随机选择一个Broker,同时考虑它们的权重。
您如何看待下面的代码?
class Program
{
private static Random _rnd = new Random();
public static Broker GetBroker(List<Broker> brokers, int totalWeight)
{
// totalWeight is the sum of all brokers' weight
int randomNumber = _rnd.Next(0, totalWeight);
Broker selectedBroker = null;
foreach (Broker broker in brokers)
{
if (randomNumber <= broker.Weight)
{
selectedBroker = broker;
break;
}
randomNumber = randomNumber - broker.Weight;
}
return selectedBroker;
}
static void Main(string[] args)
{
List<Broker> brokers = new List<Broker>();
brokers.Add(new Broker("A", 10));
brokers.Add(new Broker("B", 20));
brokers.Add(new Broker("C", 20));
brokers.Add(new Broker("D", 10));
// total the weigth
int totalWeight = 0;
foreach (Broker broker in brokers)
{
totalWeight += broker.Weight;
}
while (true)
{
Dictionary<string, int> result = new Dictionary<string, int>();
Broker selectedBroker = null;
for (int i = 0; i < 1000; i++)
{
selectedBroker = GetBroker(brokers, totalWeight);
if (selectedBroker != null)
{
if (result.ContainsKey(selectedBroker.Name))
{
result[selectedBroker.Name] = result[selectedBroker.Name] + 1;
}
else
{
result.Add(selectedBroker.Name, 1);
}
}
}
Console.WriteLine("A\t\t" + result["A"]);
Console.WriteLine("B\t\t" + result["B"]);
Console.WriteLine("C\t\t" + result["C"]);
Console.WriteLine("D\t\t" + result["D"]);
result.Clear();
Console.WriteLine();
Console.ReadLine();
}
}
}
我不那么自信。当我运行它时,经纪人A总是获得比经纪人D更多的点击量,并且他们具有相同的权重。
有更准确的算法吗?
谢谢!
你的算法几乎是正确的。但是,测试应该是<
而不是<=
:
if (randomNumber < broker.Weight)
这是因为0在随机数中包括在内,而totalWeight
是独占的。换句话说,权重为0的经纪人仍然很少被选中 - 根本不是你想要的。这说明经纪人A的点击次数比经纪人D多。
除此之外,你的算法很好,实际上是解决这个问题的规范方法。
那些可以用于任何数据类型的更通用的东西怎么样?
using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
public static class IEnumerableExtensions {
public static T RandomElementByWeight<T>(this IEnumerable<T> sequence, Func<T, float> weightSelector) {
float totalWeight = sequence.Sum(weightSelector);
// The weight we are after...
float itemWeightIndex = new Random().NextDouble * totalWeight;
float currentWeightIndex = 0;
foreach(var item in from weightedItem in sequence select new { Value = weightedItem, Weight = weightSelector(weightedItem) }) {
currentWeightIndex += item.Weight;
// If we've hit or passed the weight we are after for this item then it's the one we want....
if(currentWeightIndex >= itemWeightIndex)
return item.Value;
}
return default(T);
}
}
只需打电话
Dictionary<string, float> foo = new Dictionary<string, float>();
foo.Add("Item 25% 1", 0.5f);
foo.Add("Item 25% 2", 0.5f);
foo.Add("Item 50%", 1f);
for(int i = 0; i < 10; i++)
Console.WriteLine(this, "Item Chosen {0}", foo.RandomElementByWeight(e => e.Value));
class Program
{
static void Main(string[] args)
{
var books = new List<Book> {
new Book{Isbn=1,Name="A",Weight=1},
new Book{Isbn=2,Name="B",Weight=100},
new Book{Isbn=3,Name="C",Weight=1000},
new Book{Isbn=4,Name="D",Weight=10000},
new Book{Isbn=5,Name="E",Weight=100000}};
Book randomlySelectedBook = WeightedRandomization.Choose(books);
}
}
public static class WeightedRandomization
{
public static T Choose<T>(List<T> list) where T : IWeighted
{
if (list.Count == 0)
{
return default(T);
}
int totalweight = list.Sum(c => c.Weight);
Random rand = new Random();
int choice = rand.Next(totalweight);
int sum = 0;
foreach (var obj in list)
{
for (int i = sum; i < obj.Weight + sum; i++)
{
if (i >= choice)
{
return obj;
}
}
sum += obj.Weight;
}
return list.First();
}
}
public interface IWeighted
{
int Weight { get; set; }
}
public class Book : IWeighted
{
public int Isbn { get; set; }
public string Name { get; set; }
public int Weight { get; set; }
}
在选择代理超过内存使用时,另一种方法有利于速度。基本上,我们创建包含与代理实例相同数量的引用的列表作为指定权重。
List<Broker> brokers = new List<Broker>();
for (int i=0; i<10; i++)
brokers.Add(new Broker("A", 10));
for (int i=0; i<20; i++)
brokers.Add(new Broker("B", 20));
for (int i=0; i<20; i++)
brokers.Add(new Broker("C", 20));
for (int i=0; i<10; i++)
brokers.Add(new Broker("D", 10));
然后,选择随机加权的实例是O(1)操作:
int randomNumber = _rnd.Next(0, brokers.length);
selectedBroker = brokers[randomNumber];
由于这是Google的最佳结果:
我创造了a C# library for randomly selected weighted items。
一些示例代码:
IWeightedRandomizer<string> randomizer = new DynamicWeightedRandomizer<string>();
randomizer["Joe"] = 1;
randomizer["Ryan"] = 2;
randomizer["Jason"] = 2;
string name1 = randomizer.RandomWithReplacement();
//name1 has a 20% chance of being "Joe", 40% of "Ryan", 40% of "Jason"
string name2 = randomizer.RandomWithRemoval();
//Same as above, except whichever one was chosen has been removed from the list.
如果您想要更高的速度,您可以考虑加权油藏采样,而不必提前找到总重量(但您可以更频繁地从随机数发生器中采样)。代码可能看起来像
Broker selected = null;
int s = 0;
foreach(Broker broker in brokers) {
s += broker.Weight;
if (broker.Weight <= _rnd.Next(0,s)) {
selected = broker;
}
}
这需要通过列表经纪人一次。但是,如果经纪人列表是固定的或不经常改变,则可以保留累积总和数组,即A [i]是所有经纪人0,...,i-1的权重之和。然后A [n]是总权重,如果你选择一个介于1和A [n-1]之间的数字,比如说x你找到经纪人j s.t. A [j-1] <= x <A [j]。为方便起见,让A [0] = 0.您可以使用二进制搜索以log(n)步骤找到此代理编号j,我将保留代码作为一个简单的练习。如果您的数据经常更改,这可能不是一个好方法,因为每次重量变化时您可能需要更新数组的大部分。
我想出了这个解决方案的通用版本:
public static class WeightedEx
{
/// <summary>
/// Select an item from the given sequence according to their respective weights.
/// </summary>
/// <typeparam name="TItem">Type of item item in the given sequence.</typeparam>
/// <param name="a_source">Given sequence of weighted items.</param>
/// <returns>Randomly picked item.</returns>
public static TItem PickWeighted<TItem>(this IEnumerable<TItem> a_source)
where TItem : IWeighted
{
if (!a_source.Any())
return default(TItem);
var source= a_source.OrderBy(i => i.Weight);
double dTotalWeight = source.Sum(i => i.Weight);
Random rand = new Random();
while (true)
{
double dRandom = rand.NextDouble() * dTotalWeight;
foreach (var item in source)
{
if (dRandom < item.Weight)
return item;
dRandom -= item.Weight;
}
}
}
}
/// <summary>
/// IWeighted: Implementation of an item that is weighted.
/// </summary>
public interface IWeighted
{
double Weight { get; }
}
只是为了分享我自己的实现。希望你会发现它很有用。
// Author: Giovanni Costagliola <[email protected]>
using System;
using System.Collections.Generic;
using System.Linq;
namespace Utils
{
/// <summary>
/// Represent a Weighted Item.
/// </summary>
public interface IWeighted
{
/// <summary>
/// A positive weight. It's up to the implementer ensure this requirement
/// </summary>
int Weight { get; }
}
/// <summary>
/// Pick up an element reflecting its weight.
/// </summary>
/// <typeparam name="T"></typeparam>
public class RandomWeightedPicker<T> where T:IWeighted
{
private readonly IEnumerable<T> items;
private readonly int totalWeight;
private Random random = new Random();
/// <summary>
/// Initiliaze the structure. O(1) or O(n) depending by the options, default O(n).
/// </summary>
/// <param name="items">The items</param>
/// <param name="checkWeights">If <c>true</c> will check that the weights are positive. O(N)</param>
/// <param name="shallowCopy">If <c>true</c> will copy the original collection structure (not the items). Keep in mind that items lifecycle is impacted.</param>
public RandomWeightedPicker(IEnumerable<T> items, bool checkWeights = true, bool shallowCopy = true)
{
if (items == null) throw new ArgumentNullException("items");
if (!items.Any()) throw new ArgumentException("items cannot be empty");
if (shallowCopy)
this.items = new List<T>(items);
else
this.items = items;
if (checkWeights && this.items.Any(i => i.Weight <= 0))
{
throw new ArgumentException("There exists some items with a non positive weight");
}
totalWeight = this.items.Sum(i => i.Weight);
}
/// <summary>
/// Pick a random item based on its chance. O(n)
/// </summary>
/// <param name="defaultValue">The value returned in case the element has not been found</param>
/// <returns></returns>
public T PickAnItem()
{
int rnd = random.Next(totalWeight);
return items.First(i => (rnd -= i.Weight) < 0);
}
/// <summary>
/// Resets the internal random generator. O(1)
/// </summary>
/// <param name="seed"></param>
public void ResetRandomGenerator(int? seed)
{
random = seed.HasValue ? new Random(seed.Value) : new Random();
}
}
}
要点:https://gist.github.com/MrBogomips/ae6f6c9af8032392e4b93aaa393df447
原始问题的实现对我来说似乎有些奇怪;
列表的总重量为60,因此随机数为0-59。它总是根据权重检查随机数,然后减少它。在我看来,它会根据订单的顺序支持列表中的内容。
这是我正在使用的通用实现 - 关键是Random属性:
using System;
using System.Collections.Generic;
using System.Linq;
public class WeightedList<T>
{
private readonly Dictionary<T,int> _items = new Dictionary<T,int>();
// Doesn't allow items with zero weight; to remove an item, set its weight to zero
public void SetWeight(T item, int weight)
{
if (_items.ContainsKey(item))
{
if (weight != _items[item])
{
if (weight > 0)
{
_items[item] = weight;
}
else
{
_items.Remove(item);
}
_totalWeight = null; // Will recalculate the total weight later
}
}
else if (weight > 0)
{
_items.Add(item, weight);
_totalWeight = null; // Will recalculate the total weight later
}
}
public int GetWeight(T item)
{
return _items.ContainsKey(item) ? _items[item] : 0;
}
private int? _totalWeight;
public int totalWeight
{
get
{
if (!_totalWeight.HasValue) _totalWeight = _items.Sum(x => x.Value);
return _totalWeight.Value;
}
}
public T Random
{
get
{
var temp = 0;
var random = new Random().Next(totalWeight);
foreach (var item in _items)
{
temp += item.Value;
if (random < temp) return item.Key;
}
throw new Exception($"unable to determine random {typeof(T)} at {random} in {totalWeight}");
}
}
}