Pick a number randomly from an unlimited stream
Pick a number randomly from an unlimited stream
Problem
This problem was asked by Facebook.
Given a stream of elements too large to store in memory, pick a random element from the stream with uniform probability.
Solution
This problem can be extended to pick k number randomly with equal probability.
- Pick one number
- Pick K number
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>
#include <cstdlib>
using namespace std;
template<class Type>
class Generator {
public:
typedef Type DataType;
virtual bool HasNext() = 0;
virtual Type Next() = 0;
virtual void Reset() = 0;
};
class IntegerGenerator: public Generator<int> {
public:
IntegerGenerator(int n, const int limit): i(0), _v(n) {
generate(begin(_v), end(_v), [&]() {
return rand() % limit;
});
copy(begin(_v), end(_v), ostream_iterator<int>(cout, ","));
cout << "\n";
}
bool HasNext() {
return i < _v.size();
}
int Next() {
return _v[i++];
}
void Reset() { i = 0; }
private:
vector<Generator<int>::DataType> _v;
int i;
};
template<class Type>
int pick_one_number(Generator<Type>& g) {
int result = 0;
int cur = 1;
while (g.HasNext()) {
auto r = rand();
auto p = r % cur;
auto next = g.Next();
cout << "R: " << r << " cur: " << cur << " Prob: " << p << " next: " << next << "\n";
if (p < 1) {
result = next;
}
++cur;
}
return result;
}
template<class Type>
vector<int> pick_k_number(Generator<Type>& g, int k) {
vector<int> result;
int cur = 1;
while (g.HasNext()) {
auto next = g.Next();
if (result.size() < k) {
result.push_back(next);
} else {
auto r = rand() % cur;
cout << "Whether select: " << r << "/" << cur << "\n";
if (r < k) {
auto kk = rand() % k;
cout << "Select element: " << kk << "\n";
result[kk] = next;
}
}
++cur;
}
return result;
}
int main() {
srand(time(nullptr));
IntegerGenerator gen(100, 1000);
auto n = pick_one_number(gen);
cout << "Picked number = " << n<< endl;
gen.Reset();
auto r = pick_k_number(gen, 20);
cout << "Select K = ";
copy(begin(r), end(r), ostream_iterator<int>(cout, ","));
cout << "\n";
return EXIT_SUCCESS;
}
Comments
- Reservoir Sampling
This post is licensed under CC BY 4.0 by the author.