[23] Find top k (or most frequent) numbers in a stream
Given an array of n numbers. Your task is to read numbers from the array and keep at-most K numbers at the top (According to their decreasing frequency) every time a new number is read. We basically need to print top k numbers sorted by frequency when input stream has included k distinct elements, else need to print all distinct elements sorted by frequency.
Examples:
Input : arr[] = {5, 2, 1, 3, 2}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 2 1 3 5
Input : arr[] = {5, 2, 1, 3, 4}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 1 2 3 4
Expected time complexity is O(n * k)
Given an array of n numbers. Your task is to read numbers from the array and keep at-most K numbers at the top (According to their decreasing frequency) every time a new number is read. We basically need to print top k numbers sorted by frequency when input stream has included k distinct elements, else need to print all distinct elements sorted by frequency.
Examples:
Input : arr[] = {5, 2, 1, 3, 2}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 2 1 3 5
Input : arr[] = {5, 2, 1, 3, 4}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 1 2 3 4
Expected time complexity is O(n * k)
Explanation of 1st example:
Given array is arr[] = {5, 2, 1, 3, 2} and k = 4
Step 1:After reading 5, there is only one element 5 whose frequency is max till now. so print 5.
Step 2:After reading 2, we will have two elements 2 and 5 with same frequency. As 2, is smaller than 5 but their frequency is same so we will print 2 5.
Step 3: After reading 1, we will have 3 elements 1,2 and 5 with same frequency, so print 1 2 5.
Step 4: Similarly after reading 3, print 1 2 3 5
Step 5: After reading last element 2, since 2 has already occurred so we have now frequency of 2 as 2. So we keep 2 at the top and then rest of element with same frequency in sorted order. So print, 2 1 3 5.
Below is the step by step algorithm to do this:
Iterate through the array which contains stream of numbers.
To keep track of top k elements, make a top vector of size k+1.
For every element in the stream increase its frequency and store it in the last position of top vector. We can use hashing for efficiently fetching frequency of an element and increasing it.
Now find the position of element in top vector and iterate from that position to zero. For finding position we can make use of the find() function in C++ STL, it returns an iterator pointing to element if found in the vector.
And make that list of k+1 numbers sorted according to frequency and their value.
Print top k elements form top vector.
Repeat the above steps for every element in the stream.
Below is the implementation of above idea:
// C++ program to find top k elements in a stream
#include <bits/stdc++.h>
using namespace std;
// Function to print top k numbers
void kTop(int a[], int n, int k)
{
// vector of size k+1 to store elements
vector<int> top(k + 1);
// array to keep track of frequency
unordered_map<int, int> freq;
// iterate till the end of stream
for (int m = 0; m < n; m++)
{
// increase the frequency
freq[a[m]]++;
// store that element in top vector
top[k] = a[m];
// search in top vector for same element
auto it = find(top.begin(), top.end() - 1, a[m]);
// iterate from the position of element to zero
for (int i = distance(top.begin(), it) - 1; i >= 0; --i)
{
// compare the frequency and swap if higher
// frequency element is stored next to it
if (freq[top[i]] < freq[top[i + 1]])
swap(top[i], top[i + 1]);
// if frequency is same compare the elements
// and swap if next element is high
else if ((freq[top[i]] == freq[top[i + 1]])
&& (top[i] > top[i + 1]))
swap(top[i], top[i + 1]);
else
break;
}
// print top k elements
for (int i = 0; i < k && top[i] != 0; ++i)
cout << top[i] << ' ';
}
cout << endl;
}
// Driver program to test above function
int main()
{
int k = 4;
int arr[] = { 5, 2, 1, 3, 2 };
int n = sizeof(arr)/sizeof(arr[0]);
kTop(arr, n, k);
return 0;
}
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
import java.io.*;
import java.util.*;
class GFG {
// function to search in top vector for element
static int find(int[] arr, int ele)
{
for (int i = 0; i < arr.length; i++)
if (arr[i] == ele)
return i;
return -1;
}
// Function to print top k numbers
static void kTop(int[] a, int n, int k)
{
// vector of size k+1 to store elements
int[] top = new int[k + 1];
// array to keep track of frequency
HashMap<Integer, Integer> freq = new HashMap<>();
for (int i = 0; i < k + 1; i++)
freq.put(i, 0);
// iterate till the end of stream
for (int m = 0; m < n; m++)
{
// increase the frequency
if (freq.containsKey(a[m]))
freq.put(a[m], freq.get(a[m]) + 1);
else
freq.put(a[m], 1);
// store that element in top vector
top[k] = a[m];
// search in top vector for same element
int i = find(top, a[m]);
i -= 1;
// iterate from the position of element to zero
while (i >= 0)
{
// compare the frequency and swap if higher
// frequency element is stored next to it
if (freq.get(top[i]) < freq.get(top[i + 1]))
{
int temp = top[i];
top[i] = top[i + 1];
top[i + 1] = temp;
}
// if frequency is same compare the elements
// and swap if next element is high
else if ((freq.get(top[i]) == freq.get(top[i + 1])) &&
(top[i] > top[i + 1]))
{
int temp = top[i];
top[i] = top[i + 1];
top[i + 1] = temp;
}
else
break;
i -= 1;
}
// print top k elements
for (int j = 0; j < k && top[j] != 0; ++j)
System.out.print(top[j] + " ");
}
System.out.println();
}
// Driver program to test above function
public static void main(String args[])
{
int k = 4;
int[] arr = { 5, 2, 1, 3, 2 };
int n = arr.length;
kTop(arr, n, k);
}
}
// This code is contributed by rachana soma
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
# Python program to find top k elements in a stream
# Function to print top k numbers
def kTop(a, n, k):
# list of size k+1 to store elements
top = [0 for i in range(k + 1)]
# dictionary to keep track of frequency
freq = {i:0 for i in range(k + 1)}
# iterate till the end of stream
for m in range(n):
# increase the frequency
if a[m] in freq.keys():
freq[a[m]] += 1
else:
freq[a[m]] = 1
# store that element in top vector
top[k] = a[m]
i = top.index(a[m])
i -= 1
while i >= 0:
# compare the frequency and swap if higher
# frequency element is stored next to it
if (freq[top[i]] < freq[top[i + 1]]):
t = top[i]
top[i] = top[i + 1]
top[i + 1] = t
# if frequency is same compare the elements
# and swap if next element is high
elif ((freq[top[i]] == freq[top[i + 1]]) and (top[i] > top[i + 1])):
t = top[i]
top[i] = top[i + 1]
top[i + 1] = t
else:
break
i -= 1
# print top k elements
i = 0
while i < k and top[i] != 0:
print top[i],
i += 1
print
# Driver program to test above function
k = 4
arr = [ 5, 2, 1, 3, 2 ]
n = len(arr)
kTop(arr, n, k)
# This code is contributed by Sachin Bisht
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
Examples:
Input : arr[] = {5, 2, 1, 3, 2}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 2 1 3 5
Input : arr[] = {5, 2, 1, 3, 4}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 1 2 3 4
Expected time complexity is O(n * k)
Given an array of n numbers. Your task is to read numbers from the array and keep at-most K numbers at the top (According to their decreasing frequency) every time a new number is read. We basically need to print top k numbers sorted by frequency when input stream has included k distinct elements, else need to print all distinct elements sorted by frequency.
Examples:
Input : arr[] = {5, 2, 1, 3, 2}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 2 1 3 5
Input : arr[] = {5, 2, 1, 3, 4}
k = 4
Output : 5 2 5 1 2 5 1 2 3 5 1 2 3 4
Expected time complexity is O(n * k)
Explanation of 1st example:
Given array is arr[] = {5, 2, 1, 3, 2} and k = 4
Step 1:After reading 5, there is only one element 5 whose frequency is max till now. so print 5.
Step 2:After reading 2, we will have two elements 2 and 5 with same frequency. As 2, is smaller than 5 but their frequency is same so we will print 2 5.
Step 3: After reading 1, we will have 3 elements 1,2 and 5 with same frequency, so print 1 2 5.
Step 4: Similarly after reading 3, print 1 2 3 5
Step 5: After reading last element 2, since 2 has already occurred so we have now frequency of 2 as 2. So we keep 2 at the top and then rest of element with same frequency in sorted order. So print, 2 1 3 5.
Below is the step by step algorithm to do this:
Iterate through the array which contains stream of numbers.
To keep track of top k elements, make a top vector of size k+1.
For every element in the stream increase its frequency and store it in the last position of top vector. We can use hashing for efficiently fetching frequency of an element and increasing it.
Now find the position of element in top vector and iterate from that position to zero. For finding position we can make use of the find() function in C++ STL, it returns an iterator pointing to element if found in the vector.
And make that list of k+1 numbers sorted according to frequency and their value.
Print top k elements form top vector.
Repeat the above steps for every element in the stream.
Below is the implementation of above idea:
// C++ program to find top k elements in a stream
#include <bits/stdc++.h>
using namespace std;
// Function to print top k numbers
void kTop(int a[], int n, int k)
{
// vector of size k+1 to store elements
vector<int> top(k + 1);
// array to keep track of frequency
unordered_map<int, int> freq;
// iterate till the end of stream
for (int m = 0; m < n; m++)
{
// increase the frequency
freq[a[m]]++;
// store that element in top vector
top[k] = a[m];
// search in top vector for same element
auto it = find(top.begin(), top.end() - 1, a[m]);
// iterate from the position of element to zero
for (int i = distance(top.begin(), it) - 1; i >= 0; --i)
{
// compare the frequency and swap if higher
// frequency element is stored next to it
if (freq[top[i]] < freq[top[i + 1]])
swap(top[i], top[i + 1]);
// if frequency is same compare the elements
// and swap if next element is high
else if ((freq[top[i]] == freq[top[i + 1]])
&& (top[i] > top[i + 1]))
swap(top[i], top[i + 1]);
else
break;
}
// print top k elements
for (int i = 0; i < k && top[i] != 0; ++i)
cout << top[i] << ' ';
}
cout << endl;
}
// Driver program to test above function
int main()
{
int k = 4;
int arr[] = { 5, 2, 1, 3, 2 };
int n = sizeof(arr)/sizeof(arr[0]);
kTop(arr, n, k);
return 0;
}
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
import java.io.*;
import java.util.*;
class GFG {
// function to search in top vector for element
static int find(int[] arr, int ele)
{
for (int i = 0; i < arr.length; i++)
if (arr[i] == ele)
return i;
return -1;
}
// Function to print top k numbers
static void kTop(int[] a, int n, int k)
{
// vector of size k+1 to store elements
int[] top = new int[k + 1];
// array to keep track of frequency
HashMap<Integer, Integer> freq = new HashMap<>();
for (int i = 0; i < k + 1; i++)
freq.put(i, 0);
// iterate till the end of stream
for (int m = 0; m < n; m++)
{
// increase the frequency
if (freq.containsKey(a[m]))
freq.put(a[m], freq.get(a[m]) + 1);
else
freq.put(a[m], 1);
// store that element in top vector
top[k] = a[m];
// search in top vector for same element
int i = find(top, a[m]);
i -= 1;
// iterate from the position of element to zero
while (i >= 0)
{
// compare the frequency and swap if higher
// frequency element is stored next to it
if (freq.get(top[i]) < freq.get(top[i + 1]))
{
int temp = top[i];
top[i] = top[i + 1];
top[i + 1] = temp;
}
// if frequency is same compare the elements
// and swap if next element is high
else if ((freq.get(top[i]) == freq.get(top[i + 1])) &&
(top[i] > top[i + 1]))
{
int temp = top[i];
top[i] = top[i + 1];
top[i + 1] = temp;
}
else
break;
i -= 1;
}
// print top k elements
for (int j = 0; j < k && top[j] != 0; ++j)
System.out.print(top[j] + " ");
}
System.out.println();
}
// Driver program to test above function
public static void main(String args[])
{
int k = 4;
int[] arr = { 5, 2, 1, 3, 2 };
int n = arr.length;
kTop(arr, n, k);
}
}
// This code is contributed by rachana soma
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
# Python program to find top k elements in a stream
# Function to print top k numbers
def kTop(a, n, k):
# list of size k+1 to store elements
top = [0 for i in range(k + 1)]
# dictionary to keep track of frequency
freq = {i:0 for i in range(k + 1)}
# iterate till the end of stream
for m in range(n):
# increase the frequency
if a[m] in freq.keys():
freq[a[m]] += 1
else:
freq[a[m]] = 1
# store that element in top vector
top[k] = a[m]
i = top.index(a[m])
i -= 1
while i >= 0:
# compare the frequency and swap if higher
# frequency element is stored next to it
if (freq[top[i]] < freq[top[i + 1]]):
t = top[i]
top[i] = top[i + 1]
top[i + 1] = t
# if frequency is same compare the elements
# and swap if next element is high
elif ((freq[top[i]] == freq[top[i + 1]]) and (top[i] > top[i + 1])):
t = top[i]
top[i] = top[i + 1]
top[i + 1] = t
else:
break
i -= 1
# print top k elements
i = 0
while i < k and top[i] != 0:
print top[i],
i += 1
# Driver program to test above function
k = 4
arr = [ 5, 2, 1, 3, 2 ]
n = len(arr)
kTop(arr, n, k)
# This code is contributed by Sachin Bisht
Output:
5 2 5 1 2 5 1 2 3 5 2 1 3 5
Time Complexity: O( n * k )
Comments
Post a Comment