Top K Frequent Words

Difficulty Level Medium
Frequently asked in Accolite Fourkites Infosys
Hashing Heap TrieViews 2618

In top K frequent words problem, we have given a list of words and an integer k. Print k most frequently used strings in the list.

Top K Frequent Words

Example

Input :

list = {“code”, “sky”, “pen”, “sky”, “sky”, “blue”, “code”}

k = 2

Output : 

sky

code

Input :

list = {“yes”, “no”, “yes”, “yes”}

k = 1

Output : 

yes

Explanation for Top K Frequent Words

Let the given list = {“cpp”, “java”, “java”, “cpp”, “python”, “java”, “cpp”, “kotlin”, “kotlin”, “java”} and k = 3

Total number of occurrence of each word –

  • cpp occured 3 times in the given list.
  • java occured 4 times in the given list.
  • python occured 1 time in the given list.
  • kotlin occured 2 times in the given list.

Decreasing order of the number of occurrence of each word –

java, cpp, kotlin, python

Therefore, the top k (i.e. 3) frequently used words in the given list are –

  1. java
  2. cpp
  3. kotlin

Algorithm for Top K Frequent Words

  1. Initialize a list of words and an integer k.
  2. Initialize a map and a priority queue.
  3. Traverse the list and increment map[list[i]]].
  4. Traverse the map and check if the size of the priority queue is less than k push the current element in the queue.
  5. Else if a top element in the queue is less than the current element removes the top element and inserts the current element in the queue.
  6. Else if the top element in the queue is equal to the current element and key of the top element is greater than the key of current element remove the top element and insert the current element in the queue.
  7. Create an empty list.
  8. Traverse the queue and store the top element in it in a temporary variable, delete the top element and store it’s key in the new list.
  9. Reverse the new list and return it.

C++ Program to find top k frequent words

#include <bits/stdc++.h>
using namespace std;

void frequent(vector<auto> v){
   for(int i = 0; i<v.size(); i++){
      cout<<v[i]<< endl;
   }
}

struct Comparator{
    bool operator()(pair <string ,int> a, pair <string, int> b){
        if(a.second != b.second) 
            return !(a.second < b.second);
        return !(a.first > b.first);
    }
};

class Solution{
    
    public:
        static bool cmp(pair <string, int> a, pair <string, int> b){
            if(a.second != b.second) 
                return a.second > b.second;
            return a.first < b.first;
        }
        
        vector<string> topFrequent(vector<string>& words, int k){
            map<string, int> m;
            
            priority_queue < pair <string, int>, vector < pair <string, int> >, Comparator > v;
            
            for(int i = 0; i < words.size(); i++){
                m[words[i]]++;
            }
            
            map<string, int> :: iterator i = m.begin();
            
            while(i != m.end()){
                if(v.size() < k){
                    v.push(*i);
                }
                else if(v.top().second < i->second){
                    v.pop();
                    v.push(*i);
                }
                else if(v.top().second == i->second && v.top().first > i->first){
                    v.pop();
                    v.push(*i);
                }
                i++;
            }
            
            vector <string> res;
            
            while(!v.empty()){
                pair <string, int> temp = v.top();
                v.pop();
                res.push_back(temp.first);
            }
            
            reverse(res.begin(), res.end());
            return res;
        }
};

int main(){
    
   Solution s;
   
   vector<string> v = {"code", "sky", "pen", "sky", "sky", "blue", "code"};
   int k = 2;
   frequent(s.topFrequent(v, k));
   
   return 0;
   
}
sky
code

Java Program to find top k frequent words

import java.util.*;

class text{
    public List<String> topKFrequentAlternate(final String[] words, int k) {
    	final Map<String, Integer> freq = new HashMap<>();
    	
    	final Queue<WordFreq> queue = new PriorityQueue<>((w1, w2) -> {
    		if (w1.getFreq() != w2.getFreq()) {
    			return w1.getFreq() - w2.getFreq();
    		}
    		return w2.getWord().compareTo(w1.getWord());
    	});
    	
    	final List<String> result = new ArrayList<>();
    
    	for (final String word : words) {
    		if (freq.containsKey(word)) {
    			final int count = freq.get(word);
    			freq.put(word, count + 1);
    		} else {
    			freq.put(word, 1);
    		}
    	}
    
    	for (final Map.Entry<String, Integer> entry : freq.entrySet()) {
    		queue.offer(new WordFreq(entry.getKey(), entry.getValue()));
    
    		if (queue.size() > k) {
    			queue.poll();
    		}
    	}
    
    	while (k-- > 0) {
    		result.add(queue.poll().getWord());
    	}
    
    	Collections.reverse(result);
    	return result;
    }
}
class WordFreq {
  private final String word;
  private final int freq;

  WordFreq(final String word, final int freq) {
    this.word = word;
    this.freq = freq;
  }

  String getWord() {
    return this.word;
  }

  int getFreq() {
    return this.freq;
  }

  @Override
  public String toString() {
    return Objects.toString(word) + "->" + Objects.toString(freq);
  }
  
  public static void main (String[] args){
      text t = new text();
      String[] words = {"code", "sky", "pen", "sky", "sky", "blue", "code"};
      int k = 2;
      List<String> res = new ArrayList<String>();
      res = t.topKFrequentAlternate(words,k);
        ListIterator<String> lItr = res.listIterator();
    while (lItr.hasNext()){
      System.out.println(lItr.next());
    }
  }
}
sky
code

References

Translate »