YO! Faux, Bro! No,No,No!

I searched Google to translate this and I got :

was searching Google for the English translation of this Hindi poem “with time” little did I know it was actually sarcastic code.. well I had a suspicion.. MFs!


IMG_7372.MOV

from google.colab import drive import pandas as pd import numpy as np from numpy import array from numpy import asarray from numpy import zeros import nltk from nltk.corpus import stopwords import re import string from itertools import groupby from collections import Counter import matplotlib.pyplot as plt from scipy.sparse import hstack from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score from sklearn.metrics import recall_score, f1_score from sklearn.model_selection import train_test_split from sklearn.utils import shuffle from fuzzywuzzy import process

In [ ]:

pip install fuzzywuzzy

( FUZZY WUZZY WAS A “BEAR” wasn’t he? Stupid mfs !!! all year long!!! I have been plagued by the hackers (and my roommate) calling me black bear and making reference to it constantly . I knew it was something ,. But not being technologically advanced, I didn’t know what ! Do you know how hard it is to look someone in the face whom you know is lying to you and talking shit and making fun of you .. but you can’t prove it ( other than gut intuition) and you have to respond like the dumb twat they think you are .. and continue being nice and in the dark but no really .. I want to kill them ) and they keep doing it constantly degrading you’re very self-esteem and not only them but everybody you know until that’s happened to you. You’ve never walked a mile in my shoes.

Collecting fuzzywuzzy Downloading <https://files.pythonhosted.org/packages/43/ff/74f23998ad2f93b945c0309f825be92e04e0348e062026998b5eefef4c33/fuzzywuzzy-0.18.0-py2.py3-none-any.whl> Installing collected packages: fuzzywuzzy Successfully installed fuzzywuzzy-0.18.0

In [ ]:

drive.mount(‘/content/drive’)

`Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&response_type=code&scope=email https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly

Enter your authorization code: ·········· Mounted at /content/drive`

In [ ]:

# sarcasm datawith open(‘/content/drive/My Drive/Data Files/code-mixed analysis data/Sarcasm_tweets.txt’) as f: lines = [line.rstrip() for line in f]

new_lines = [] for line in lines: if line == ”: continue new_lines.append(line)

tweet_ids = [] tweets = []

for i in range(len(new_lines)): if i%2 == 0: tweet_ids.append(new_lines[i]) else: tweets.append(new_lines[i])

# annotationswith open(‘/content/drive/My Drive/Data Files/code-mixed analysis data/Sarcasm_tweet_truth.txt’) as f1: lines1 = [line.rstrip() for line in f1]

labels = []

for i in range(len(lines1)): if i%2 != 0: labels.append(lines1[i])

# tweets with language f2 = open(‘/content/drive/My Drive/Data Files/code-mixed analysis data/Sarcasm_tweets_with_language.txt’, ‘r’) tokens_list = [] tokens = [] languages_list = [] languages = [] for line in f2: line = line.strip() line = line.split(‘ ‘) line = [token.strip() for token in line if token != ” and token != ‘ ‘ and token != ‘\n’] if len(line) == 0: tokens_list.append(tokens) languages_list.append(languages) tokens = [] languages = [] elif len(line) == 1: continue else: tokens.append(line[0]) languages.append(line[1])

tokens_list.append(tokens) languages_list.append(languages)

In [ ]:

df = pd.DataFrame(data=tweet_ids, columns=[‘Tweet ID’]) df[‘Tweet’] = tweets df[‘Label’] = labels df[‘Tokens’] = tokens_list df[‘Languages’] = languages_list df

Out[ ]:

5250 rows × 5 columns

In [ ]:

np.random.seed(10) df_y = df[df.Label ==”YES”] df_n = df[df.Label == “NO”] drop_indices = np.random.choice(df_n.index, 4000, replace=False) df_subset_n = df_n.drop(drop_indices) frames = [df_y , df_subset_n] df = pd.concat(frames, ignore_index = True) df

Out[ ]:

1250 rows × 5 columns

In [ ]:

# preprocessing# emoticon analysis all_emoticons = [ { “icons”: [“:-)”, “:)”, “:-]”, “:]”, “:-3”, “:3”, “:->”, “:>”, “8-)”, “8)”, “:-}”, “:}”, “:o)”, “:c)”, “:^)”, “=]”, “=)”], “keywords”: [“happy”, “smile”, “face”] }, { “icons”: [“:-D”, “:D”, “8-D”, “8D”, “x-D”, “xD”, “X-D”, “XD”, “=D”, “=3”, “B^D”], “keywords”: [“laugh”, “grin”, “wide-eyed”, “surprise”, “face”] }, { “icons”: [“:-))”], “keywords”: [“very”, “happy”, “double”, “chin”] }, { “icons”: [“:-(“, “:(“, “:-c”, “:c”, “:-<“, “:<“, “:-[“, “:[“, “:-||”, “>:[“, “:{“, “:@”, “>:(“, “;(“], “keywords”: [“frown”, “sad”, “angry”, “pout”] }, { “icons”: [“:’-(“, “:'(“, “:,(“, ‘:”(‘, “:((“, “:\'(“, “:\“], “keywords”: [“tears”, “cry”, “sad”] }, { “icons”: [“:’-)”, “:’)”], “keywords”: [“tears”, “happy”] }, { “icons”: [“D-‘:”, “D:<“, “D:”, “D;”, “D=”, “DX”], “keywords”: [“horror”, “disgust”, “sad”, “dismay”] }, { “icons”: [“:-“, “:“, “:x”], “keywords”: [“kiss”] }, { “icons”: [“:-O”, “:O”, “:-o”, “:o”, “:-0”, “8-0”, “>:0”], “keywords”: [“surprise”, “shock”] }, { “icons”: [“;-)”, “;)”, “-)”, “*)”, “;-]”, “;]”, “;-D”, “;D”], “keywords”: [“wink”, “smile”] }, { “icons”: [“:-P”, “:P”, “X-P”, “XP”, “x-p”, “xp”, “:-p”, “:p”, “=p”, “>:P”], “keywords”: [“tongue”, “stick”, “out”, “cheeky”, “playful”] }, { “icons”: [“:-|”, “:|”], “keywords”: [“straight”, “face”, “expressionless”] }, { “icons”:[“:-/”, “:/”, “>:/”, “:\’”, “>:\’”, “=/”, “=\’”], “keywords”: [“skeptical”, “annoy”, “uneasy”] }, { “icons”: [“(:”, “(-:”], “keywords”: [“sarcasm”, “irony”] }, { “icons”: [“<3”], “keywords”: [“love”, “heart”] }, { “icons”: [“3”], “keywords”: [“broken”, “heart”] } ]

def get_emoticons(tokens): emoticons = []for token in tokens: for i in range(len(all_emoticons)): if token in all_emoticons[i]['icons']: emoticons.append(token) return emoticons

def get_keywords(emoticons): emoticons = list(set(emoticons)) keywords = []for emoticon in emoticons: for i in range(len(all_emoticons)): if emoticon in all_emoticons[i]['icons']: keywords.append(all_emoticons[i]['keywords']) return keywords

df[‘Emoticons’] = df[‘Tokens’].apply(lambda x: get_emoticons(x)) df[‘Keywords’] = df[‘Emoticons’].apply(lambda x: get_keywords(x))

In [ ]:

pip install wordninja

Collecting wordninja Downloading <https://files.pythonhosted.org/packages/30/15/abe4af50f4be92b60c25e43c1c64d08453b51e46c32981d80b3aebec0260/wordninja-2.0.0.tar.gz> (541kB) |████████████████████████████████| 542kB 3.3MB/s Building wheels for collected packages: wordninja Building wheel for wordninja (setup.py) ... done Created wheel for wordninja: filename=wordninja-2.0.0-cp36-none-any.whl size=541552 sha256=e9a3bdd1bf5a646c23085f21d6486664a00ad5e839bad2bc3d58948d83b283c0 Stored in directory: /root/.cache/pip/wheels/22/46/06/9b6d10ed02c85e93c3bb33ac50e2d368b2586248f192a2e22a Successfully built wordninja Installing collected packages: wordninja Successfully installed wordninja-2.0.0

In [ ]:

# hashtag analysis# complete words list words_list = []

with open(‘/content/drive/My Drive/Data Files/code-mixed analysis data/words.txt’) as f3: for line in f3: line = line.strip() words_list.append(line)

# this ensures we ony deal with full words rather than each individual letter. Normalize the words basicallydef words(text): return re.findall(‘[a-z]+’, text.lower())

# calculate the probability of a word based on occurrences in the dictionarydef word_prob(word): return dictionary[word] / total

# this gets us a hash where the keys are words and the values are the number of ocurrances in the dictionary dictionary = Counter(words(open(‘/content/drive/My Drive/Data Files/code-mixed analysis data/words_alpha.txt’).read())) # dictionary = dict((w, len(list(ws))) # for w, ws in groupby(sorted(words(open(‘/content/drive/My Drive/Data Files/sarcasm detection data/big.txt’).read()))))# assign the length of the longest word in the dictionary max_word_length = max(map(len, dictionary))

# assign the total number of words in the dictionary. It’s a float because we’re going to divide by it later on total = float(sum(dictionary.values()))

In [ ]:

import wordninja

# case 1, when each word in a hashtag starts by an uppercase letterdef hashtag_analysis1(hashtag): hashtag = re.findall(‘^[a-z]+|[A-Z][^A-Z]*’, hashtag) hashtag = [tag.lower() for tag in hashtag] hashtag = ” “.join(hashtag) return hashtag

# case 2, when the words are separated by special charactersdef hashtag_analysis2(hashtag): hashtag = re.split(r”[^a-zA-Z0-9\s]”, hashtag) hashtag = [tag.lower() for tag in hashtag] hashtag = ” “.join(hashtag) return hashtag

*# case 3, when each word starts by a lowercase letter, separate the hashtag in the fewest # possible number of words, from left to right using the english words corpus (eng_words)# hashtag will be a compound word such as ‘wickedweather’*def hashtag_analysis3(hashtag): if hashtag == ‘irony’ or hashtag == ‘sarcasm’: return hashtagprobs, lasts = [1.0], [0] *# iterate over the letters in the compound# eg. [w, ickedweather], [wi, ckedweather], and so on* probs, lasts = [1.0], [0] for i in range(1, len(hashtag) + 1): prob_k, k = max((probs[j] * word_prob(hashtag[j:i]), j) for j in range(max(0, i - max_word_length), i)) probs.append(prob_k) lasts.append(k) words = [] i = len(hashtag) while 0 < i: words.append(hashtag[lasts[i]:i]) i = lasts[i] words.reverse() words = " ".join(words) return words

# case 3 analysis, but using wordninjadef hashtag_analysis3_wordninja(hashtag): hashtag = wordninja.split(hashtag) hashtag = ” “.join(hashtag) return hashtag

# get hashtags from tokensdef get_hashtags(tokens): hashtags = [] for token in tokens: if token[0] == ‘#’: hashtags.append(token) return hashtags

# hashtag analysisdef hashtag_analysis(hashtags_list): final_tags = [] for hashtags in hashtags_list: tags = [] for hashtag in hashtags: hashtag = hashtag[1:] if ‘_’ in hashtag or ‘-‘ in hashtag: tags.append(hashtag_analysis2(hashtag)) elif hashtag.islower() == False: tags.append(hashtag_analysis1(hashtag)) else: tags.append(hashtag_analysis3_wordninja(hashtag)) final_tags.append(tags) return final_tags

hashtags_list = df[‘Tokens’].apply(lambda x: get_hashtags(x)) df[‘Hashtags’] = hashtag_analysis(hashtags_list)

In [ ]:

# get mentionsdef get_mentions(tokens): mentions = [] for token in tokens: if token[0] == ‘@’: mentions.append(token)return mentions

# get urlsdef get_urls(tweet): url_regex = [r’http[s]?://(?:[a-z]|[0-9]|[$-_@.&+]|[!*,]|(?:%[0-9a-f][0-9a-f]))+’] url_re = re.compile(r'(‘+’|’.join(url_regex)+’)’, re.VERBOSE | re.IGNORECASE) urls = url_re.findall(tweet) return urls

mentions_list = df[‘Tokens’].apply(lambda x: get_mentions(x)) urls_list = df[‘Tweet’].apply(lambda x: get_urls(x))

In [ ]:

nltk.download(‘stopwords’)

[nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Unzipping corpora/stopwords.zip.

Out[ ]:

True

In [ ]:

# preprocess tweet, remove hashtags, emoticons, mentions, urls, punctuations, stopwords stop_words = set(stopwords.words(‘english’))

def preprocess_tweet(tokens): preprocessed_tokens = []for i in range(len(tokens)): if tokens[i][0] == '#': continue elif tokens[i][0] == '@': continue elif 'pic.twitter.com' in tokens[i] or 'http' in tokens[i]: continue elif tokens[i] in df['Emoticons'][i]: continue elif tokens[i][0] in string.punctuation: continue elif tokens[i] in stop_words: continue elif tokens[i] == 'RT': continue preprocessed_tokens.append(tokens[i].lower()) return preprocessed_tokens

# add hashtags except irony, sarcasmdef add_decomposed_hashtags(tweets, hashtags_list): for i in range(len(hashtags_list)): for hashtag in hashtags_list[i]: if hashtag == ‘irony’ or hashtag == ‘sarcasm’: continue tweets[i] = tweets[i] + ” ” + hashtag tweets[i] = tweets[i].strip()return tweets

df[‘Preprocessed Tokens’] = df[‘Tokens’].apply(lambda x: preprocess_tweet(x)) df[‘Preprocessed Tweets’] = [” “.join(tokens) for tokens in df[‘Preprocessed Tokens’]] df[‘Preprocessed Tweets’] = add_decomposed_hashtags(df[‘Preprocessed Tweets’], df[‘Hashtags’]) df[‘Preprocessed Tokens’] = ]

In [ ]:

*# encoding, 0 for ‘NO’ and 1 for ‘YES’*def class_parse(): class_types = [] for i in range(df.shape[0]): if df[‘Label’][i] == “NO”: class_types.append(0) else: class_types.append(1)return class_types

df[‘Class Type’] = class_parse()

In [ ]:

df

Out[ ]:

1250 rows × 11 columns

In [ ]:

tweet_class = df[‘Label’].tolist() print(len(tweet_class)) print(tweet_class.count(‘NO’)) print(tweet_class.count(‘YES’))

1250 746 504

In [ ]:

# bar representation objects = (‘Non Sarcastic’, ‘Sarcastic’) y_pos = np.arange(len(objects)) performance = [tweet_class.count(‘NO’), tweet_class.count(‘YES’)]

plt.bar(y_pos, performance, align=’center’, alpha=0.5) plt.xticks(y_pos, objects) plt.ylabel(‘Count’) plt.title(‘Classes’)

plt.show()



In [ ]:

# feature vector# 1. char n-grams (1-3)# 2. word n-grams (1-2)# 3. punctuations (count of each punctuation mark)# 4. laugh words (“lmao”, “lol”, “haha”, “rofl”, “lel”, “hehehe”, “lolol”)# 5. intensifiers (number of intensifiers in a tweet)# 6. negation words (number of negation words in a tweet)# 7. (i) number of characters present in the tweet (ii) number of words in the tweet # (iii) average word length in the tweet# 8. hashtags -> (split text added to preprocessed token list)# 9. emoticons (2 ways): (i) replace emoticons with respective keywords. (ii) build emoticon feature vector

In [ ]:

# emoticon feature vector. presence or absence of various emoticons in the tweet

all_emoticons1 = \ [ ‘:-)’, ‘:)’, ‘(:’, ‘(-:’,\ ‘:-D’, ‘:D’, ‘X-D’, ‘XD’, ‘xD’,\ ‘<3’, ‘:\*’,\ ‘;-)’, ‘;)’, ‘;-D’, ‘;D’, ‘(;’, ‘(-;’,\ ‘:-(‘, ‘:(‘,\ ‘:,(‘, ‘:\'(‘, ‘:”(‘, ‘:((‘,\ ‘:-P’, ‘:P’, ‘:p’, ‘:-p’,\ ]

def add_emoticon_features(): emoticon_feature_vector = [] for i in range(len(all_emoticons1)): temp_vector = [] for emoticons in df[‘Emoticons’]: if all_emoticons1[i] in emoticons: temp_vector.append(1) else: temp_vector.append(0) emoticon_feature_vector.append(temp_vector) return emoticon_feature_vector

emoticon_feature_vector = add_emoticon_features() emoticon_feature_vector = np.array(np.transpose(emoticon_feature_vector)) emoticon_feature_vector.shape

Out[ ]:

(1250, 27)

In [ ]:

from sklearn.feature_extraction.text import TfidfVectorizer

all_tokens = df[‘Preprocessed Tokens’] all_tweets = df[‘Preprocessed Tweets’]

# char n grams char_vectorizer = TfidfVectorizer( sublinear_tf=True, strip_accents=’unicode’, analyzer=’char’, stop_words=’english’, ngram_range=(1,3), max_features=500) char_vectorizer.fit(all_tweets) char_features = char_vectorizer.transform(all_tweets)

# word n grams word_vectorizer = TfidfVectorizer( sublinear_tf=True, strip_accents=’unicode’, analyzer=’word’, token_pattern=r’\w{1,}’, stop_words=’english’, ngram_range=(1, 3), max_features=500) word_vectorizer.fit(all_tweets) word_features = word_vectorizer.transform(all_tweets)

char_features = char_features.todense() word_features = word_features.todense()

print(char_features.shape, word_features.shape)

/usr/local/lib/python3.6/dist-packages/sklearn/feature_extraction/text.py:520: UserWarning: The parameter 'stop_words' will not be used since 'analyzer' != 'word' warnings.warn("The parameter 'stop_words' will not be used"

(1250, 500) (1250, 500)

In [ ]:

n_for_char = 3 n_for_word = 2

# get character n-grams (n=1-3) for a tweetdef get_char_n_grams(tweet): char_n_grams = [] for i in range(1, n_for_char + 1): char_i_grams = for j in range(len(tweet) – (i-1))] char_n_grams.extend(char_i_grams) return char_n_grams

# get word n-grams (n=1-2) for a tweetdef get_word_n_grams(tokens): word_n_grams = [] for i in range(1, n_for_word + 1): word_i_grams = [” “.join(tokens[j:j+i]) for j in range(len(tokens) – (i-1))] word_n_grams.extend(word_i_grams) return word_n_grams

In [ ]:

# all character n-grams for values of n ranging from 1 to 3. consider only those n-grams which occur at least 10# times in the dataset char_n_grams_index = {} word_n_grams_index = {}

def get_all_char_n_grams(all_tweets): char_n_grams = [] n_grams_count = {} for tweet in all_tweets: char_i_grams = get_char_n_grams(tweet) for i_gram in char_i_grams: if i_gram in n_grams_count: n_grams_count[i_gram] += 1 else: n_grams_count[i_gram] = 1for i_gram, count in n_grams_count.items(): if count >= 10: char_n_grams.append(i_gram) return char_n_grams

# all word n-grams for values of n ranging from 1 to 2. consider only those n-grams which occur at least 10# times in the datasetdef get_all_word_n_grams(all_tokens): word_n_grams = [] n_grams_count = {} for tokens in all_tokens: word_i_grams = get_word_n_grams(tokens) for i_gram in word_i_grams: if i_gram in n_grams_count: n_grams_count[i_gram] += 1 else: n_grams_count[i_gram] = 1 for i_gram, count in n_grams_count.items(): if count >= 10: word_n_grams.append(i_gram) return word_n_grams

all_char_n_grams = list(set(get_all_char_n_grams(all_tweets))) all_word_n_grams = list(set(get_all_word_n_grams(all_tokens))) print(len(all_char_n_grams)) print(len(all_word_n_grams))

2124 354

In [ ]:

# char n gram feature vectordef add_char_n_gram_features(): char_feature_vector = [] for char_i_gram in all_char_n_grams: temp_vector = [] for tweet in all_tweets: char_n_grams = get_char_n_grams(tweet) if char_i_gram in char_n_grams: temp_vector.append(1) else: temp_vector.append(0) char_feature_vector.append(temp_vector) return char_feature_vector

char_feature_vector = add_char_n_gram_features() char_feature_vector = np.array(np.transpose(char_feature_vector)) print(char_feature_vector.shape)

(1250, 2124)

In [ ]:

# word n gram feature vectordef add_word_n_gram_features(): word_feature_vector = [] for word_i_gram in all_word_n_grams: temp_vector = [] for tokens in all_tokens: word_n_grams = get_word_n_grams(tokens) if word_i_gram in word_n_grams: temp_vector.append(1) else: temp_vector.append(0) word_feature_vector.append(temp_vector) return word_feature_vector

word_feature_vector = add_word_n_gram_features() word_feature_vector = np.array(np.transpose(word_feature_vector)) print(word_feature_vector.shape)

(1250, 354)

In [ ]:

feat_df = pd.DataFrame(data=np.concatenate((char_feature_vector, word_feature_vector), axis=1)) feat_df.shape

Out[ ]:

(1250, 2478)

In [ ]:

feat_df.to_csv(‘/content/drive/My Drive/Data Files/code-mixed analysis data/features.csv’, sep=’,’)

In [ ]:

feat_df = pd.read_csv(‘/content/drive/My Drive/Data Files/code-mixed analysis data/features.csv’) feat_df.shape

Out[ ]:

(1250, 2479)

In [ ]:

# count occurences of “lol”, “lmao”, “hahaha”, “hehehe” laugh_words = [“lol”, “lolol”, “lololol”, “loll”, “lolll”, “lollll”, “looll”, “loolll”, “lmao”, “lmaoo”, “lmaooo”, “lmaoooo”, “haha”, “hahah”, “hahaha”, “hahahah”, “hahahaha”, “bahaha”, “bahahah”, “bahahaha”, “bwahahaha”, “hehe”, “heheh”, “hehehe”, “heheheh”, “hehehehe”]

def get_laugh_words_count(tokens): count = 0 for token in tokens: if token in laugh_words: count = count + 1return count

laugh_words_count = df[‘Preprocessed Tokens’].apply(lambda x: get_laugh_words_count(x))

In [ ]:

# count occurrences of intensifiers intensifiers = [“amazingly”, “astoundingly”, “awful”, “bare”, “bloddy”, “crazy”, “dead”, “colossally”, “especially”, “excptionally”, “excessively”, “extremely”, “extraodinarily”, “fantastically”, “frightfully”, “fucking”, “fully”, “hella”, “incredibly”, “insanely”, “literally”, “mad”, “mightly”, “most”, “outrageously”, “phenomenally”, “precious”, “quite”, “radically”, “rather”, “real”, “really”, “remarkably”, “ridicously”, “right”, “sick”, “so”, “somewhat”, “strikingly”, “super”, “supremely”, “surpassingly”, “terribly”, “terrifically”, “too”, “totally”, “veritable”, “very”, “wicked”]

# partial matching to be done# wordnet synonymsdef get_intensifiers_count(tokens): count = 0 for token in tokens: if token in intensifiers: count = count + 1 return count

# def get_intensifiers_count(tokens):# count = 0# for token in tokens:# highest = process.extractOne(token, intensifiers)# if highest[1] > 90:# count = count + 1# return count

intensifiers_count = df[‘Preprocessed Tokens’].apply(lambda x: get_intensifiers_count(x))

In [ ]:

# count occurrences of negations negations = [“never”, “no”, “nothing”, “nowhere”, “noone”, “none”, “not”, “havent”,”have not”, “hasnt”,”has not”, “hadnt”,”had not”, “cant”,”cannot”,”couldnt”, “could not”,”shant”,”shall not”, “shouldnt”,”should not”, “wont”,”will not”,”wouldnt”,”would not”, “dont”,”do not”,”doesnt”,”does not”, “didnt”,”did not”, “isnt”,”is not”, “arent”,”are not”, “aint”, “am not”, “may not”, “might not”, “wasnt”,”was not”,”werent”, “were not”]

#contracted formsdef get_negations_count(tokens): count = 0 for token in tokens: if token in negations: count = count + 1return count

negations_count = df[‘Preprocessed Tokens’].apply(lambda x: get_negations_count(x))

In [ ]:

# number of characters present in the tweetdef get_char_count(tokens): tweet = ” “.join(tokens) return len(tweet)

# number of words in a tweetdef get_word_count(tokens): return len(tokens)

# punctuation countdef get_punct_count(tokens): tweet = “” for i in range(len(tokens)): if tokens[i][0] == ‘#’ or tokens[i][0] == ‘@’ or ‘pic.twitter.com‘ in tokens[i] or tokens[i] in df[‘Emoticons’][i]: continue tweet = tweet + ” ” + tokens[i]tweet = tweet.strip() return len("".join(_ for _ in tweet if _ in string.punctuation))

char_count = df[‘Preprocessed Tokens’].apply(lambda x: get_char_count(x)) word_count = df[‘Preprocessed Tokens’].apply(lambda x: get_word_count(x)) word_density = char_count/word_count punct_count = df[‘Tokens’].apply(lambda x: get_punct_count(x))

In [ ]:

features1 = np.concatenate((char_feature_vector, word_feature_vector), axis=1) features2 = np.vstack((laugh_words_count, intensifiers_count, negations_count, char_count, word_count, word_density, punct_count))

In [ ]:

# feature selectionfrom sklearn.feature_selection import SelectKBest, chi2, SelectFromModel, f_classif

def feature_selection(features, y): model = SelectKBest(score_func=chi2, k=500) fit = model.fit(features, y) new_features = fit.transform(features) return features.tolist()

new_features = feature_selection(features, df[‘Tweet Class’])

ML MODELS

In [ ]:

X = features1 y = df[‘Class Type’] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,random_state=1)

In [ ]:

# svcfrom sklearn.svm import SVC

svm = SVC(kernel=’linear’, probability=True) svm.fit(X_train, y_train) y_pred = svm.predict(X_test) svm_probs = svm.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred)) print(len(y_pred))

0.9361702127659575 188

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred, digits=4))

`[[102 6] [ 6 74]] precision recall f1-score support 0 0.9444 0.9444 0.9444 108 1 0.9250 0.9250 0.9250 80 accuracy 0.9362 188

macro avg 0.9347 0.9347 0.9347 188 weighted avg 0.9362 0.9362 0.9362 188`

In [ ]:

# naive bayesfrom sklearn.naive_bayes import GaussianNB

gnb = GaussianNB() gnb.fit(X_train, y_train) y_pred = gnb.predict(X_test) gnb_probs = gnb.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.8617021276595744

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred, digits=4))

`[[101 7] [ 19 61]] precision recall f1-score support 0 0.8417 0.9352 0.8860 108 1 0.8971 0.7625 0.8243 80 accuracy 0.8617 188

macro avg 0.8694 0.8488 0.8551 188 weighted avg 0.8652 0.8617 0.8597 188`

In [ ]:

# random forestfrom sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, bootstrap = True, max_features = ‘sqrt’) rf.fit(X_train, y_train) y_pred = rf.predict(X_test) # Probabilities for each class rf_probs = rf.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.9680851063829787

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred))

`[[103 5] [ 1 79]] precision recall f1-score support 0 0.99 0.95 0.97 108 1 0.94 0.99 0.96 80 accuracy 0.97 188

macro avg 0.97 0.97 0.97 188 weighted avg 0.97 0.97 0.97 188`

In [ ]:

# knnfrom sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier() knn.fit(X_train, y_train) y_pred = knn.predict(X_test) print(accuracy_score(y_test,y_pred))

0.925531914893617

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred))

`[[103 5] [ 9 71]] precision recall f1-score support 0 0.92 0.95 0.94 108 1 0.93 0.89 0.91 80 accuracy 0.93 188

macro avg 0.93 0.92 0.92 188 weighted avg 0.93 0.93 0.93 188`

In [ ]:

# multinomial NBfrom sklearn.naive_bayes import MultinomialNB

mnb = MultinomialNB() mnb.fit(X_train, y_train) y_pred = mnb.predict(X_test) mnb_probs = mnb.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.9574468085106383

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred))

`[[103 5] [ 3 77]] precision recall f1-score support 0 0.97 0.95 0.96 108 1 0.94 0.96 0.95 80 accuracy 0.96 188

macro avg 0.96 0.96 0.96 188 weighted avg 0.96 0.96 0.96 188`

In [ ]:

# logistic regressionfrom sklearn.linear_model import LogisticRegression

logreg = LogisticRegression() logreg.fit(X_train, y_train) y_pred = logreg.predict(X_test) logreg_probs = logreg.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.9574468085106383

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred, digits=4))

`[[103 5] [ 3 77]] precision recall f1-score support 0 0.9717 0.9537 0.9626 108 1 0.9390 0.9625 0.9506 80 accuracy 0.9574 188

macro avg 0.9554 0.9581 0.9566 188 weighted avg 0.9578 0.9574 0.9575 188`

In [ ]:

# xgboostfrom xgboost import XGBClassifier

xg = XGBClassifier() xg.fit(X_train, y_train) y_pred = xg.predict(X_test) xg_probs = xg.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.9574468085106383

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred, digits=4))

`[[103 5] [ 3 77]] precision recall f1-score support 0 0.9717 0.9537 0.9626 108 1 0.9390 0.9625 0.9506 80 accuracy 0.9574 188

macro avg 0.9554 0.9581 0.9566 188 weighted avg 0.9578 0.9574 0.9575 188`

In [ ]:

# adaboostfrom sklearn.ensemble import AdaBoostClassifier from sklearn.tree import DecisionTreeClassifier

ada = AdaBoostClassifier( DecisionTreeClassifier(max_depth=1), n_estimators=200 ) ada.fit(X_train, y_train) y_pred = ada.predict(X_test) ada_probs = ada.predict_proba(X_test)[:, 1] print(accuracy_score(y_test,y_pred))

0.9468085106382979

In [ ]:

print(confusion_matrix(y_test,y_pred)) print(classification_report(y_test,y_pred, digits=4))

`[[102 6] [ 4 76]] precision recall f1-score support 0 0.9623 0.9444 0.9533 108 1 0.9268 0.9500 0.9383 80 accuracy 0.9468 188

macro avg 0.9445 0.9472 0.9458 188 weighted avg 0.9472 0.9468 0.9469 188`

DL MODELS

In [ ]:

from tensorflow.python.keras.preprocessing.text import one_hot from tensorflow.python.keras.preprocessing.sequence import pad_sequences from tensorflow.python.keras.models import Sequential from tensorflow.python.keras.layers.core import Activation, Dropout, Dense from tensorflow.python.keras.layers import Flatten, GlobalMaxPooling1D, Bidirectional from tensorflow.python.keras.layers.recurrent import LSTM from tensorflow.python.keras.layers.embeddings import Embedding from tensorflow.python.keras.preprocessing.text import Tokenizer from tensorflow.python.keras.callbacks import EarlyStopping

from tensorflow.python.keras.layers import Input, CuDNNLSTM, CuDNNGRU, Conv1D from tensorflow.python.keras.layers import GlobalMaxPool1D, GlobalAveragePooling1D from tensorflow.python.keras.layers import Input, Conv2D, MaxPool2D, concatenate from tensorflow.python.keras.layers import Reshape, Concatenate, SpatialDropout1D from tensorflow.python.keras.optimizers import Adam from tensorflow.python.keras.models import Model from tensorflow.python.keras.layers import Layer, InputSpec from tensorflow.python.keras import initializers, regularizers, constraints, optimizers, layers

from tensorflow.python.keras.layers import * from tensorflow.python.keras.models import * from tensorflow.python.keras.initializers import * from tensorflow.python.keras.optimizers import * import tensorflow.python.keras.backend as K from tensorflow.python.keras.callbacks import * import tensorflow as tf

from data_helpers import BPE

In [ ]:

pip install git+git://github.com/qevo/py_data_helper.git

Collecting git+git://github.com/qevo/py_data_helper.git Cloning git://github.com/qevo/py_data_helper.git to /tmp/pip-req-build-aljph9i6 Running command git clone -q git://github.com/qevo/py_data_helper.git /tmp/pip-req-build-aljph9i6 Building wheels for collected packages: data-helper Building wheel for data-helper (setup.py) ... done Created wheel for data-helper: filename=data_helper-0.2.3-cp36-none-any.whl size=6682 sha256=20644c6a90f210ff23b4b8990e3bddea0c1562243d819f27ff94b18718aa7645 Stored in directory: /tmp/pip-ephem-wheel-cache-qu26iw9h/wheels/ac/95/cc/b588c9a7148810267975c68ab54c447dddfdd2ba26b3a1ed01 Successfully built data-helper Installing collected packages: data-helper Successfully installed data-helper-0.2.3

In [ ]:

X = df[‘Preprocessed Tweets’] y = df[‘Class Type’] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15,random_state=1)

Word Embeddings

In [ ]:

# tokenizing using Keras Tokenizer num_words = 30000 tokenizer = Tokenizer(num_words=num_words, lower=True, split=’ ‘) tokenizer.fit_on_texts(X_train)

X_train = tokenizer.texts_to_sequences(X_train) X_test = tokenizer.texts_to_sequences(X_test)

word_index = tokenizer.word_index print(‘Found %s unique tokens.’ % len(word_index)) print(word_index)

Found 13430 unique tokens. {'hai': 1, 'ki': 2, 'ko': 3, 'politics': 4, 'ke': 5, 'talaq': 6, 'bhi': 7, 'se': 8, 'triple': 9, 'cricket': 10, 'ka': 11, 'aur': 12, 'ho': 13, 'nahi': 14, 'k': 15, 'ye': 16, 'hi': 17, 'bollywood': 18, 'kar': 19, 'hain': 20, 'aap': 21, 'kya': 22, 'liye': 23, 'kuch': 24, 'h': 25, 'par': 26, 'koi': 27, 'jo': 28, 'na': 29, 'ek': 30, 'bhai': 31, 'ab': 32, 'log': 33, 'ne': 34, 'rahe': 35, 'mein': 36, 'pe': 37, 'aaj': 38, 'desh': 39, 'baat': 40, 'raha': 41, 'nhi': 42, 'ji': 43, 'sab': 44, 'tha': 45, 'main': 46, 'toh': 47, 'wo': 48, 'sir': 49, 'i': 50, 'gaya': 51, 'hota': 52, 'jab': 53, 'kiya': 54, 'apni': 55, 'tum': 56, 'karte': 57, 'a': 58, 'b': 59, 'mai': 60, 'rahi': 61, 'abhi': 62, 'kabhi': 63, 'apne': 64, 'karne': 65, 'chahiye': 66, 'pakistan': 67, 'india': 68, 's': 69, 'modi': 70, 'sirf': 71, 'tu': 72, 'muslim': 73, 'jaise': 74, 'hoga': 75, 'kisi': 76, 'diya': 77, 'tak': 78, 'hum': 79, 'n': 80, 'ya': 81, 'aa': 82, 'karo': 83, 'kaam': 84, 'de': 85, 'agar': 86, 'din': 87, 'ban': 88, 'yeh': 89, 'naam': 90, 'wale': 91, 'kr': 92, 'hua': 93, 'gaye': 94, 'har': 95, 'ram': 96, 'halala': 97, 'woh': 98, 'khan': 99, 'logo': 100, 'nai': 101, 'news': 102, 'karna': 103, 'sahi': 104, 'vote': 105, 'hoti': 106, 'bahut': 107, 'p': 108, 'team': 109, 'thi': 110, 'kam': 111, 'ghar': 112, 'hindu': 113, 'pata': 114, 'ha': 115, 'kyu': 116, 'dekh': 117, 'mujhe': 118, 'baad': 119, 'phir': 120, 'khel': 121, 'v': 122, 'ja': 123, 'le': 124, 'khud': 125, '2': 126, 'l': 127, 'bas': 128, 'khatam': 129, 'to': 130, 'sakta': 131, 'fir': 132, 'kaha': 133, 'bjp': 134, 'm': 135, 'sath': 136, 'karta': 137, 'bharat': 138, 'gayi': 139, 'band': 140, 'match': 141, 'hote': 142, 'lagta': 143, 'per': 144, 'hu': 145, 'r': 146, 'mat': 147, 'nahin': 148, 'aise': 149, 'kal': 150, 'jaye': 151, 'u': 152, 'pehle': 153, 'aisa': 154, 'bol': 155, 'me': 156, 'rhe': 157, 'dete': 158, 'hone': 159, 'maa': 160, 'dil': 161, 'yaar': 162, 'e': 163, 'pr': 164, 'lo': 165, 'party': 166, 'ni': 167, 'saath': 168, 'janta': 169, 'lekin': 170, 'jyada': 171, 'congress': 172, 'kare': 173, 'baap': 174, 'q': 175, 'tarah': 176, '1': 177, 'saal': 178, 'kaise': 179, 'hogi': 180, 'jata': 181, 'acha': 182, 'bana': 183, 'rahim': 184, 'bank': 185, 'kuchh': 186, 'c': 187, 'sakte': 188, 'jai': 189, 'jis': 190, 'gya': 191, 'mere': 192, '3': 193, 'waqt': 194, 'baba': 195, 'is': 196, 'apna': 197, 'kab': 198, 'aage': 199, 'chal': 200, 'rha': 201, 'dhoni': 202, 'logon': 203, 'tab': 204, 'mar': 205, 'itna': 206, 'jagah': 207, 'us': 208, 'pak': 209, 'islam': 210, 'baar': 211, 'wala': 212, 'o': 213, 'liya': 214, 'tujhe': 215, 'khush': 216, 'the': 217, 'soch': 218, 'yogi': 219, 'jaan': 220, 'hue': 221, 'wali': 222, 't': 223, 'aata': 224, 'mil': 225, 'world': 226, 'film': 227, 'samajh': 228, 'tere': 229, 'd': 230, 'j': 231, 'indian': 232, 'bad': 233, 'love': 234, 'hui': 235, 'waise': 236, 'teri': 237, 'kumar': 238, 'yaha': 239, 'hona': 240, 'ap': 241, 'aurat': 242, 'mera': 243, 'movie': 244, 'day': 245, 'yahi': 246, 'tumhara': 247, 'wajah': 248, '5': 249, '4': 250, 'meri': 251, 'laga': 252, 'dekho': 253, 'jao': 254, 'dena': 255, 'kaun': 256, 'bihar': 257, 'support': 258, 'dharm': 259, 'duniya': 260, 'hindi': 261, 'g': 262, 'lag': 263, 'yahan': 264, 'gai': 265, 'one': 266, 'galat': 267, 'iss': 268, 'maut': 269, 'aisi': 270, 'walo': 271, 'aaye': 272, 'vo': 273, 'sarkar': 274, 'jayega': 275, 'jati': 276, 'abe': 277, 'itni': 278, 'karke': 279, 'time': 280, 'bhagwan': 281, 'kyun': 282, 'singh': 283, 'use': 284, 'hamare': 285, 'sabhi': 286, 'pakistani': 287, 'w': 288, 'deta': 289, 'yaad': 290, 'bat': 291, 'tumhe': 292, 'nawaz': 293, 'jate': 294, 'sach': 295, 'gandi': 296, 'rhi': 297, 'sahab': 298, 'jaisa': 299, 'ga': 300, 'media': 301, 'gye': 302, 'star': 303, 'chor': 304, 'upar': 305, 'aapne': 306, 'x': 307, 'uske': 308, 'kahi': 309, 'wahi': 310, 'neta': 311, 'mei': 312, 'chutiye': 313, 'gandhi': 314, 'kay': 315, 'women': 316, 'maar': 317, 'hoon': 318, 'dene': 319, 'khushi': 320, 'naa': 321, 'mudda': 322, 'bada': 323, 'pasand': 324, 'baare': 325, 'aati': 326, 'isliye': 327, 'kitne': 328, 'chutiya': 329, 'kapoor': 330, 'jeet': 331, 'wah': 332, 'of': 333, 'matlab': 334, 'fan': 335, 'virat': 336, 'sc': 337, 'aj': 338, 'life': 339, 'dekha': 340, 'case': 341, 'in': 342, 'lage': 343, 'kha': 344, 'say': 345, 'bolo': 346, 'army': 347, 'ghatiya': 348, 'sabse': 349, 'tera': 350, 'un': 351, 'salman': 352, 'bolte': 353, 'bade': 354, 'maine': 355, 'bahar': 356, 'tweet': 357, 'law': 358, 'keh': 359, 'lena': 360, 'dekhte': 361, 'lahore': 362, 'la': 363, 'shuru': 364, 'door': 365, 'rahul': 366, 'like': 367, 'uska': 368, 'shayad': 369, 'cheez': 370, 'badal': 371, 'khilaf': 372, 'mulk': 373, 'unke': 374, 'gyi': 375, 'di': 376, 'najam': 377, 'accha': 378, 'achha': 379, 'bi': 380, 'kyon': 381, '6': 382, 'khatm': 383, 'kanoon': 384, 'problem': 385, 'tumhare': 386, 'zyada': 387, 'kis': 388, 'madam': 389, 'mandir': 390, 'unhe': 391, 'khelne': 392, 'bilkul': 393, 'chalta': 394, 'bar': 395, 'chhod': 396, 'paise': 397, 'zindagi': 398, 'aik': 399, 'court': 400, 'karachi': 401, 'rakha': 402, 'achi': 403, 'shadi': 404, 'haan': 405, 'kitna': 406, 'inko': 407, 'bare': 408, 'unko': 409, 'raees': 410, 'rahega': 411, 'karni': 412, 'allah': 413, 'denge': 414, 'jaate': 415, 'usko': 416, 'tripletalaq': 417, 'rape': 418, 'aapko': 419, 'saare': 420, 'baaki': 421, 'mila': 422, 'dekhne': 423, 'pm': 424, 'karega': 425, 'haq': 426, 'pur': 427, 'inki': 428, 'sare': 429, 'badi': 430, 'kon': 431, 'jaa': 432, 'or': 433, 'kharab': 434, 'umar': 435, 'badh': 436, 'bhaiya': 437, 'aapki': 438, 'kyo': 439, '10': 440, 'afridi': 441, 'best': 442, 'aaya': 443, 'tumhari': 444, 'biwi': 445, 'hay': 446, 'muslims': 447, 'thoda': 448, 'dhyan': 449, 'tabhi': 450, 'dekhna': 451, 'lanka': 452, 'pura': 453, 'politicians': 454, 'thik': 455, 'batao': 456, 'are': 457, 'isse': 458, 'pyar': 459, '20': 460, 'bache': 461, 'lol': 462, 'haar': 463, 'jana': 464, 'nay': 465, 'baki': 466, 'bata': 467, 'alag': 468, 'mata': 469, 'faisla': 470, 'hamari': 471, 'kehte': 472, 'honge': 473, 'akmal': 474, 'bola': 475, 'kia': 476, 'aane': 477, 'bina': 478, 'y': 479, 'captain': 480, 'sar': 481, 'virodh': 482, 'unki': 483, 'nazar': 484, 'kahan': 485, 'shah': 486, 'rakh': 487, 'shame': 488, 'wohi': 489, 'up': 490, 'banane': 491, 'jihad': 492, 'insaan': 493, 'karti': 494, 'khelna': 495, 'jaldi': 496, 'sl': 497, 'dirty': 498, 'sharjeel': 499, 'pagal': 500, 'dv': 501, 'bachcho': 502, 'man': 503, 'gali': 504, 'akhilesh': 505, 'taraf': 506, 'modiji': 507, 'chala': 508, 'bura': 509, 'sharam': 510, 'debate': 511, 'kahin': 512, 'chod': 513, 'he': 514, 'tho': 515, 'insan': 516, 'hein': 517, 'sri': 518, 'dard': 519, 'pahle': 520, 'aam': 521, 'please': 522, 'waale': 523, 'raho': 524, 'khela': 525, 'sub': 526, 'saale': 527, 'ladki': 528, 'rahay': 529, 'sakti': 530, 'ker': 531, 'diye': 532, 'uski': 533, 'triplet': 534, 'raj': 535, 'delhi': 536, 'imran': 537, 'lete': 538, 'ahmed': 539, 'koshish': 540, 'gorakhpur': 541, 'usse': 542, 'bus': 543, 'sharm': 544, 'dusre': 545, 'help': 546, 'magar': 547, 'game': 548, 'kashmir': 549, 'dono': 550, 'sal': 551, 'odi': 552, 'aayi': 553, 'musalman': 554, '30': 555, 'hey': 556, 'hy': 557, 'kiye': 558, 'chaiye': 559, 'mare': 560, 'sharif': 561, 'bhe': 562, 'samaj': 563, 'hame': 564, 'start': 565, 'ham': 566, 'fans': 567, 'marne': 568, 'verdict': 569, 'ache': 570, 'sb': 571, 'dukh': 572, 'aana': 573, 'indvs': 574, 'agr': 575, 'chale': 576, 'puri': 577, 'kro': 578, 'ik': 579, 'family': 580, 'barbad': 581, 'bijapur': 582, 'bulls': 583, 'pta': 584, 'ge': 585, 'hun': 586, 'roti': 587, 'jisko': 588, 'jarurat': 589, 'jee': 590, 'no': 591, 'ise': 592, 'tune': 593, 'unka': 594, 'movies': 595, 'karan': 596, 'kapil': 597, 'shaadi': 598, 'jaisi': 599, 'bhakt': 600, 'kv': 601, 'jail': 602, 'see': 603, 'bole': 604, 'lene': 605, 'khelte': 606, 'nehi': 607, 'bolne': 608, 'beta': 609, 'dikhta': 610, 'yar': 611, 'kohli': 612, 'haal': 613, 'rohit': 614, 'srk': 615, 'chahte': 616, 'gay': 617, 'deti': 618, 'iski': 619, 'hind': 620, 'chahe': 621, 'milega': 622, 'sabko': 623, 'wahan': 624, 'dost': 625, 'jayegi': 626, 'banaya': 627, 'tou': 628, 'poora': 629, 'mumbai': 630, 'china': 631, 'free': 632, 'bc': 633, 'join': 634, 'mile': 635, 'isi': 636, '0': 637, 'lalu': 638, 'krte': 639, 'roshan': 640, 'cinema': 641, 'app': 642, 'azadi': 643, 'suna': 644, 'samjh': 645, 'paisa': 646, 'harami': 647, 'kyunki': 648, 'international': 649, 'teen': 650, 'bandh': 651, 'warna': 652, 'musalmano': 653, 'khuda': 654, 'han': 655, 'vikas': 656, 'public': 657, 'socho': 658, 'behen': 659, 'instant': 660, 'bhar': 661, 'eleven': 662, 'khelta': 663, 'beef': 664, 'haryana': 665, 'isme': 666, 'mauka': 667, 'bacha': 668, 'mahila': 669, 'theek': 670, 'hamesha': 671, 'right': 672, 'line': 673, 'dikh': 674, 'kijiye': 675, 'izzat': 676, 'nikal': 677, 'hrithik': 678, 'samay': 679, 'pad': 680, 'jaati': 681, 'iska': 682, 'rukh': 683, 'king': 684, 'raksha': 685, 'banate': 686, 'name': 687, 'dekhta': 688, 'tv': 689, 'padi': 690, 'paye': 691, 'number': 692, 'channel': 693, 'khelo': 694, 'chakkar': 695, 'dijiye': 696, '9': 697, 'sari': 698, 'awam': 699, 'wicket': 700, 'behan': 701, 'nd': 702, 'isko': 703, 'aya': 704, 'chinta': 705, 'kejriwal': 706, 'bs': 707, 'viru': 708, 'rok': 709, 'dega': 710, 'focus': 711, 'anti': 712, 'bhari': 713, 'aajkal': 714, 'karwa': 715, 'likha': 716, 'president': 717, 'panti': 718, 'inke': 719, 'pyaar': 720, 'paida': 721, 'mana': 722, 'issue': 723, 'bolta': 724, 'dharam': 725, 'sy': 726, 'rajniti': 727, 'pani': 728, 'setthi': 729, 'jahan': 730, 'dalit': 731, 'sachin': 732, 'twitter': 733, 'song': 734, 'jaane': 735, 'sale': 736, 'stadium': 737, 'but': 738, 'govt': 739, 'halat': 740, 'gareeb': 741, 'aapke': 742, '7': 743, '15': 744, 'mahilao': 745, 'sharma': 746, 'lakh': 747, 'wapis': 748, 'jitna': 749, 'dar': 750, 'kee': 751, 'beti': 752, 'yadav': 753, 'krna': 754, 'yehi': 755, 'chup': 756, 'vande': 757, 'shahid': 758, 'jaata': 759, 'bhutto': 760, 'sun': 761, 'raat': 762, 'payega': 763, 'uss': 764, 'kai': 765, 'khilaaf': 766, 'train': 767, 'muh': 768, 'be': 769, 'jane': 770, 'lie': 771, 'astha': 772, 'waha': 773, 'ind': 774, 'bachho': 775, 'aankh': 776, 'gau': 777, 'topic': 778, 'bachche': 779, 'mushkil': 780, 'mulle': 781, 'pappu': 782, 'mard': 783, 'jan': 784, 'batting': 785, 'krne': 786, 'board': 787, 'naya': 788, 'nikah': 789, 'pas': 790, 'aulad': 791, 'need': 792, 'tumko': 793, 'gi': 794, 'phle': 795, 'dikha': 796, 'karen': 797, 'mataram': 798, 'phone': 799, 'ky': 800, 'li': 801, 'khana': 802, 'plz': 803, 'code': 804, 'hm': 805, 'matter': 806, 'award': 807, 'khattar': 808, 'bahot': 809, 'chinese': 810, 'ramrahim': 811, 'dhamki': 812, 'jese': 813, 'video': 814, 'happy': 815, 'bhut': 816, 'satta': 817, 'terrorist': 818, 'new': 819, 'karenge': 820, 'appeasement': 821, 'aate': 822, 'padta': 823, 'you': 824, 'dr': 825, 'daal': 826, 'haha': 827, 'oh': 828, 'baccho': 829, 'drama': 830, 'change': 831, 'bachao': 832, 'paas': 833, 'pass': 834, 'thodi': 835, 'bech': 836, 'chalo': 837, 'kutta': 838, 'films': 839, 'chuka': 840, 'humare': 841, 'admi': 842, 'bhool': 843, 'behtar': 844, 'hero': 845, 'milti': 846, 'dimag': 847, '100': 848, 'shri': 849, 'kahte': 850, 'sala': 851, 'jada': 852, 'andar': 853, 'haath': 854, 'ata': 855, 'aye': 856, 'corruption': 857, 'ta': 858, 'zara': 859, 'maza': 860, 'saaf': 861, 'nafrat': 862, '70': 863, 'itne': 864, 'jin': 865, 'civil': 866, 'mann': 867, 'banao': 868, 'urdu': 869, 'ati': 870, 'khilari': 871, 'polygamy': 872, 'hate': 873, 'ak': 874, 'pure': 875, 'kah': 876, 'hoo': 877, 'may': 878, 'bacche': 879, 'qaum': 880, 'iske': 881, 'reh': 882, 'chalte': 883, 'sad': 884, 'power': 885, 'rehta': 886, 'abey': 887, 'nam': 888, 'bowler': 889, 'actor': 890, 'on': 891, 'hospital': 892, 'inka': 893, 'lekar': 894, 'men': 895, 'hindustan': 896, 'aamir': 897, 'apko': 898, 'karoge': 899, 'phr': 900, 'hn': 901, 'arjun': 902, 'kyuki': 903, 'sarfaraz': 904, 'bete': 905, '11': 906, 'it': 907, 'zinda': 908, 'inhe': 909, 'roz': 910, 'sa': 911, 'lv': 912, 'usme': 913, 'petrol': 914, 'himmat': 915, 'bane': 916, 'saab': 917, 'gae': 918, 'leader': 919, 'pareshan': 920, 'kariye': 921, 'bibi': 922, 'bhaii': 923, 'mamta': 924, 'walon': 925, 'leta': 926, '50': 927, '17': 928, 'jayenge': 929, 'kami': 930, 'bachane': 931, 'akshay': 932, 'dubai': 933, 'choti': 934, 'kewal': 935, 'prem': 936, 'arey': 937, 'aman': 938, 'aadmi': 939, 'jaha': 940, 'chalega': 941, '8': 942, '2019': 943, 'toilet': 944, 'chalti': 945, 'kara': 946, 'apke': 947, 'years': 948, 'independence': 949, 'jisne': 950, 'religion': 951, 'buhat': 952, 'kasam': 953, 'vahi': 954, 'bahan': 955, 'maulana': 956, 'asli': 957, 'cm': 958, 'fast': 959, 'etc': 960, 'achhe': 961, 'birthday': 962, 'honi': 963, 'peeche': 964, 'acchi': 965, 'leke': 966, 'cow': 967, 'over': 968, 'punjab': 969, 'sports': 970, 'chalu': 971, 'dur': 972, 'flop': 973, 'badhai': 974, 'jaana': 975, 'bahal': 976, 'muje': 977, 'hit': 978, 'bacho': 979, 'milta': 980, 'dia': 981, 'crore': 982, 'run': 983, 'real': 984, 'god': 985, 'waah': 986, 'roman': 987, 'rule': 988, 'player': 989, 'haram': 990, 'political': 991, 'rapist': 992, 'future': 993, 'chahta': 994, 'azam': 995, 'malik': 996, 'sohail': 997, 'kash': 998, 'ganesh': 999, 'mulla': 1000, 'sikhaya': 1001, 'aayegi': 1002, 'cup': 1003, 'papa': 1004, 'insaf': 1005, 'suru': 1006, 'rakho': 1007, 'chori': 1008, 'sati': 1009, 'hawa': 1010, 'nh': 1011, 'achcha': 1012, 'galti': 1013, '72': 1014, 'jise': 1015, 'banda': 1016, 'thaa': 1017, 'huye': 1018, 'lagi': 1019, 'aazadi': 1020, 'sethi': 1021, 'babar': 1022, 'samne': 1023, 'shukriya': 1024, 'maan': 1025, 'sake': 1026, 'halalala': 1027, 'zindabad': 1028, 'bhot': 1029, 'tuje': 1030, 'what': 1031, 'krta': 1032, 'baithe': 1033, 'rawalpindi': 1034, 'khail': 1035, 'do': 1036, 'jitni': 1037, 'mudde': 1038, 'saala': 1039, 'akki': 1040, 'es': 1041, 'poori': 1042, 'sawal': 1043, 'usi': 1044, 'uniform': 1045, 'marte': 1046, 'milte': 1047, 'utha': 1048, 'salute': 1049, 'hee': 1050, 'sudhar': 1051, 'samman': 1052, 'nahe': 1053, 'pade': 1054, 'gir': 1055, 'aao': 1056, 'shanti': 1057, 'kamino': 1058, 'alawa': 1059, 'national': 1060, 'dusri': 1061, 'good': 1062, 'players': 1063, 'today': 1064, 'kaho': 1065, 'ummid': 1066, 'yah': 1067, 'sabiq': 1068, 'chuke': 1069, 'padhai': 1070, 'thay': 1071, 'hope': 1072, 'hamara': 1073, 'pate': 1074, 'hindustani': 1075, 'socha': 1076, 'series': 1077, 'bach': 1078, 'pehlay': 1079, 'australia': 1080, 'kursi': 1081, 'yaa': 1082, 'bachpan': 1083, 'kisne': 1084, 'haa': 1085, 'vaha': 1086, '25': 1087, 'saza': 1088, 'kangana': 1089, 'kaafi': 1090, 'hissa': 1091, 'karein': 1092, 'bara': 1093, 'pradesh': 1094, 'lagu': 1095, 'bro': 1096, 'mante': 1097, 'janam': 1098, 'bachhe': 1099, 'sabit': 1100, 'supreme': 1101, 'abb': 1102, 'lenge': 1103, 'banana': 1104, 'jisse': 1105, 'jarur': 1106, 'jaaye': 1107, 'go': 1108, 'huwa': 1109, 'shaiqeen': 1110, 'masjid': 1111, 'karlo': 1112, 'home': 1113, 'kamzor': 1114, 'so': 1115, 'samjha': 1116, 'rehna': 1117, 'level': 1118, 'sapne': 1119, 'khushamdeed': 1120, 'kahtay': 1121, 'maizbani': 1122, 'baytab': 1123, 'dg': 1124, 'chote': 1125, 'rakhte': 1126, 'dusro': 1127, 'politician': 1128, 'madarchod': 1129, 'madad': 1130, 'fark': 1131, 'hen': 1132, 'hogaye': 1133, 'bolna': 1134, 'zaroorat': 1135, 'chuki': 1136, 'isis': 1137, 'election': 1138, 'padhe': 1139, 'rahte': 1140, 'lady': 1141, 'sibbal': 1142, 'gujarat': 1143, 'inning': 1144, 'khatre': 1145, 'actress': 1146, 'cong': 1147, 'bardasht': 1148, 'maja': 1149, 'aapka': 1150, 'arre': 1151, 'amla': 1152, 'lijiye': 1153, 'bumrah': 1154, 'jiski': 1155, 'jara': 1156, 'insaniyat': 1157, 'zardari': 1158, 'acche': 1159, 'ungli': 1160, 'chu': 1161, 'beech': 1162, 'baatein': 1163, 'english': 1164, 'vale': 1165, 'hahaha': 1166, 'kitni': 1167, 'vs': 1168, 'bhej': 1169, 'kheli': 1170, 'goli': 1171, 'janwar': 1172, 'mr': 1173, 'sirji': 1174, 'bhul': 1175, 'dala': 1176, 'follow': 1177, 'hoge': 1178, 'inse': 1179, 'ku': 1180, 'ch': 1181, 'rahenge': 1182, 'show': 1183, 'banned': 1184, 'miya': 1185, 'bhabhi': 1186, 'interest': 1187, 'school': 1188, 'kahe': 1189, 'final': 1190, 'darr': 1191, 'hat': 1192, 'nitish': 1193, 'ov': 1194, 'coach': 1195, 'uth': 1196, 'jari': 1197, 'africa': 1198, 'for': 1199, 'innings': 1200, 'burkha': 1201, 'janab': 1202, 'part': 1203, 'agree': 1204, 'gambhir': 1205, 'hume': 1206, 'amitabh': 1207, 'lane': 1208, 'pls': 1209, 'bomb': 1210, 'ganda': 1211, 'badla': 1212, 'resign': 1213, 'fail': 1214, 'kabi': 1215, 'gadhe': 1216, 'krenge': 1217, 'te': 1218, 'fatwa': 1219, 'pay': 1220, 'bohat': 1221, 'eid': 1222, 'perera': 1223, 'amit': 1224, 'kese': 1225, 'mahine': 1226, 'khali': 1227, 'aurto': 1228, 'ane': 1229, 'accept': 1230, 'gov': 1231, 'kafi': 1232, 'bohot': 1233, 'ispr': 1234, 'miandaad': 1235, 'sarfraz': 1236, 'banne': 1237, 'tan': 1238, 'age': 1239, 'bhala': 1240, 'si': 1241, 'aastha': 1242, 'jaat': 1243, 'and': 1244, 'raja': 1245, 'sara': 1246, 'call': 1247, 'bharta': 1248, 'taki': 1249, 'saja': 1250, 'pati': 1251, 'irony': 1252, 'dulhaniya': 1253, 'milne': 1254, 'kehta': 1255, 'huyi': 1256, 'divorce': 1257, 'open': 1258, 'mujh': 1259, 'share': 1260, 'hashim': 1261, 'ana': 1262, 'key': 1263, 'loot': 1264, 'mubarak': 1265, 'kutte': 1266, 'sakshi': 1267, 'bachchan': 1268, 'filmo': 1269, 'joh': 1270, '60': 1271, 'test': 1272, 'aunty': 1273, 'bhee': 1274, 'yes': 1275, 'rai': 1276, 'privacy': 1277, 'mey': 1278, 'sanjay': 1279, 'talak': 1280, '1st': 1281, 'gaali': 1282, 'kahani': 1283, 'bhejo': 1284, 'sikh': 1285, 'bandhan': 1286, 'franchies': 1287, 'career': 1288, 'lad': 1289, 'kise': 1290, 'sayad': 1291, 'muddo': 1292, 'hath': 1293, 'jaruri': 1294, 'people': 1295, 'mam': 1296, 'ishq': 1297, 'tag': 1298, 'rakhi': 1299, 'dear': 1300, 'humara': 1301, 'khabar': 1302, 'banaye': 1303, 'tumse': 1304, 'side': 1305, 'aayega': 1306, 'ar': 1307, 'summary': 1308, '21': 1309, 'buri': 1310, 'kehna': 1311, 'ney': 1312, 'ranking': 1313, 'vaise': 1314, 'veer': 1315, 'khoon': 1316, 'behno': 1317, 'inn': 1318, 'matches': 1319, 'barbaad': 1320, '33': 1321, 'bhag': 1322, 'aniyin': 1323, 'jayasuriya': 1324, 'mara': 1325, 'ask': 1326, 'sarkaar': 1327, 'balls': 1328, 'mantri': 1329, 'jisme': 1330, 'banega': 1331, 'unse': 1332, 'karnay': 1333, 'besharam': 1334, 'bataya': 1335, 'dua': 1336, 'deke': 1337, 'abi': 1338, '16': 1339, 'mahilaon': 1340, 'kaa': 1341, 'gaana': 1342, 'legal': 1343, 'khula': 1344, 'kashmiri': 1345, 'shaandaar': 1346, 'achhi': 1347, 'chutiyo': 1348, 'badnaam': 1349, 'now': 1350, 'playing': 1351, 'minister': 1352, 'ayega': 1353, 'nation': 1354, 'reservation': 1355, '90': 1356, 'gussa': 1357, 'decision': 1358, 'bachon': 1359, 'behtreen': 1360, 'bani': 1361, 'baccha': 1362, 'comes': 1363, 'fursat': 1364, 'dushman': 1365, 'kae': 1366, 'rajneeti': 1367, 'halal': 1368, 'betha': 1369, 'hatya': 1370, 'dhawan': 1371, 'ckt': 1372, 'ajmal': 1373, 'shareef': 1374, 'mp': 1375, 'insaaf': 1376, 'isiliye': 1377, 'bano': 1378, 'live': 1379, 'ayodhya': 1380, 'aai': 1381, 'job': 1382, 'uttar': 1383, 'hojaye': 1384, 'quran': 1385, 'called': 1386, 'ladne': 1387, 'ro': 1388, 'lagti': 1389, 'personal': 1390, 'sazish': 1391, 'pandey': 1392, 'awaam': 1393, 'ladai': 1394, 'gand': 1395, 'dum': 1396, 'guru': 1397, 'humari': 1398, 'mamla': 1399, 'masla': 1400, 'jaake': 1401, 'credit': 1402, 'social': 1403, 'ul': 1404, 'namak': 1405, 'baqi': 1406, 'sher': 1407, 'rumman': 1408, 'talent': 1409, 'wapas': 1410, 'this': 1411, 'raaz': 1412, 'dera': 1413, 'south': 1414, 'aukat': 1415, 'faltu': 1416, 'inhone': 1417, 'hon': 1418, 'nae': 1419, 'lia': 1420, 'poetry': 1421, 'kyonki': 1422, 'jaayega': 1423, 'tiwari': 1424, 'gy': 1425, 'compare': 1426, 'bhakti': 1427, 'karegi': 1428, 'tk': 1429, 'yad': 1430, 'jitne': 1431, 'wait': 1432, 'barish': 1433, 'balki': 1434, 'jaao': 1435, 'rasta': 1436, 'alka': 1437, 'sakht': 1438, 'shikar': 1439, 'tweets': 1440, 'dukhi': 1441, 'takleef': 1442, 'bht': 1443, 'ispe': 1444, 'dikkat': 1445, 'raaj': 1446, 'sant': 1447, 'garib': 1448, 'bakwas': 1449, 'zameen': 1450, 'py': 1451, 'wi': 1452, 'chai': 1453, 'bante': 1454, 'paani': 1455, 'rani': 1456, 'azeem': 1457, 'gyaan': 1458, 'padega': 1459, 'khul': 1460, 'sikhna': 1461, 'sey': 1462, 'music': 1463, 'dikhte': 1464, 'masoom': 1465, 'mahi': 1466, 'apka': 1467, 'kaisi': 1468, 'ulta': 1469, 'sabka': 1470, 'kartay': 1471, 'dada': 1472, 'jayada': 1473, 'great': 1474, 'way': 1475, 'bt': 1476, 'hr': 1477, 'vi': 1478, 'back': 1479, 'rakhe': 1480, 'umeed': 1481, 'varna': 1482, 'girls': 1483, 'nijam': 1484, 'gst': 1485, 'poor': 1486, 'bhartiya': 1487, 'community': 1488, 'baaz': 1489, 'hadd': 1490, 'khele': 1491, 'filme': 1492, 'junobi': 1493, 'dosray': 1494, 'teesray': 1495, 'chattay': 1496, 'chutiyapa': 1497, 'vishwas': 1498, 'bahu': 1499, 'babu': 1500, 'hatao': 1501, 'zaroori': 1502, 'chakar': 1503, 'seat': 1504, 'khiladi': 1505, 'owaisi': 1506, 'last': 1507, 'kajol': 1508, '18': 1509, 'well': 1510, 'tumare': 1511, 'chodo': 1512, 'yuvraj': 1513, 'vivah': 1514, 'ghr': 1515, 'kala': 1516, 'apki': 1517, 'asal': 1518, 'bachha': 1519, 'stars': 1520, 'khair': 1521, 'suar': 1522, 'qanoon': 1523, 'jinki': 1524, 'bhara': 1525, 'bik': 1526, 'qk': 1527, 'trend': 1528, 'sahib': 1529, 'sharab': 1530, 'respect': 1531, 'takreeb': 1532, 'gunde': 1533, '2017': 1534, 'baith': 1535, 'gulzar': 1536, 'sufyan': 1537, 'event': 1538, 'lagte': 1539, 'retire': 1540, 'azaadi': 1541, 'lana': 1542, 'pti': 1543, 'laya': 1544, 'commentary': 1545, 'bta': 1546, 'mobile': 1547, 'ball': 1548, 'auraton': 1549, 'yuva': 1550, 'non': 1551, 'jb': 1552, 'ranaut': 1553, 'corrupt': 1554, 'deshdrohi': 1555, 'km': 1556, 'kre': 1557, 'joke': 1558, 'true': 1559, 'hal': 1560, 'jihadi': 1561, 'idhar': 1562, 'paji': 1563, 'laash': 1564, 'accident': 1565, 'esi': 1566, 'likh': 1567, 'ee': 1568, 'gadha': 1569, 'thode': 1570, 'deewana': 1571, 'maaf': 1572, 'boli': 1573, 'turant': 1574, 'aag': 1575, 'truth': 1576, 'jante': 1577, 'indians': 1578, 'sochte': 1579, 'rahegi': 1580, 'ali': 1581, 'clear': 1582, 'wife': 1583, 'garam': 1584, 'even': 1585, 'thee': 1586, 'saamne': 1587, 'zaroor': 1588, 'kareena': 1589, 'thy': 1590, 'rights': 1591, 'kabaddi': 1592, 'nautanki': 1593, 'ghanta': 1594, 'tareef': 1595, 'ullu': 1596, 'actors': 1597, 'performance': 1598, 'maro': 1599, 'dunya': 1600, 'shoaib': 1601, 'puch': 1602, 'pahuch': 1603, 'chacha': 1604, 'wajon': 1605, 'bate': 1606, 'bhakto': 1607, 'dialogue': 1608, 'chali': 1609, 'choice': 1610, 'jake': 1611, 'illegal': 1612, 'ladies': 1613, 'jay': 1614, 'subah': 1615, 'saat': 1616, 'murkh': 1617, 'ti': 1618, 'flood': 1619, 'waly': 1620, 'jal': 1621, 'utna': 1622, 'tm': 1623, 'chhor': 1624, 'safai': 1625, 'bhale': 1626, 'pic': 1627, 'veeru': 1628, 'jinda': 1629, 'tumne': 1630, 'du': 1631, 'kamal': 1632, 'state': 1633, 'paaji': 1634, 'ladke': 1635, 'sikhe': 1636, 'thank': 1637, 'model': 1638, 'burqa': 1639, 'paya': 1640, 'aadha': 1641, 'photo': 1642, 'kaash': 1643, 'yagnik': 1644, 'netao': 1645, 'not': 1646, 'london': 1647, 'awaaz': 1648, 'kch': 1649, 'dowry': 1650, 'bari': 1651, 'dekhe': 1652, 'jahil': 1653, 'seekh': 1654, 'parivaar': 1655, '80': 1656, 'usne': 1657, 'maal': 1658, 'pratha': 1659, 'top': 1660, 'den': 1661, 'hinduo': 1662, 'gaand': 1663, 'khade': 1664, 'says': 1665, 'pal': 1666, 'hazar': 1667, 'baja': 1668, 'koe': 1669, 'pita': 1670, 'islamic': 1671, 'khans': 1672, 'dekhiye': 1673, 'baje': 1674, 'bula': 1675, 'samarthan': 1676, 'a120': 1677, 'mu': 1678, 'thakur': 1679, 'likhe': 1680, 'mukherjee': 1681, 'rains': 1682, 'hara': 1683, 'very': 1684, 'akal': 1685, 'vivad': 1686, 'ghusa': 1687, 'dusra': 1688, 'gayab': 1689, 'wapsi': 1690, 'votebank': 1691, 'mullo': 1692, 'zimmedar': 1693, 'niche': 1694, 'humko': 1695, 'aarakshan': 1696, 'roj': 1697, 'pandya': 1698, 'thali': 1699, 'raina': 1700, 'choro': 1701, 'hokar': 1702, 'rakhna': 1703, 'kisiko': 1704, 'kbhi': 1705, 'jashn': 1706, 'holi': 1707, 'jit': 1708, 'zarurat': 1709, 'filmy': 1710, 'khtm': 1711, 'bullet': 1712, 'role': 1713, 'chunav': 1714, 'jinke': 1715, 'nikaah': 1716, 'marriage': 1717, 'banu': 1718, 'end': 1719, 'word': 1720, 'want': 1721, 'khusi': 1722, 'purani': 1723, 'big': 1724, 'amir': 1725, 'dekhenge': 1726, 'bheed': 1727, 'qeyadat': 1728, 'izaz': 1729, 'villiers': 1730, 'hot': 1731, 'acting': 1732, 'jiske': 1733, 'kari': 1734, 'get': 1735, 'hoi': 1736, 'mano': 1737, 'shadab': 1738, 'piche': 1739, 'ajeeb': 1740, 'hojata': 1741, 'an': 1742, 'hash': 1743, 'wasim': 1744, 'purane': 1745, '29': 1746, 'fayada': 1747, 'bahubali': 1748, 'ansari': 1749, 'pahunch': 1750, 'educated': 1751, 'saman': 1752, 'icc': 1753, 'entertainment': 1754, 'rahy': 1755, 'bhosdike': 1756, 'celebrities': 1757, 'hei': 1758, 'jala': 1759, 'utar': 1760, 'jawab': 1761, 'railway': 1762, 'samjhte': 1763, 'bayan': 1764, 'blb': 1765, 'therivu': 1766, 'kulu': 1767, 'karu': 1768, 'lala': 1769, 'kaar': 1770, 'jayaz': 1771, 'kangna': 1772, 'jain': 1773, 'total': 1774, 'gala': 1775, 'bajaye': 1776, 'deni': 1777, 'kyaa': 1778, 'apana': 1779, 'win': 1780, 'smart': 1781, 'sarcasm': 1782, 'its': 1783, 'mishra': 1784, 'fake': 1785, 'baal': 1786, 'padegi': 1787, 'save': 1788, 'jeene': 1789, 'nau': 1790, 'padho': 1791, 'dikhao': 1792, 'mn': 1793, 'gandagi': 1794, 'aadat': 1795, 'sena': 1796, 'bachana': 1797, 'bachhon': 1798, 'hongi': 1799, 'roop': 1800, 'poore': 1801, 'order': 1802, 'khatna': 1803, 'old': 1804, 'opinion': 1805, 'humne': 1806, 'industry': 1807, 'kaat': 1808, 'week': 1809, 'beghairat': 1810, 'yeah': 1811, 'shree': 1812, 'mazak': 1813, 'tasveer': 1814, 'bachcha': 1815, 'manish': 1816, 'saram': 1817, 'laye': 1818, 'terrorism': 1819, 'khas': 1820, 'post': 1821, 'smjh': 1822, 'logic': 1823, 'rahne': 1824, 'hardik': 1825, 'liay': 1826, 'kyoki': 1827, 'bnaya': 1828, 'yr': 1829, 'shariyat': 1830, 'hisab': 1831, 'picture': 1832, 'aaplog': 1833, 'billi': 1834, 'vah': 1835, 'aani': 1836, 'security': 1837, 'thanks': 1838, 'tamasha': 1839, 'burai': 1840, 'saheb': 1841, 'bhadwe': 1842, 'ra': 1843, 'heera': 1844, 'mae': 1845, 'huwi': 1846, 'unhone': 1847, 'jald': 1848, 'dein': 1849, 'chand': 1850, 'sibal': 1851, 't20': 1852, 'whatsapp': 1853, 'scam': 1854, 'ghatna': 1855, 'koyi': 1856, 'waseem': 1857, '370': 1858, 'rahain': 1859, 'all': 1860, 'jiska': 1861, 'ma': 1862, 'maare': 1863, 'dikhaya': 1864, 'hazaro': 1865, 'bahana': 1866, 'jod': 1867, 'bap': 1868, 'ajj': 1869, 'je': 1870, 'dekar': 1871, 'kvs': 1872, 'kabool': 1873, 'hair': 1874, 'baarish': 1875, 'skta': 1876, 'der': 1877, 'mehnat': 1878, 'jeeta': 1879, 'opposition': 1880, 'lye': 1881, 'peace': 1882, 'ben': 1883, 'aae': 1884, 'vijay': 1885, 'sharia': 1886, 'khate': 1887, 'udhar': 1888, 'kadam': 1889, 'ijjat': 1890, 'bangladesh': 1891, 'sv': 1892, 'f': 1893, 'channels': 1894, 'siasat': 1895, 'bewakoof': 1896, 'min': 1897, 'hogaya': 1898, 'karane': 1899, 'mahan': 1900, 'jaroorat': 1901, 'field': 1902, 'chance': 1903, 'aus': 1904, 'siyasat': 1905, 'jinhe': 1906, 'shot': 1907, 'rahta': 1908, 'karay': 1909, 'with': 1910, 'rang': 1911, 'rohingya': 1912, 'dahej': 1913, '3rd': 1914, 'bharosa': 1915, 'bhart': 1916, 'dam': 1917, 'left': 1918, 'first': 1919, 'machli': 1920, 'shraddha': 1921, 'bijli': 1922, 'farak': 1923, 'sharminda': 1924, 'girl': 1925, 'thisara': 1926, 'bahi': 1927, 'least': 1928, 'lagata': 1929, 'virodhi': 1930, 'sunne': 1931, 'history': 1932, 'shikhar': 1933, 'bolega': 1934, 'ipl': 1935, 'born': 1936, 'ayegi': 1937, 'namaz': 1938, 'wow': 1939, 'take': 1940, 'queen': 1941, 'bengal': 1942, 'blast': 1943, 'issues': 1944, 'pehli': 1945, 'krti': 1946, 'saari': 1947, 'taliban': 1948, 'waqar': 1949, 'nhe': 1950, 'deshbhakt': 1951, 'arakshan': 1952, 'inhain': 1953, 'younis': 1954, 'phele': 1955, 'lagaya': 1956, 'maloom': 1957, 'paar': 1958, 'caste': 1959, 'kum': 1960, 'entry': 1961, 'mili': 1962, 'baitha': 1963, 'uspe': 1964, 'chutti': 1965, 'shuro': 1966, 'layen': 1967, 'hindus': 1968, 'babri': 1969, 'batana': 1970, 'sacche': 1971, 'banti': 1972, 'road': 1973, 'far': 1974, 'nagar': 1975, 'value': 1976, 'ruk': 1977, 'baate': 1978, 'bhaijaan': 1979, 'skte': 1980, 'ravi': 1981, 'wa': 1982, 'bhagwaan': 1983, 'society': 1984, 'talaaq': 1985, 'shameful': 1986, 'sochta': 1987, 'aacha': 1988, 'kahen': 1989, 'super': 1990, 'haala': 1991, 'laala': 1992, 'kho': 1993, 'maharaj': 1994, 'shi': 1995, 'bande': 1996, 'kasoor': 1997, 'chalane': 1998, 'hisaab': 1999, 'think': 2000, 'uper': 2001, 'waala': 2002, 'dalal': 2003, 'don': 2004, 'sabha': 2005, 'barabar': 2006, 'chij': 2007, 'sabar': 2008, 'com': 2009, 'aukaat': 2010, 'nasal': 2011, 'mane': 2012, 'rote': 2013, '19': 2014, 'uspar': 2015, 'esa': 2016, 'pakka': 2017, 'keep': 2018, 'ludo': 2019, 'jasparit': 2020, 'chot': 2021, 'krke': 2022, 'bekar': 2023, 'lagate': 2024, 'gajab': 2025, 'rehte': 2026, 'kaisa': 2027, 'zameer': 2028, 'apny': 2029, 'shaheed': 2030, 'dogle': 2031, 'dutt': 2032, 'akaas': 2033, 'unwaani': 2034, 'mashkon': 2035, 'qatai': 2036, 'city': 2037, 'question': 2038, 'larki': 2039, 'ur': 2040, 'leti': 2041, 'nostalgia': 2042, 'jago': 2043, 'hoke': 2044, 'khule': 2045, 'speech': 2046, 'awaz': 2047, 'anna': 2048, 'padh': 2049, 'rahen': 2050, 'jio': 2051, 'jhuth': 2052, 'rah': 2053, 'bhagao': 2054, 'trah': 2055, 'let': 2056, 'air': 2057, 'meray': 2058, 'leye': 2059, 'sanskrit': 2060, 'sonia': 2061, 'mujhey': 2062, 'monday': 2063, 'prabhu': 2064, 'sukh': 2065, 'shehzad': 2066, 'bharti': 2067, 'long': 2068, 'riots': 2069, 'killed': 2070, 'ghoom': 2071, 'ah': 2072, 'chaturthi': 2073, 'kismat': 2074, 'khelen': 2075, 'shukar': 2076, 'yese': 2077, 'inhi': 2078, 'admin': 2079, 'manta': 2080, 'hamid': 2081, 'sehwag': 2082, 'smriti': 2083, 'bhaiyo': 2084, 'naare': 2085, 'shram': 2086, 'bataiye': 2087, 'rahiye': 2088, 'jawano': 2089, 'wish': 2090, 'pan': 2091, 'dekhkar': 2092, 'gareebi': 2093, 'cast': 2094, 'fazool': 2095, 'chahiy': 2096, 'milegi': 2097, 'rahane': 2098, '28': 2099, 'bv': 2100, 'ammi': 2101, 'email': 2102, 'thalaivar': 2103, 'sanath': 2104, 'nei': 2105, 'sanam': 2106, 'praveen': 2107, 'jv': 2108, 'vyakti': 2109, 'defend': 2110, 'mazeed': 2111, 'gurmeet': 2112, 'khol': 2113, 'chhoti': 2114, 'ticket': 2115, 'moka': 2116, 'low': 2117, 'paki': 2118, 'gutter': 2119, 'bhed': 2120, 'kaya': 2121, 'juda': 2122, 'kadar': 2123, 'trp': 2124, 'kaunsa': 2125, 'chin': 2126, 'dobara': 2127, 'jae': 2128, 'ground': 2129, 'culture': 2130, 'chaddi': 2131, 'ameen': 2132, 'original': 2133, 'nakli': 2134, 'mausam': 2135, 'jinko': 2136, 'suwar': 2137, 'baaton': 2138, 'gyan': 2139, 'nasha': 2140, 'population': 2141, 'aake': 2142, 'superstar': 2143, 'host': 2144, 'lulli': 2145, 'sulag': 2146, 'classic': 2147, 'xd': 2148, 'sai': 2149, 'august': 2150, 'bechare': 2151, 'bahas': 2152, 'republic': 2153, 'words': 2154, 'degree': 2155, 'stop': 2156, 'ager': 2157, '12': 2158, 'legend': 2159, 'naheen': 2160, 'hotay': 2161, 'gupta': 2162, 'must': 2163, 'said': 2164, 'situation': 2165, 'boring': 2166, 'enjoy': 2167, 'khuch': 2168, 'naukari': 2169, 'vrs': 2170, 'dae': 2171, 'pa': 2172, 'image': 2173, 'khelnay': 2174, 'aurte': 2175, 'khub': 2176, 'abdul': 2177, 'choor': 2178, 'jani': 2179, 'rakhne': 2180, 'violence': 2181, 'lakin': 2182, 'development': 2183, 'kisan': 2184, 'dance': 2185, 'ijazat': 2186, 'naukri': 2187, 'rona': 2188, 'thora': 2189, 'samjhaye': 2190, 'milke': 2191, 'shamil': 2192, 'fayda': 2193, 'kartey': 2194, 'year': 2195, 'throwback': 2196, 'tarif': 2197, 'quality': 2198, 'lay': 2199, 'qurbani': 2200, 'aadhi': 2201, 'dhyaan': 2202, 'mene': 2203, 'sekna': 2204, 'mujy': 2205, 'chutiyon': 2206, 'maat': 2207, 'uljha': 2208, 'lok': 2209, 'dharmik': 2210, 'muslimo': 2211, 'hinduon': 2212, 'tragedy': 2213, 'easy': 2214, 'tod': 2215, 'dikhane': 2216, 'unn': 2217, 'shahar': 2218, 'yudh': 2219, 'jaanti': 2220, 'parivar': 2221, 'baaten': 2222, 'nuqsan': 2223, 'adalat': 2224, 'doosra': 2225, 'walay': 2226, 'dhak': 2227, 'marti': 2228, 'aab': 2229, 'bhashan': 2230, 'banaoge': 2231, 'mood': 2232, 'launch': 2233, 'topi': 2234, 'rakhta': 2235, 'ladkiyo': 2236, 'mahesh': 2237, 'bhatt': 2238, 'mahatma': 2239, 'jashoda': 2240, 'farzi': 2241, 'jhanda': 2242, 'sobha': 2243, 'pakad': 2244, 'sambhal': 2245, 'rishi': 2246, 'samjhi': 2247, 'bitha': 2248, 'reality': 2249, 'layenge': 2250, 'swachh': 2251, 'khi': 2252, 'kardiya': 2253, 'story': 2254, 'rss': 2255, 'sone': 2256, 'inshallah': 2257, 'nehru': 2258, 'bhaut': 2259, 'card': 2260, 'try': 2261, 'gate': 2262, 'dekhi': 2263, 'karnewala': 2264, 'didi': 2265, 're': 2266, 'cricekt': 2267, 'dere': 2268, 'jaoge': 2269, 'marzi': 2270, 'aug': 2271, 'balle': 2272, 'boliye': 2273, 'banna': 2274, 'bheekh': 2275, 'pet': 2276, 'guzaar': 2277, 'met': 2278, '47': 2279, 'safar': 2280, 'guzar': 2281, 'achchha': 2282, 'education': 2283, 'dalali': 2284, 'salon': 2285, 'rashtra': 2286, 'mahabharat': 2287, 'banta': 2288, 'paiso': 2289, 'kharid': 2290, 'shant': 2291, 'harane': 2292, 'bhad': 2293, 'peet': 2294, 'chalana': 2295, 'late': 2296, 'leni': 2297, 'freedom': 2298, 'self': 2299, 'teem': 2300, 'mukti': 2301, 'tet': 2302, 'farq': 2303, 'dhang': 2304, 'cool': 2305, 'garmi': 2306, 'sadak': 2307, 'congratulations': 2308, 'irfan': 2309, 'darshan': 2310, 'cha': 2311, 'naraaz': 2312, 'parties': 2313, 'protest': 2314, 'haye': 2315, 'bye': 2316, 'sikha': 2317, 'thaher': 2318, 'chalne': 2319, 'lago': 2320, 'melody': 2321, '14': 2322, 'manoj': 2323, 'ladh': 2324, 'ehsaas': 2325, 'shakti': 2326, 'kalyug': 2327, 'zimbabwe': 2328, 'voh': 2329, 'isne': 2330, 'tanqeed': 2331, '36': 2332, 'kamran': 2333, 'khilai': 2334, 'burnol': 2335, 'amma': 2336, 'pada': 2337, 'mujhse': 2338, 'woman': 2339, 'khamosh': 2340, 'police': 2341, 'crime': 2342, 'face': 2343, 'chanakya': 2344, 'mout': 2345, 'jodi': 2346, 'we': 2347, 'damad': 2348, 'sako': 2349, 'behas': 2350, 'daram': 2351, 'mujhko': 2352, 'mohabbat': 2353, 'country': 2354, 'laal': 2355, 'bohut': 2356, 'karate': 2357, 'uthaya': 2358, 'kahaa': 2359, 'aakhir': 2360, 'chalegi': 2361, 'minority': 2362, 'uk': 2363, 'sunday': 2364, 'hoty': 2365, 'rokne': 2366, 'cricketer': 2367, 'janti': 2368, 'thori': 2369, 'samjhta': 2370, 'toot': 2371, 'means': 2372, 'sabak': 2373, 'because': 2374, 'ghalti': 2375, 'west': 2376, 'kafee': 2377, 'zz': 2378, 'mayor': 2379, 'sarcastic': 2380, 'prabhas': 2381, 'zee': 2382, 'patra': 2383, 'ratna': 2384, 'child': 2385, 'ada': 2386, 'hae': 2387, 'leaders': 2388, 'tahir': 2389, 'rhega': 2390, 'feel': 2391, 'rahna': 2392, 'vikrant': 2393, 'dukan': 2394, 'manmohan': 2395, 'aapni': 2396, 'josh': 2397, 'maari': 2398, 'pehla': 2399, 'bharati': 2400, 'karny': 2401, 'opner': 2402, 'maqsad': 2403, 'barrh': 2404, 'waali': 2405, 'aajtak': 2406, 'sochna': 2407, 'jawan': 2408, 'chunaav': 2409, 'huve': 2410, 'asi': 2411, 'bihari': 2412, 'jehad': 2413, 'kiyu': 2414, 'chaat': 2415, 'huwe': 2416, 'paksh': 2417, 'aapse': 2418, 'baadme': 2419, 'milenge': 2420, 'bachchon': 2421, 'based': 2422, 'aalmi': 2423, 'dare': 2424, 'mom': 2425, 'huay': 2426, 'bolenge': 2427, 'ustaad': 2428, 'england': 2429, 'rail': 2430, 'kutto': 2431, 'moment': 2432, 'helo': 2433, 'budhi': 2434, 'jhooth': 2435, 'urmila': 2436, 'vajah': 2437, 'sunni': 2438, 'puchta': 2439, 'bhojpuri': 2440, 'apse': 2441, 'tabdeeli': 2442, 'sponcer': 2443, 'tayyar': 2444, 'tharki': 2445, 'tey': 2446, 'reply': 2447, 'jokes': 2448, 'apane': 2449, 'seva': 2450, 'qayadat': 2451, 'micromax': 2452, 'salam': 2453, 'gham': 2454, 'dy': 2455, 'kriti': 2456, 'sanon': 2457, 'songs': 2458, 'dhoop': 2459, 'aqib': 2460, 'maidan': 2461, 'contest': 2462, 'andolan': 2463, 'uncle': 2464, 'she': 2465, 'kisko': 2466, 'pari': 2467, 'vichar': 2468, 'hollywood': 2469, 'garibo': 2470, 'khelti': 2471, 'dimaag': 2472, 'rishta': 2473, 'tatti': 2474, 'journalist': 2475, 'memes': 2476, 'jali': 2477, 'palesi': 2478, 'just': 2479, 'dali': 2480, '40': 2481, 'khata': 2482, 'saeed': 2483, 'badnam': 2484, 'marr': 2485, 'ghum': 2486, 'bhasan': 2487, 'knowledge': 2488, 'keliye': 2489, 'dunia': 2490, 'chamka': 2491, 'karty': 2492, 'jannat': 2493, 'wrong': 2494, 'kiske': 2495, 'september': 2496, 'baadh': 2497, 'doosron': 2498, 'shaanti': 2499, 'bakra': 2500, 'hallala': 2501, 'sidha': 2502, 'quotes': 2503, 'returns': 2504, 'samsya': 2505, 'baray': 2506, 'drohi': 2507, 'dh': 2508, 'lakho': 2509, 'roye': 2510, 'made': 2511, 'bai': 2512, 'nayi': 2513, 'kisse': 2514, 'gayee': 2515, 'justice': 2516, 'rajya': 2517, 'metro': 2518, 'chak': 2519, 'jaya': 2520, 'chota': 2521, 'hafiz': 2522, 'jaroori': 2523, 'layak': 2524, 'hotel': 2525, 'sentencing': 2526, 'padha': 2527, 'dukhad': 2528, 'ranveer': 2529, 'mumkin': 2530, 'dilip': 2531, 'saira': 2532, 'baheno': 2533, 'hitler': 2534, 'mang': 2535, 'aaho': 2536, 'dhum': 2537, 'bhejte': 2538, 'nu': 2539, 'huee': 2540, 'ri': 2541, 'uday': 2542, 'izhaar': 2543, 'aagay': 2544, 'bhaag': 2545, 'aussie': 2546, 'jaanta': 2547, 'aisay': 2548, 'neend': 2549, 'guarantee': 2550, 'tujhko': 2551, 'piya': 2552, 'block': 2553, 'play': 2554, 'imad': 2555, 'kc': 2556, 'fantasy': 2557, 'besharmi': 2558, 'phool': 2559, 'friend': 2560, 'rahey': 2561, 'hoor': 2562, 'victims': 2563, 'vali': 2564, 'pichle': 2565, 'form': 2566, '99': 2567, 'gaane': 2568, 'mayawati': 2569, 'ht': 2570, '46': 2571, 'vinay': 2572, 'shreyas': 2573, 'charcha': 2574, 'gande': 2575, 'padhna': 2576, 'waja': 2577, 'zulm': 2578, 'nanga': 2579, 'certificate': 2580, 'moti': 2581, 'oxygen': 2582, 'che': 2583, 'khao': 2584, 'apno': 2585, 'deshon': 2586, 'allha': 2587, 'ani': 2588, 'kanun': 2589, 'uthta': 2590, 'zaleel': 2591, 'dhongi': 2592, 'sangh': 2593, 'jagha': 2594, 'nahee': 2595, 'anushka': 2596, 'kush': 2597, 'karengi': 2598, 'hati': 2599, 'anay': 2600, 'kehne': 2601, 'rehi': 2602, 'misbah': 2603, 'puhnchaya': 2604, 'ish': 2605, 'bazi': 2606, 'jadhav': 2607, 'discuss': 2608, '102': 2609, 'reddy': 2610, 'deshpande': 2611, 'sakuja': 2612, 'abba': 2613, 'resigned': 2614, 'thodar': 2615, 'athan': 2616, 'raajinaamaa': 2617, 'bechara': 2618, 'padhne': 2619, 'khatoon': 2620, 'raga': 2621, 'pl': 2622, 'nidhish': 2623, 'avi': 2624, 'pooja': 2625, 'apman': 2626, 'shit': 2627, 'know': 2628, 'tejashwi': 2629, 'kriye': 2630, 'balochistan': 2631, 'wafaqi': 2632, 'wazir': 2633, 'amoor': 2634, 'chhakka': 2635, 'jalane': 2636, 'thak': 2637, 'gaaliyan': 2638, 'ehsaan': 2639, 'rehne': 2640, 'issey': 2641, 'unme': 2642, '123': 2643, '55': 2644, 'prateek': 2645, 'puja': 2646, 'muda': 2647, 'yea': 2648, 'valo': 2649, 'sadhvi': 2650, 'bna': 2651, 'samapt': 2652, 'a2': 2653, 'chashma': 2654, 'paayi': 2655, 'wahin': 2656, 'bnd': 2657, 'katue': 2658, 'option': 2659, 'sada': 2660, 'insider': 2661, 'id': 2662, 'dhokha': 2663, 'najar': 2664, 'badiya': 2665, 'loge': 2666, 'itani': 2667, 'samjho': 2668, 'aksar': 2669, 'kaum': 2670, 'dusare': 2671, 'dash': 2672, 'safalta': 2673, 'problems': 2674, 'kripya': 2675, 'vala': 2676, 'common': 2677, 'gire': 2678, 'koyla': 2679, 'vaisa': 2680, 'aray': 2681, 'lakhon': 2682, 'bagair': 2683, 'udaas': 2684, 'shair': 2685, 'sbse': 2686, 'krega': 2687, 'laat': 2688, 'attention': 2689, 'bhikari': 2690, 'jaiye': 2691, 'bachan': 2692, 'bolti': 2693, 'atay': 2694, 'sonam': 2695, 'yateem': 2696, 'il': 2697, 'paul': 2698, 'dosi': 2699, 'handle': 2700, 'batne': 2701, 'gair': 2702, 'negative': 2703, 'likhne': 2704, 'jadoo': 2705, 'samajte': 2706, 'leave': 2707, 'empowerment': 2708, 'selective': 2709, 'bheja': 2710, 'nyay': 2711, 'full': 2712, 'jivan': 2713, 'biopic': 2714, 'srilanka': 2715, 'meeting': 2716, 'paagal': 2717, 'oppo': 2718, 'pado': 2719, 'nashe': 2720, 'ghalat': 2721, 'vipaksh': 2722, 'mullas': 2723, 'bahali': 2724, 'responsibility': 2725, 'beth': 2726, 'jaty': 2727, 'loha': 2728, 'why': 2729, 'bichara': 2730, 'az': 2731, 'ghazab': 2732, '23': 2733, 'jumle': 2734, 'sindh': 2735, 'wapasi': 2736, 'gaay': 2737, 'sidhi': 2738, 'dvs': 2739, 'brave': 2740, 'hadiya': 2741, 'fund': 2742, '35a': 2743, 'puchte': 2744, 'huvi': 2745, 'sikhane': 2746, 'sawaal': 2747, 'kidhar': 2748, 'sadiq': 2749, 'unbelievable': 2750, 'musalmaan': 2751, 'connection': 2752, 'shart': 2753, 'apradhi': 2754, 'normal': 2755, 'uchhal': 2756, 'ladkiyon': 2757, 'shabd': 2758, 'masle': 2759, 'adityanath': 2760, 'things': 2761, 'chiz': 2762, 'tejas': 2763, 'ninda': 2764, 'jhanki': 2765, 'dhund': 2766, 'anil': 2767, 'boys': 2768, 'kutch': 2769, 'jayse': 2770, 'mukh': 2771, 'ukhad': 2772, 'kejruddin': 2773, 'panama': 2774, 'jinhone': 2775, 'tukde': 2776, 'jaroor': 2777, 'cheezein': 2778, 'aish': 2779, 'nikle': 2780, 'kutton': 2781, 'nidhi': 2782, 'anand': 2783, 'shukla': 2784, 'kartha': 2785, 'virasat': 2786, 'jhute': 2787, 'action': 2788, 'pehen': 2789, 'kahta': 2790, '90s': 2791, 'kareeb': 2792, 'votes': 2793, 'jalta': 2794, 'jalne': 2795, 'khete': 2796, 'quote': 2797, 'democracy': 2798, 'tau': 2799, 'check': 2800, 'issliye': 2801, 'darte': 2802, 'dharna': 2803, 'mullah': 2804, 'imp': 2805, 'tay': 2806, 'batein': 2807, 'loksabha': 2808, 'gobar': 2809, 'qoum': 2810, 'jaega': 2811, 'chutia': 2812, 'altaf': 2813, 'nv': 2814, 'ranbir': 2815, 'busy': 2816, 'teams': 2817, 'achaa': 2818, 'batane': 2819, 'lines': 2820, 'display': 2821, 'baloch': 2822, 'madarchodo': 2823, 'ske': 2824, 'saf': 2825, '2013': 2826, 'mitro': 2827, 'jita': 2828, 'pit': 2829, 'shameless': 2830, 'dakhal': 2831, 'ppl': 2832, 'basic': 2833, 'permanent': 2834, 'bhikhari': 2835, 'ambulance': 2836, 'milna': 2837, 'gujrat': 2838, 'rip': 2839, 'karao': 2840, 'pcb': 2841, 'samvidhan': 2842, 'ae': 2843, 'site': 2844, 'nikaal': 2845, 'ajay': 2846, 'turn': 2847, 'work': 2848, 'khaan': 2849, 'aaoge': 2850, 'hala': 2851, 'amal': 2852, 'kat': 2853, 'someone': 2854, 'chaman': 2855, 'bhav': 2856, 'ration': 2857, 'kela': 2858, 'tomar': 2859, 'anchor': 2860, 'chun': 2861, '4th': 2862, 'hogayi': 2863, 'loss': 2864, 'deen': 2865, 'araha': 2866, 'sadakchhap': 2867, 'katrina': 2868, 'kaif': 2869, 'same': 2870, 'thought': 2871, 'aawaz': 2872, 'send': 2873, 'aas': 2874, 'tamil': 2875, 'hoshiyar': 2876, 'jeetne': 2877, 'luck': 2878, 'pare': 2879, 'united': 2880, '22': 2881, 'haad': 2882, 'manu': 2883, 'bhakts': 2884, 'chamkani': 2885, 'target': 2886, 'lasho': 2887, 'aulaad': 2888, 'zaheer': 2889, 'rajper': 2890, 'jat': 2891, 'rajasthan': 2892, '35': 2893, 'barfi': 2894, 'special': 2895, 'judge': 2896, 'behaal': 2897, 'kadhi': 2898, 'sejal': 2899, 'bhookh': 2900, 'soldiers': 2901, 'rain': 2902, 'siriwardana': 2903, 'chameera': 2904, '192': 2905, 'idea': 2906, 'ran': 2907, 'akhtar': 2908, 'apnay': 2909, 'uae': 2910, 'foreign': 2911, 'phly': 2912, 'faida': 2913, 'important': 2914, 'dushmani': 2915, 'intolerance': 2916, 'bikta': 2917, 'advertising': 2918, 'dikhana': 2919, 'asa': 2920, 'sadhu': 2921, 'young': 2922, 'baz': 2923, 'judiciary': 2924, 'step': 2925, 'rajiv': 2926, 'yrs': 2927, 'limit': 2928, 'unk': 2929, 'pahele': 2930, 'burka': 2931, 'salary': 2932, 'bharpoor': 2933, 'tujh': 2934, 'bhaga': 2935, 'ghanshyam': 2936, 'saccha': 2937, 'payegi': 2938, 'kardo': 2939, 'zindgi': 2940, 'at': 2941, 'hamse': 2942, 'nuksan': 2943, 'chhota': 2944, 'lucknow': 2945, 'shahrukh': 2946, 'base': 2947, 'guzarish': 2948, 'control': 2949, 'shukra': 2950, 'thanda': 2951, 'qki': 2952, 'gandgi': 2953, 'des': 2954, 'setting': 2955, 'rate': 2956, 'dhandha': 2957, 'mission': 2958, 'tanwar': 2959, 'chahy': 2960, 'red': 2961, 'karthe': 2962, 'chahti': 2963, 'doklam': 2964, 'staff': 2965, 'rakhega': 2966, 'haryanvi': 2967, 'raajneeti': 2968, 'yun': 2969, 'muhammad': 2970, 'supporter': 2971, 'huva': 2972, 'ake': 2973, 'nali': 2974, 'beheno': 2975, 'narendra': 2976, 'hila': 2977, 'thappad': 2978, 'disaster': 2979, 'gaandu': 2980, 'gandu': 2981, 'pey': 2982, 'night': 2983, 'soche': 2984, 'sp': 2985, 'abhinav': 2986, '38': 2987, 'list': 2988, 'lagati': 2989, 'koun': 2990, 'uthane': 2991, 'sukun': 2992, 'bal': 2993, 'mt': 2994, 'zamane': 2995, 'hasan': 2996, 'bhagwa': 2997, 'hafeez': 2998, 'srf': 2999, 'grow': 3000, 'always': 3001, 'khyal': 3002, 'bachay': 3003, 'thodarchiyaana': 3004, 'tholvi': 3005, 'kuritthu': 3006, 'palveru': 3007, 'tharappilum': 3008, 'vimarsanam': 3009, 'thalamayilaana': 3010, 'qom': 3011, 'trump': 3012, 'pichay': 3013, 'jankari': 3014, 'saw': 3015, 'bhula': 3016, 'chat': 3017, 'joshi': 3018, 'mazza': 3019, 'dhoondo': 3020, 'lagao': 3021, 'mentality': 3022, 'jashodaben': 3023, 'chi': 3024, 'aahe': 3025, 'dawood': 3026, 'bhasha': 3027, 'aurton': 3028, 'mulayam': 3029, 'kisano': 3030, 'bali': 3031, 'rehti': 3032, 'ashraf': 3033, 'mehsoos': 3034, 'himat': 3035, 'inme': 3036, 'pucho': 3037, 'oye': 3038, 'taaki': 3039, 'communal': 3040, 'waalo': 3041, 'laana': 3042, 'fawad': 3043, 'company': 3044, 'reliance': 3045, 'chaly': 3046, 'patthar': 3047, 'dho': 3048, 'samjo': 3049, 'soraksa': 3050, 'rakhy': 3051, 'chauhan': 3052, 'kaali': 3053, 'seekha': 3054, 'wha': 3055, 'pae': 3056, 'shakal': 3057, 'chhupa': 3058, 'dunga': 3059, 'jiye': 3060, 'loose': 3061, 'dilane': 3062, 'zaruri': 3063, 'ny': 3064, 'karwai': 3065, 'usay': 3066, 'comparison': 3067, 'cr': 3068, 'mamata': 3069, 'smell': 3070, 'rahat': 3071, 'wasiyo': 3072, 'irada': 3073, 'haii': 3074, 'blame': 3075, 'walle': 3076, 'nayak': 3077, 'wese': 3078, 'kan': 3079, 'aapas': 3080, 'chatna': 3081, 'veena': 3082, 'black': 3083, 'kanjar': 3084, 'zamana': 3085, 'muddon': 3086, 'jisey': 3087, 'pariwar': 3088, 'gv': 3089, 'paribortan': 3090, 'magic': 3091, 'they': 3092, 'never': 3093, 'chhupane': 3094, 'chhattisgarh': 3095, 'active': 3096, 'likhta': 3097, 'score': 3098, 'rashid': 3099, 'mona': 3100, 'shaikh': 3101, 'harkat': 3102, 'z': 3103, 'banerjee': 3104, 'prasad': 3105, 'many': 3106, 'kabul': 3107, 'sary': 3108, '2011': 3109, 'behn': 3110, 'cheej': 3111, 'niyam': 3112, 'ndtv': 3113, 'biceps': 3114, 'kaaran': 3115, 'sakhta': 3116, 'barre': 3117, 'naamon': 3118, 'durust': 3119, 'jeson': 3120, 'holder': 3121, 'mehr': 3122, 'patel': 3123, 'chaahiye': 3124, 'bolkar': 3125, 'mujhay': 3126, 'milay': 3127, 'degi': 3128, 'padti': 3129, 'salo': 3130, 'house': 3131, 'badho': 3132, 'bhaad': 3133, 'hogya': 3134, 'asif': 3135, 'bold': 3136, 'samna': 3137, 'beto': 3138, 'mahilaye': 3139, 'razzaq': 3140, 'already': 3141, 'solve': 3142, 'lagega': 3143, 'padhta': 3144, 'sahara': 3145, 'safety': 3146, 'khatarnak': 3147, 'janm': 3148, 'gulam': 3149, 'seedhe': 3150, 'qubool': 3151, 'samjhane': 3152, 'qabool': 3153, 'os': 3154, 'advice': 3155, 'sachche': 3156, 'also': 3157, 'border': 3158, 'horahe': 3159, 'ns': 3160, 'indies': 3161, 'deshbhakti': 3162, 'khada': 3163, 'baukhlaye': 3164, 'two': 3165, 'waley': 3166, 'set': 3167, 'paoge': 3168, 'azad': 3169, 'nikalne': 3170, 'jawaab': 3171, 'university': 3172, 'masala': 3173, 'ispar': 3174, 'maazi': 3175, 'ramzan': 3176, 'nusrat': 3177, 'pop': 3178, 'daikhain': 3179, 'daan': 3180, 'surakchhit': 3181, 'garv': 3182, 'chara': 3183, 'aamad': 3184, 'tour': 3185, 'safal': 3186, 'mangte': 3187, 'arahi': 3188, 'istemal': 3189, 'rozgar': 3190, 'hogye': 3191, 'abad': 3192, 'safe': 3193, 'etne': 3194, 'kitab': 3195, 'jag': 3196, 'hamre': 3197, 'baadshah': 3198, 'multiple': 3199, 'sae': 3200, 'dosti': 3201, 'tiger': 3202, 'samjhao': 3203, 'tara': 3204, 'istifa': 3205, 'cancer': 3206, 'patni': 3207, 'kijiy': 3208, 'oor': 3209, 'rs': 3210, 'fb': 3211, 'suni': 3212, 'kosis': 3213, 'karaya': 3214, 'jeeti': 3215, 'saalo': 3216, 'agle': 3217, 'kiyon': 3218, 'teesri': 3219, 'gazab': 3220, 'demand': 3221, 'gain': 3222, 'dikshanshu': 3223, 'kiska': 3224, 'majak': 3225, 'yog': 3226, 'bahen': 3227, 'done': 3228, 'bak': 3229, 'mla': 3230, 'jp': 3231, 'aizaz': 3232, 'jaante': 3233, 'badshah': 3234, 'caption': 3235, 'achanak': 3236, 'jakar': 3237, 'pidit': 3238, 'chaatu': 3239, 'chayiye': 3240, 'cilovb': 3241, 'brahman': 3242, 'dyan': 3243, 'gautam': 3244, 'dal': 3245, 'baten': 3246, 'kamyabi': 3247, 'rapists': 3248, 'arijit': 3249, 'toote': 3250, 'nikalta': 3251, 'abbu': 3252, 'mien': 3253, 'kernay': 3254, 'reporter': 3255, 'pakistaniyo': 3256, 'shreya': 3257, 'ghoshal': 3258, 'pratap': 3259, 'judgement': 3260, 'fraud': 3261, 'dogla': 3262, 'yojna': 3263, '2nd': 3264, 'khelega': 3265, 'daily': 3266, 'maid': 3267, 'bhuvi': 3268, 'jinnah': 3269, 'bethe': 3270, 'bhikh': 3271, 'prashn': 3272, 'bhagvan': 3273, 'ishrat': 3274, 'aakar': 3275, 'kali': 3276, 'from': 3277, 'another': 3278, 'radio': 3279, 'aadhar': 3280, 'loog': 3281, 'en': 3282, 'tarika': 3283, 'itnay': 3284, 'bangalore': 3285, 'numbers': 3286, 'todne': 3287, 'ucc': 3288, 'ad': 3289, 'gum': 3290, 'chupa': 3291, 'tl': 3292, 'jalte': 3293, 'sadko': 3294, 'durga': 3295, 'gaurakshak': 3296, 'hasil': 3297, 'yo': 3298, 'angrez': 3299, 'harr': 3300, 'unfortunately': 3301, 'cars': 3302, 'war': 3303, 'profile': 3304, 'sarm': 3305, 'google': 3306, 'sultan': 3307, 'laalu': 3308, 'legi': 3309, 'divide': 3310, '1992': 3311, 'taisi': 3312, 'oppose': 3313, 'jhanse': 3314, 'promise': 3315, 'tb': 3316, 'shok': 3317, 'matt': 3318, 'lain': 3319, '200': 3320, 'yaaro': 3321, 'yaani': 3322, 'rahoge': 3323, 'dhoni300': 3324, 'huh': 3325, 'ghat': 3326, 'dee': 3327, 'prostitute': 3328, 'nothing': 3329, 'sakoon': 3330, 'via': 3331, 'bakchodi': 3332, 'hashtag': 3333, 'singham': 3334, 'barometer': 3335, 'kodi': 3336, 'jaegi': 3337, 'paate': 3338, 'thaan': 3339, 'danga': 3340, 'sabji': 3341, 'bechne': 3342, 'taiyar': 3343, 'palti': 3344, 'laanat': 3345, 'banai': 3346, 'dange': 3347, 'wednesday': 3348, 'wisdom': 3349, 'humanity': 3350, 'mazboot': 3351, 'rajesh': 3352, 'satya': 3353, 'rare': 3354, 'khatra': 3355, 'sanghi': 3356, 'sanjeev': 3357, 'karege': 3358, 'bhajpa': 3359, 'hawaa': 3360, 'bill': 3361, 'alia': 3362, 'light': 3363, 'amp': 3364, 'dedo': 3365, 'khuwaish': 3366, 'mem': 3367, 'congrats': 3368, 'chahal': 3369, 'kuldeep': 3370, 'tension': 3371, 'yahaan': 3372, 'gandhiji': 3373, 'doob': 3374, '300': 3375, 'bure': 3376, 'janwaro': 3377, 'nepal': 3378, 'kb': 3379, 'shut': 3380, 'bawana': 3381, 'rastra': 3382, 'yani': 3383, 'neelam': 3384, 'murda': 3385, 'nojawanon': 3386, 'khilane': 3387, 'larke': 3388, 'inzamam': 3389, 'vhi': 3390, 'husband': 3391, 'jaenge': 3392, 'lanat': 3393, 'kabristan': 3394, '1bhi': 3395, 'hao': 3396, 'divya': 3397, 'jyaada': 3398, 'rescue': 3399, 'sona': 3400, 'debates': 3401, 'chadha': 3402, 'atyachar': 3403, 'sacchi': 3404, 'jaab': 3405, 'pel': 3406, 'stand': 3407, 'waisa': 3408, 'matram': 3409, 'kholi': 3410, 'likhi': 3411, 'intrest': 3412, 'sapna': 3413, 'profit': 3414, 'akela': 3415, 'lai': 3416, 'mika': 3417, 'jaayenge': 3418, 'rey': 3419, 'khwahish': 3420, 'reet': 3421, 'failure': 3422, 'manohar': 3423, 'secularism': 3424, 'baju': 3425, 'gully': 3426, 'batate': 3427, 'dp': 3428, 'buddhe': 3429, 'motivation': 3430, 'declare': 3431, 'khaate': 3432, 'chuha': 3433, 'zaman': 3434, 'sahmed': 3435, 'fashraf': 3436, 'hali': 3437, 'dream11': 3438, 'k11': 3439, 'got': 3440, 'khelney': 3441, 'cust': 3442, 'yha': 3443, 'months': 3444, 'dall': 3445, 'hazir': 3446, 'service': 3447, 'actresses': 3448, 'jaari': 3449, 'kiu': 3450, 'muqabila': 3451, 'aankhe': 3452, 'prediction': 3453, 'bss': 3454, 'belt': 3455, 'thk': 3456, '154': 3457, 'mayank': 3458, 'agarwal': 3459, '68': 3460, 'vyshak': 3461, 'gopal': 3462, 'look': 3463, 'indo': 3464, 'bhencho': 3465, 'khane': 3466, 'wande': 3467, 'asia': 3468, 'parantu': 3469, 'bhaarti': 3470, 'virender': 3471, 'honay': 3472, 'fateh': 3473, 'suport': 3474, 'actually': 3475, 'haters': 3476, 'subha': 3477, 'update': 3478, 'bulata': 3479, 'krni': 3480, 'jine': 3481, 'taiyari': 3482, 'sudhrenge': 3483, 'pohanch': 3484, 'nainsafi': 3485, 'noon': 3486, 'abbasi': 3487, 'mardo': 3488, 'promotion': 3489, 'jarori': 3490, 'damn': 3491, 'iron': 3492, 'krde': 3493, 'nara': 3494, 'elaaj': 3495, 'dhakosalay': 3496, 'pressure': 3497, 'seyasi': 3498, '1000': 3499, 'asha': 3500, 'aaram': 3501, 'a66a': 3502, 'kulbhushan': 3503, 'manjesh': 3504, 'pavan': 3505, 'ritesh': 3506, 'bhatkal': 3507, 'ethiroli': 3508, 'utpada': 3509, 'uruppinarkal': 3510, 'anari': 3511, 'equipment': 3512, 'academy': 3513, 'data': 3514, 'equality': 3515, 'ok': 3516, 'dubey': 3517, 'badalne': 3518, 'nikalenge': 3519, 'prayer': 3520, 'akhand': 3521, 'kad': 3522, 'huaa': 3523, 'marana': 3524, 'asliyat': 3525, 'stedium': 3526, 'jahen': 3527, 'parilmani': 3528, 'tournament': 3529, 'kosh': 3530, 'badsoorat': 3531, 'bey': 3532, 'sahmat': 3533, 'bed': 3534, 'nikin': 3535, 'jose': 3536, 'shivam': 3537, 'bhavesh': 3538, 'gulecha': 3539, 'keeda': 3540, 'sadi': 3541, 'filmein': 3542, 'mahilayen': 3543, 'bhaw': 3544, 'ashtha': 3545, 'pane': 3546, 'khelni': 3547, 'midia': 3548, 'dege': 3549, 'a1': 3550, 'a3': 3551, 'magzine': 3552, 'pida': 3553, 'bhaiyye': 3554, 'states': 3555, 'zimedari': 3556, 'practice': 3557, 'joo': 3558, 'usey': 3559, 'yhi': 3560, 'village': 3561, 'inshaallah': 3562, 'bhaal': 3563, 'pehley': 3564, 'pahen': 3565, 'sympathy': 3566, 'badlo': 3567, 'rahogey': 3568, 'zaban': 3569, 'ydi': 3570, 'rahulji': 3571, 'rahene': 3572, '2003': 3573, 'guna': 3574, 'nek': 3575, 'mie': 3576, 'argument': 3577, 'kaark': 3578, 'lagwa': 3579, 'hooro': 3580, 'doobti': 3581, 'padte': 3582, 'sunaai': 3583, 'dikhaayi': 3584, 'regular': 3585, 'missile': 3586, 'uffff': 3587, 'lahori': 3588, 'bhavishya': 3589, 'every': 3590, 'thing': 3591, 'punchkula': 3592, 'defence': 3593, 'haraam': 3594, 'tez': 3595, 'hong': 3596, 'brother': 3597, 'majbut': 3598, 'hoa': 3599, 'nakal': 3600, 'lagaye': 3601, 'gareebon': 3602, 'bahano': 3603, 'prati': 3604, 'badmasho': 3605, 'napunsak': 3606, 'bha': 3607, 'rohtak': 3608, 'bhol': 3609, 'chaha': 3610, 'kanhaiya': 3611, 'kapur': 3612, 'gone': 3613, 'qoumi': 3614, 'karwaya': 3615, 'wise': 3616, 'golden': 3617, 'chahay': 3618, 'naqlee': 3619, '2016': 3620, 'bhogi': 3621, 'pakvs': 3622, 'paane': 3623, 'av': 3624, 'nitesh': 3625, 'chehra': 3626, 'janata': 3627, 'mou': 3628, 'mani': 3629, 'halaala': 3630, 'khullam': 3631, 'khulla': 3632, 'chilla': 3633, 'press': 3634, 'when': 3635, 'shahenshah': 3636, 'son': 3637, 'sony': 3638, 'chakke': 3639, 'chauke': 3640, 'seedha': 3641, 'btao': 3642, 'ghareeb': 3643, 'chidiya': 3644, 'kameeno': 3645, 'utre': 3646, 'kesi': 3647, 'chaaron': 3648, 'chore': 3649, 'tarakki': 3650, 'mardana': 3651, 'kamzori': 3652, 'supporting': 3653, 'batsman': 3654, 'para': 3655, 'manaa': 3656, 'bath': 3657, 'spectators': 3658, 'aplog': 3659, 'ukharda': 3660, 'sumjh': 3661, 'kahti': 3662, 'razdan': 3663, 'aasan': 3664, 'ansar': 3665, 'chain': 3666, 'sahee': 3667, 'bahiskar': 3668, 'tshirt': 3669, 'lahoriyon': 3670, 'trophy': 3671, 'upp': 3672, 'sochate': 3673, 'dhan': 3674, 'mitron': 3675, 'parh': 3676, 'moulavis': 3677, 'daag': 3678, 'dhul': 3679, 'bheer': 3680, 'members': 3681, 'ca': 3682, 'marke': 3683, 'neech': 3684, 'antaratma': 3685, 'high': 3686, 'manage': 3687, 'diwali': 3688, 'rishtedaar': 3689, 'katta': 3690, 'katega': 3691, 'dogs': 3692, 'bhen': 3693, 'nahen': 3694, 'panapne': 3695, 'milo': 3696, 'arti': 3697, 'dhamaka': 3698, 'laut': 3699, 'math': 3700, 'haseen': 3701, 'congressi': 3702, 'chiye': 3703, 'gold': 3704, 'varun': 3705, 'andaz': 3706, 'sazaa': 3707, 'poti': 3708, 'aggressive': 3709, 'government': 3710, 'buniyadi': 3711, 'samane': 3712, 'confused': 3713, 'jiv': 3714, 'rotiya': 3715, 'sek': 3716, 'khani': 3717, 'bann': 3718, 'tana': 3719, 'laude': 3720, 'kishan': 3721, 'sheikh': 3722, 'behano': 3723, 'dekte': 3724, 'chaal': 3725, 'sewa': 3726, 'makkhi': 3727, 'propaganda': 3728, 'smaj': 3729, 'ladd': 3730, 'earthquake': 3731, 'samajhti': 3732, 'ramayan': 3733, 'maanta': 3734, 'maha': 3735, 'proper': 3736, 'bahaal': 3737, 'rkhne': 3738, 'too': 3739, 'hated': 3740, 'detay': 3741, 'pelne': 3742, 'satte': 3743, 'cine': 3744, 'valladolid': 3745, 'spain': 3746, 'cio': 3747, 'desi': 3748, 'hottie': 3749, 'gatar': 3750, 'mita': 3751, 'ptards': 3752, 'bechari': 3753, 'bhumi': 3754, 'pednekar': 3755, 'shubh': 3756, 'mangal': 3757, 'khayal': 3758, 'ziyada': 3759, 'begaane': 3760, 'abdullah': 3761, 'vohi': 3762, 'kureeti': 3763, 'manhoos': 3764, 'raz': 3765, 'kra': 3766, '1947': 3767, 'abtak': 3768, 'hatyara': 3769, 'hasa': 3770, 'mast': 3771, 'nikhil': 3772, 'iddat': 3773, 'kadi': 3774, 'itihas': 3775, 'sec': 3776, 'tirange': 3777, 'harsh': 3778, 'hii': 3779, 'bharatiya': 3780, 'choli': 3781, 'peechhe': 3782, 'wahid': 3783, 'hahahaha': 3784, 'ghadi': 3785, 'bhir': 3786, 'heroin': 3787, 'kavi': 3788, 'abbe': 3789, 'comments': 3790, 'madhe': 3791, 'expert': 3792, 'vaar': 3793, 'jooth': 3794, 'seekhe': 3795, 'faraz': 3796, 'vapsi': 3797, 'aaise': 3798, 'betiyo': 3799, 'omar': 3800, 'khus': 3801, 'aachhe': 3802, 'dikhawa': 3803, 'reforms': 3804, 'samajhna': 3805, 'person': 3806, 'dhaam': 3807, 'pee': 3808, 'bistar': 3809, 'feminism': 3810, 'inspirational': 3811, 'liberty': 3812, 'bisi': 3813, 'charche': 3814, 'who': 3815, 'hint': 3816, 'thursday': 3817, '1975': 3818, 'hatane': 3819, 'khaney': 3820, 'ilawa': 3821, 'auqat': 3822, 'ura': 3823, 'fahrana': 3824, 'report': 3825, 'record': 3826, 'radhe': 3827, 'uruppinarhal': 3828, 'failana': 3829, 'lr': 3830, 'kachche': 3831, 'dosh': 3832, 'granth': 3833, 'slow': 3834, 'sent': 3835, 'bunn': 3836, 'mal': 3837, 'kabbadi': 3838, 'karva': 3839, 'mc': 3840, 'jeetay': 3841, 'related': 3842, 'masti': 3843, 'thodri': 3844, 'junoon': 3845, 'tumlog': 3846, 'dikhaoge': 3847, 'rakkha': 3848, 'kand': 3849, 'dikhai': 3850, 'deewani': 3851, 'kama': 3852, 'producers': 3853, 'harne': 3854, 'pro': 3855, 'patna': 3856, 'ronaq': 3857, 'remember': 3858, 'kabil': 3859, 'achchi': 3860, 'doodh': 3861, 'pran': 3862, 'tumhre': 3863, 'mental': 3864, 'millat': 3865, 'kamyab': 3866, 'bataye': 3867, 'sanyas': 3868, 'karwane': 3869, 'qanon': 3870, 'naqab': 3871, 'bikne': 3872, 'tarhan': 3873, 'laaye': 3874, 'anchors': 3875, 'ajao': 3876, 'ikhlaq': 3877, 'baa': 3878, 'alld': 3879, 'aaptards': 3880, 'arvind': 3881, 'sign': 3882, 'aqal': 3883, 'theke': 3884, 'aagar': 3885, 'release': 3886, 'pathhar': 3887, 'peetal': 3888, 'baaliyon': 3889, 'biyaah': 3890, 'stage': 3891, 'bajay': 3892, 'vandematram': 3893, 'kalpana': 3894, 'kheloge': 3895, 'mazhab': 3896, 'chae': 3897, 'dead': 3898, 'aagy': 3899, 'mooh': 3900, 'aajadi': 3901, 'network': 3902, 'badhao': 3903, 'sasti': 3904, 'chamcho': 3905, 'medal': 3906, 'paan': 3907, 'paish': 3908, 'anchoring': 3909, 'vaisae': 3910, 'fikar': 3911, 'rhte': 3912, 'carpet': 3913, 'factory': 3914, 'course': 3915, 'yogiji': 3916, '63': 3917, 'baj': 3918, 'uthata': 3919, 'filhal': 3920, 'tabah': 3921, 'rakhdiya': 3922, 'hamaray': 3923, 'phoolon': 3924, 'mehak': 3925, 'usweqt': 3926, 'kafeel': 3927, 'kshati': 3928, 'morning': 3929, 'flashback': 3930, 'shaid': 3931, 'dikhaye': 3932, 'lagegi': 3933, 'sarahah': 3934, 'v4': 3935, 'khudai': 3936, 'dalo': 3937, 'zahilo': 3938, 'sikho': 3939, 'kaaranamaaha': 3940, 'jyadaa': 3941, 'system': 3942, 'dekhti': 3943, 'chairman': 3944, 'hisa': 3945, 'farz': 3946, 'jimmedari': 3947, 'tt': 3948, 'shakt': 3949, 'unemployment': 3950, 'yuvi': 3951, 'keval': 3952, 'tej': 3953, 'commentry': 3954, 'aajaadi': 3955, 'hogai': 3956, 'b10': 3957, 'shyd': 3958, 'height': 3959, 'banenge': 3960, 'prakar': 3961, 'aham': 3962, 'concentrate': 3963, 'naaa': 3964, 'aurato': 3965, 'galati': 3966, 'economy': 3967, 'marna': 3968, 'lgta': 3969, 'criticize': 3970, 'vyapam': 3971, 'our': 3972, 'bahaali': 3973, 'kharch': 3974, 'rajkumar': 3975, 'haat': 3976, 'maangne': 3977, 'gauri': 3978, 'lankesh': 3979, 'while': 3980, 'twist': 3981, 'punjabi': 3982, 'harry': 3983, 'talaak': 3984, 'gyaa': 3985, 'due': 3986, 'jija': 3987, 'blog': 3988, 'ever': 3989, 'seen': 3990, 'kamaye': 3991, 'kamaya': 3992, 'seriously': 3993, 'kaamyaabi': 3994, 'tamam': 3995, 'jaen': 3996, 'ummide': 3997, 'kroge': 3998, 'mallya': 3999, 'dhavan': 4000, 'khaati': 4001, 'uskay': 4002, 'picnic': 4003, 'saboot': 4004, 'madarsa': 4005, 'takke': 4006, 'governance': 4007, 'amalan': 4008, 'baladasti': 4009, 'hakmiyat': 4010, 'qayim': 4011, 'bhid': 4012, 'pai': 4013, 'wagra': 4014, 'vemula': 4015, 'nadeem': 4016, 'orr': 4017, 'barf': 4018, 'hypocrite': 4019, 'disturb': 4020, 'bayaan': 4021, 'ravish': 4022, 'krwa': 4023, 'mery': 4024, 'hidden': 4025, 'molvi': 4026, 'ganna': 4027, 'ghumte': 4028, 'achche': 4029, 'gana': 4030, 'rich': 4031, 'pahucha': 4032, 'lynching': 4033, 'maths': 4034, 'parr': 4035, 'prachar': 4036, 'joker': 4037, '2002': 4038, 'sunaya': 4039, 'dekhke': 4040, 'rjd': 4041, 'jamane': 4042, 'kick': 4043, 'pirito': 4044, 'kooch': 4045, 'sukoon': 4046, 'bhakton': 4047, 'shock': 4048, 'khrab': 4049, 'ay': 4050, 'dila': 4051, 'pirit': 4052, 'bakaiti': 4053, 'aankho': 4054, 'hall': 4055, 'kasab': 4056, 'khila': 4057, 'condemn': 4058, 'aapiye': 4059, 'chutiyape': 4060, 'mutne': 4061, 'aapna': 4062, 'izhar': 4063, 'cover': 4064, 'soon': 4065, 'guess': 4066, 'oyeein': 4067, 'baksh': 4068, 'internet': 4069, 'kati': 4070, 'indoor': 4071, 'manega': 4072, 'hurriyat': 4073, 'utsav': 4074, 'karnama': 4075, 'haraya': 4076, 'javan': 4077, 'poll': 4078, 'evm': 4079, 'aayege': 4080, 'maaraa': 4081, 'agaya': 4082, 'aatma': 4083, 'tyohaar': 4084, 'shikhaya': 4085, 'aalawa': 4086, 'faith': 4087, 'adhikar': 4088, 'wahe': 4089, 'rakshak': 4090, 'aurate': 4091, 'nind': 4092, 'stance': 4093, 'mns': 4094, 'mulki': 4095, 'mafaad': 4096, 'lambi': 4097, 'naari': 4098, 'bolney': 4099, 'pahan': 4100, 'episode': 4101, 'aakhri': 4102, 'karde': 4103, 'aakash': 4104, 'shubhkamnaye': 4105, 'mean': 4106, 'chhodkar': 4107, 'pahli': 4108, 'sheesh': 4109, 'feeling': 4110, 'esko': 4111, 'kesa': 4112, 'dars': 4113, 'ziada': 4114, 'dhayan': 4115, 'roza': 4116, 'haj': 4117, 'zakat': 4118, 'baghair': 4119, 'hifazat': 4120, 'samaz': 4121, 'arz': 4122, 'central': 4123, 'kuran': 4124, 'luta': 4125, 'arun': 4126, '41': 4127, 'pv': 4128, '57': 4129, '43': 4130, 'padhao': 4131, 'mithali': 4132, 'dilli': 4133, 'mukt': 4134, 'mille': 4135, 'my': 4136, 'much': 4137, 'speak': 4138, 'piles': 4139, 'kaala': 4140, 'mahaz': 4141, 'faf': 4142, 'plessis': 4143, 'simran': 4144, 'nana': 4145, 'ladna': 4146, 'icha': 4147, 'hinsa': 4148, 'naadaan': 4149, 'balatkar': 4150, 'sharafat': 4151, 'drop': 4152, 'tiranga': 4153, 'sochi': 4154, 'dehshat': 4155, 'sweet': 4156, 'rajinaama': 4157, 'badle': 4158, 'bologe': 4159, 'khairiyat': 4160, 'mustafa': 4161, 'moqa': 4162, 'jhandu': 4163, 'anirudha': 4164, '59': 4165, 'charitra': 4166, 'taubah': 4167, 'bolete': 4168, 'ameer': 4169, 'spin': 4170, 'rho': 4171, 'asar': 4172, 'yet': 4173, 'marathi': 4174, 'subject': 4175, 'hadsa': 4176, 'dwara': 4177, 'arabi': 4178, 'fest': 4179, 'honey': 4180, 'if': 4181, 'misaal': 4182, 'achay': 4183, 'looo': 4184, 'poatti': 4185, 'past': 4186, 'majboori': 4187, 'bhosdi': 4188, 'temple': 4189, 'indra': 4190, 'skti': 4191, 'hogaa': 4192, 'jarsi': 4193, 'kabhie': 4194, 'raavan': 4195, 'mohan': 4196, 'tadka': 4197, 'rozgaar': 4198, 'albata': 4199, 'jadojehad': 4200, 'rahoonga': 4201, 'kiyoun': 4202, 'alam': 4203, 'karage': 4204, 'pehly': 4205, 'boss': 4206, 'yadi': 4207, 'diyan': 4208, 'kahain': 4209, 'aamar': 4210, 'aami': 4211, 'aar': 4212, 'islamabad': 4213, 'cause': 4214, 'bhand': 4215, 'policy': 4216, 'taiyaar': 4217, 'bazaar': 4218, 'er': 4219, 'dedicated': 4220, 'akram': 4221, 'disha': 4222, 'fauji': 4223, 'ayub': 4224, 'aah': 4225, 'gunaah': 4226, 'jeena': 4227, 'add': 4228, 'scene': 4229, 'hwa': 4230, 'chle': 4231, '11hour': 4232, 'rakay': 4233, 'presstitute': 4234, 'partiyon': 4235, 'jha': 4236, 'awasthi': 4237, 'sharad': 4238, 'parast': 4239, 'dand': 4240, 'suno': 4241, 'siti': 4242, 'aarzoo': 4243, 'aimplb': 4244, 'nark': 4245, 'apani': 4246, 'asur': 4247, 'similarity': 4248, 'property': 4249, 'rozi': 4250, 'kyuon': 4251, 'jaisy': 4252, 'competition': 4253, 'lata': 4254, 'raste': 4255, 'roshni': 4256, 'girna': 4257, 'parta': 4258, 'tustikaran': 4259, 'fat': 4260, 'israel': 4261, 'jnu': 4262, 'kamaal': 4263, 'mamle': 4264, 'register': 4265, 'publicity': 4266, 'ayi': 4267, 'dhabba': 4268, 'tyar': 4269, 'honest': 4270, 'deewane': 4271, 'car': 4272, 'baski': 4273, 'laagu': 4274, 'swimming': 4275, 'pool': 4276, 'healthy': 4277, 'chura': 4278, 'soo': 4279, 'logical': 4280, 'medical': 4281, 'prapt': 4282, 'dabaya': 4283, 'kir': 4284, '2020': 4285, 'haarne': 4286, 'point': 4287, 'eik': 4288, 'abhee': 4289, 'khilen': 4290, 'bazee': 4291, 'katrena': 4292, 'chalay': 4293, 'chalate': 4294, 'bhalai': 4295, 'rakhkar': 4296, 'milkar': 4297, 'uljhe': 4298, 'hello': 4299, 'quota': 4300, 'bewaqoof': 4301, 'ghante': 4302, 'khatma': 4303, 'condition': 4304, 'ashamed': 4305, 'bolnewale': 4306, 'womens': 4307, 'trust': 4308, 'dilao': 4309, 'shauq': 4310, 'kahich': 4311, 'bhulo': 4312, 'dikhata': 4313, 'parde': 4314, 'samjhe': 4315, 'ginti': 4316, 'hav': 4317, 'bachu': 4318, 'celebrate': 4319, 'maara': 4320, 'taal': 4321, 'singing': 4322, 'paaoge': 4323, 'barbaadi': 4324, 'talented': 4325, 'abuse': 4326, 'agent': 4327, 'bhaiyaa': 4328, 'beemari': 4329, 'upa': 4330, 'bhushan': 4331, 'kerne': 4332, 'maafi': 4333, 'bans': 4334, 'roots': 4335, 'intazaar': 4336, 'lapet': 4337, 'badhti': 4338, 'psl': 4339, 'ghany': 4340, 'suhany': 4341, 'chaon': 4342, 'chirhakty': 4343, 'bor': 4344, 'chatnar': 4345, 'ashjar': 4346, 'haaye': 4347, 'bacchi': 4348, 'teeno': 4349, 'samajhte': 4350, 'jamhoriyat': 4351, 'faisle': 4352, 'nationalist': 4353, 'insano': 4354, 'jaanch': 4355, 'elections': 4356, 'article': 4357, 'sakate': 4358, '12th': 4359, 'crease': 4360, 'pahunchaate': 4361, 'glad': 4362, 'naukar': 4363, 'javed': 4364, 'muhsin': 4365, 'maun': 4366, 'jar': 4367, 'viswas': 4368, 'difference': 4369, 'subhash': 4370, 'ghai': 4371, 'sets': 4372, 'moke': 4373, '54': 4374, 'karata': 4375, 'kareina': 4376, 'wahiyat': 4377, 'ghuma': 4378, 'chheen': 4379, 'lee': 4380, 'dimple': 4381, 'naraz': 4382, 'inaam': 4383, 'saved': 4384, 'subsidy': 4385, 'behal': 4386, 'shan': 4387, 'raise': 4388, 'paid': 4389, 'dubara': 4390, 'imandari': 4391, 'solution': 4392, 'julm': 4393, 'destroy': 4394, 'sharifs': 4395, 'position': 4396, 'anpad': 4397, 'ganwar': 4398, 'zor': 4399, 'karana': 4400, 'chahate': 4401, 'lallu': 4402, 'baht': 4403, 'ladkiya': 4404, 'ivs': 4405, 'iv': 4406, 'bolke': 4407, 'jhagde': 4408, 'rastriya': 4409, 'sau': 4410, 'tareeqon': 4411, 'dramay': 4412, 'champion': 4413, 'tulna': 4414, 'chate': 4415, 'dya': 4416, 'incident': 4417, 'hamesa': 4418, 'baney': 4419, 'bcci': 4420, 'sunanda': 4421, 'rawayya': 4422, 'gee': 4423, 'karwahi': 4424, 'taarif': 4425, 'peeth': 4426, 'lord': 4427, 'chaturthi2017': 4428, 'ramchandra': 4429, 'siya': 4430, 'duryodhano': 4431, 'maaro': 4432, 'bhim': 4433, 'organization': 4434, 'waqas': 4435, 'exposed': 4436, 'em': 4437, 'ledhu': 4438, 'asalu': 4439, 'hag': 4440, 'darubandi': 4441, 'yunhi': 4442, 'killings': 4443, 'gaddhe': 4444, 'dinesh': 4445, 'faraq': 4446, 'hojayega': 4447, 'unhi': 4448, 'dancer': 4449, 'fikr': 4450, 'tel': 4451, 'harayana': 4452, 'stalking': 4453, 'hazaaron': 4454, 'chaah': 4455, 'mainn': 4456, 'phaltu': 4457, 'peete': 4458, 'poster': 4459, 'bhijwaya': 4460, 'hiii': 4461, 'gaon': 4462, 'premi': 4463, 'zyaada': 4464, 'dikhe': 4465, 'ghume': 4466, 'nishana': 4467, 'pora': 4468, 'sikhana': 4469, 'katputli': 4470, 'hataye': 4471, 'sutta': 4472, 'parade': 4473, 'hogy': 4474, 'anjam': 4475, 'lagane': 4476, 'khota': 4477, 'tika': 4478, 'maarna': 4479, 'dahaad': 4480, 'bheta': 4481, 'mut': 4482, 'sagi': 4483, 'ummeed': 4484, 'deshse': 4485, 'abhibhi': 4486, 'atyachari': 4487, 'manane': 4488, 'health': 4489, 'sindhu': 4490, 'bhulte': 4491, 'mi': 4492, 'relation': 4493, 'lal': 4494, 'jarrori': 4495, 'moujood': 4496, 'macha': 4497, 'phd': 4498, 'separatist': 4499, 'rupay': 4500, 'lado': 4501, 'shiv': 4502, 'north': 4503, 'aagaye': 4504, 'shoot': 4505, 'hijab': 4506, 'jii': 4507, 'bannu': 4508, 'mian': 4509, 'wafa': 4510, 'local': 4511, 'ilzam': 4512, 'tbhi': 4513, 'gaanay': 4514, 'kshatriya': 4515, 'khadi': 4516, 'abd': 4517, 'villier': 4518, 'kanya': 4519, 'aese': 4520, 'padne': 4521, 'rat': 4522, 'itihaas': 4523, 'nange': 4524, 'chodh': 4525, 'harbhajan': 4526, 'pesa': 4527, 'successful': 4528, 'hia': 4529, 'angrezo': 4530, 'baradari': 4531, 'yakjhati': 4532, 'rameez': 4533, 'afsos': 4534, 'kabiliyat': 4535, 'esliye': 4536, 'karar': 4537, 'saayad': 4538, 'accidents': 4539, 'note': 4540, 'chhap': 4541, 'janay': 4542, 'lein': 4543, 'sirjee': 4544, 'funding': 4545, 'bot': 4546, 'put': 4547, 'badlaw': 4548, 'jasoda': 4549, 'parosiyon': 4550, 'shobha': 4551, 'tuesday': 4552, 'kung': 4553, 'fu': 4554, 'dilwayenge': 4555, 'bhaji': 4556, '03': 4557, 'yee': 4558, 'saktey': 4559, 'hungama': 4560, 'whi': 4561, 'doge': 4562, 'out': 4563, 'ey': 4564, 'chahye': 4565, 'bahadur': 4566, 'kahna': 4567, 'hari': 4568, 'gas': 4569, 'kamm': 4570, 'simit': 4571, 'hotaa': 4572, 'bakchodiya': 4573, 'hashmi': 4574, 'stardom': 4575, 'charan': 4576, 'murgi': 4577, 'pait': 4578, 'leela': 4579, 'halaat': 4580, 'bakro': 4581, 'study': 4582, 'jayengi': 4583, 'soyi': 4584, 'dikhtee': 4585, 'rk': 4586, 'paribhasha': 4587, 'samjaho': 4588, 'single': 4589, 'enko': 4590, 'char': 4591, 'barten': 4592, 'standard': 4593, 'badhane': 4594, 'chutiapa': 4595, 'coll': 4596, 'chee': 4597, 'marni': 4598, 'birth': 4599, 'yu': 4600, 'genuine': 4601, 'dilata': 4602, 'sudharne': 4603, 'knock': 4604, 'rangeela': 4605, 'jabardast': 4606, 'chahunga': 4607, 'lagatar': 4608, 'abh': 4609, 'babaji': 4610, 'gunah': 4611, 'jhuti': 4612, 'failate': 4613, 'parveen': 4614, 'sainikon': 4615, 'friendship': 4616, 'shastri': 4617, 'bandar': 4618, 'gulati': 4619, 'jaaney': 4620, 'kaamyab': 4621, 'kaka': 4622, 'bewkuf': 4623, 'khudd': 4624, 'metter': 4625, 'kodak': 4626, 'chata': 4627, 'parha': 4628, 'tyagi': 4629, 'vishwanathan': 4630, '24': 4631, 'koushik': 4632, 'bul': 4633, 'mohammad': 4634, 'laasho': 4635, 'chupane': 4636, 'troll': 4637, 'andha': 4638, 'rekha': 4639, 'such': 4640, 'sochne': 4641, 'progress': 4642, 'mother': 4643, 'proof': 4644, 'behtareen': 4645, 'dekhnay': 4646, 'balatkari': 4647, 'mary': 4648, 'tennis': 4649, 'dev': 4650, 'banaa': 4651, 'pol': 4652, 'rag': 4653, 'aaega': 4654, 'th': 4655, 'jazba': 4656, 'khreedte': 4657, 'legya': 4658, 'tahum': 4659, 'kaand': 4660, 'aib': 4661, 'chode': 4662, 'khaa': 4663, 'tendulkar': 4664, 'kiski': 4665, 'samil': 4666, 'bhejna': 4667, 'atka': 4668, 'negi': 4669, 'zahoor': 4670, 'faroqui': 4671, 'parents': 4672, 'hatt': 4673, 'vadodara': 4674, 'tuj': 4675, 'shak': 4676, 'villain': 4677, 'pandito': 4678, 'poocho': 4679, 'rage': 4680, 'anwar': 4681, 'cont': 4682, 'hajam': 4683, 'electricity': 4684, 'mashwara': 4685, 'bhopal': 4686, 'rockstar': 4687, 'khasam': 4688, 'meinn': 4689, 'jihaad': 4690, 'read': 4691, 'quraan': 4692, 'promote': 4693, 'siyasi': 4694, 'sabki': 4695, 'galli': 4696, 'deya': 4697, 'vikash': 4698, 'ahmad': 4699, 'hijde': 4700, 'dusara': 4701, 'tasweer': 4702, 'khilona': 4703, 'takkar': 4704, 'int': 4705, '420': 4706, 'chlna': 4707, 'gareebo': 4708, 'bhoke': 4709, 'lamba': 4710, 'atankwadi': 4711, 'chah': 4712, 'raaste': 4713, 'qabza': 4714, 'muqabla': 4715, 'kejariwal': 4716, 'khaya': 4717, 'kun': 4718, 'shrey': 4719, 'chennai': 4720, 'bhansali': 4721, 'lao': 4722, 'morality': 4723, 'moral': 4724, 'nayapalika': 4725, 'chamak': 4726, 'asan': 4727, 'fens': 4728, 'khatir': 4729, 'arshad': 4730, 'warsi': 4731, 'hamla': 4732, 'studio': 4733, 'mohammed': 4734, 'vishesh': 4735, 'dhara': 4736, 'waat': 4737, 'lollywood': 4738, 'retweet': 4739, 'riot': 4740, 'sallu': 4741, 'bhaari': 4742, 'grt': 4743, 'ari': 4744, 'miyan': 4745, 'ekdam': 4746, 'phateechar': 4747, 'saaray': 4748, 'hooron': 4749, 'ilzaam': 4750, '5th': 4751, 'farig': 4752, 'acharya': 4753, 'jhuthe': 4754, 'pucha': 4755, 'plan': 4756, '229': 4757, 'wahab': 4758, 'sachi': 4759, 'jhooti': 4760, 'jashan': 4761, 'giri': 4762, 'kartoote': 4763, 'sunaina': 4764, 'wave': 4765, 'boh': 4766, 'peshab': 4767, 'porkistani': 4768, 'batata': 4769, 'ullooraatchi': 4770, 'manra': 4771, 'therthal': 4772, 'thiruttha': 4773, 'sattatthil': 4774, 'sapaanaayahar': 4775, 'kaichchaatthu': 4776, 'dharya': 4777, 'akhir': 4778, 'ajab': 4779, 'bachi': 4780, 'wade': 4781, 'nibate': 4782, 'shor': 4783, 'ended': 4784, 'mazahiya': 4785, 'vyang': 4786, 'masheen': 4787, 'banayen': 4788, 'tumhein': 4789, 'saathi': 4790, 'barri': 4791, 'rahman': 4792, 'kamai': 4793, 'bajah': 4794, 'chalna': 4795, 'keemat': 4796, 'antar': 4797, 'gane': 4798, 'convert': 4799, 'unchi': 4800, 'sandeep': 4801, 'randi': 4802, 'dad': 4803, 'gond': 4804, 'islye': 4805, 'hrr': 4806, 'lgte': 4807, 'next': 4808, 'tohhh': 4809, 'apun': 4810, 'shaq': 4811, 'rpt': 4812, 'safi': 4813, 'faaltu': 4814, 'dainai': 4815, 'bimari': 4816, 'kalank': 4817, 'container': 4818, 'laane': 4819, 'tarha': 4820, 'patwari': 4821, 'kutty': 4822, 'bachav': 4823, 'bhae': 4824, 'shabash': 4825, 'azan': 4826, 'petition': 4827, 'diwane': 4828, 'principal': 4829, 'taare': 4830, '56': 4831, 'nikalo': 4832, 'dub': 4833, 'shikshamitra': 4834, 'tution': 4835, 'aade': 4836, '07': 4837, 'peechay': 4838, 'hotey': 4839, 'bhoot': 4840, 'chappal': 4841, 'msg': 4842, 'batayi': 4843, 'aaenge': 4844, 'lagataar': 4845, 'betiyon': 4846, 'vaad': 4847, 'noor': 4848, '50000': 4849, 'najeeb': 4850, 'zalmi': 4851, 'casteism': 4852, 'ghulaam': 4853, 'haramkhor': 4854, 'peso': 4855, 'ba': 4856, 'kbi': 4857, 'keere': 4858, 'ladko': 4859, 'ussi': 4860, 'muze': 4861, 'bareme': 4862, 'aishwarya': 4863, 'teh': 4864, 'fantasies': 4865, 'puche': 4866, 'suraj': 4867, 'bhadve': 4868, 'jamat': 4869, 'rasheed': 4870, 'killing': 4871, 'chuky': 4872, 'dilaye': 4873, 'review': 4874, 'fayeda': 4875, 'tarike': 4876, 'sochen': 4877, 'sanchita': 4878, 'congres': 4879, 'banayi': 4880, 'bla': 4881, 'nikalte': 4882, 'leadaro': 4883, 'krishna': 4884, 'l2017': 4885, 'ach': 4886, 'jiyo': 4887, 'bachegi': 4888, 'badhkar': 4889, 'rishto': 4890, 'rajdeep': 4891, 'abay': 4892, 'konsa': 4893, 'honga': 4894, 'manch': 4895, 'shru': 4896, 'tickets': 4897, 'baji': 4898, 'dhanya': 4899, 'russia': 4900, 'pashto': 4901, 'lolz': 4902, 'mashallah': 4903, 'ed': 4904, 'gew': 4905, 'liberal': 4906, 'celebration': 4907, 'celebrity': 4908, 'artist': 4909, 'tayaar': 4910, 'dharmo': 4911, 'ignore': 4912, 'konsi': 4913, 'khoob': 4914, 'pakistanis': 4915, 'death': 4916, 'association': 4917, 'bhrastachar': 4918, 'choose': 4919, 'decent': 4920, 'daikha': 4921, 'utpidan': 4922, 'abolish': 4923, 'bahane': 4924, 'account': 4925, 'language': 4926, 'hangama': 4927, 'types': 4928, 'chotay': 4929, 'janey': 4930, 'geeta': 4931, 'mahabharata': 4932, 'chowk': 4933, 'understand': 4934, 'gurinchi': 4935, 'sambit': 4936, 'pese': 4937, 'tabsara': 4938, 'sunny': 4939, 'bhains': 4940, 'inaka': 4941, 'tapori': 4942, 'pir': 4943, 'awww': 4944, 'deez': 4945, 'atif': 4946, 'aslam': 4947, 'talking': 4948, 'jiss': 4949, 'sunwai': 4950, 'loo': 4951, 'zani': 4952, 'jangal': 4953, 'eh': 4954, 'vyapari': 4955, 'siva': 4956, 'hindustaan': 4957, 'deepika': 4958, 'marriages': 4959, 'khaane': 4960, 'mecca': 4961, 'tanz': 4962, 'rupey': 4963, 'deto': 4964, 'kissan': 4965, 'rp': 4966, 'suresh': 4967, 'aree': 4968, 'sleep': 4969, 'lagenge': 4970, 'pig': 4971, 'traffic': 4972, 'kahavat': 4973, 'kahawat': 4974, 'tell': 4975, 'involved': 4976, 'santo': 4977, 'afzal': 4978, 'gein': 4979, 'hands': 4980, 'ghorakhpur': 4981, 'pichhe': 4982, 'dishkyaao': 4983, 'bakree': 4984, 'khet': 4985, 'express': 4986, 'satire': 4987, 'paaun': 4988, 'payal': 4989, 'batt': 4990, 'pyara': 4991, 'dhol': 4992, 'navratri': 4993, 'handsome': 4994, 'visits': 4995, 'pandal': 4996, 'ras': 4997, 'kavita': 4998, 'wahn': 4999, 'seekhay': 5000, 'huu': 5001, 'select': 5002, 'madarso': 5003, 'bh': 5004, 'laate': 5005, 'joote': 5006, 'heroes': 5007, 'qatl': 5008, 'calculator': 5009, 'pandit': 5010, 'chaiya': 5011, 'tajziya': 5012, 'rakhiye': 5013, 'cylinder': 5014, 'price': 5015, 'jalti': 5016, 'samjhana': 5017, '1974': 5018, 'involve': 5019, 'sahe': 5020, 'howa': 5021, 'sis': 5022, 'harte': 5023, 'pul': 5024, 'mukhya': 5025, 'kripa': 5026, 'gaurakshaks': 5027, 'krege': 5028, 'baari': 5029, 'chauthi': 5030, 'hanging': 5031, 'siddhartha': 5032, 'haramkhoro': 5033, 'andhe': 5034, 'babao': 5035, 'taqat': 5036, 'khangress': 5037, 'bhishab': 5038, 'ghor': 5039, 'rocks': 5040, 'shikaar': 5041, 'andaza': 5042, 'akbar': 5043, 'siwa': 5044, 'buniyaad': 5045, 'maulvi': 5046, 'lug': 5047, 'jadeja': 5048, 'bhaav': 5049, 'mulko': 5050, 'nirbhaya': 5051, 'pant': 5052, 'dhoondh': 5053, 'rep': 5054, 'murder': 5055, 'assalam': 5056, 'cheeze': 5057, 'comment': 5058, 'barey': 5059, 'andhera': 5060, 'memories': 5061, 'bacchon': 5062, 'sochenge': 5063, 'kone': 5064, 'launda': 5065, 'bass': 5066, 'khalte': 5067, 'nasib': 5068, 'malhotra': 5069, 'capton': 5070, 'hamrah': 5071, 'maweshi': 5072, 'mandi': 5073, 'puhanch': 5074, 'written': 5075, 'zabardasti': 5076, 'ghus': 5077, 'aware': 5078, 'uplabdhi': 5079, 'jimma': 5080, 'button': 5081, 'adha': 5082, 'zahil': 5083, 'batayenge': 5084, 'bikau': 5085, 'rept': 5086, 'gow': 5087, 'koie': 5088, 'ullitta': 5089, 'raajinaama': 5090, 'pidita': 5091, 'random': 5092, 'khor': 5093, 'bivi': 5094, 'adhikaar': 5095, 'machaya': 5096, 'badminton': 5097, 'viral': 5098, 'ghotala': 5099, 'arrest': 5100, 'gira': 5101, 'tah': 5102, 'katam': 5103, 'deer': 5104, 'samasya': 5105, 'bich': 5106, 'ser': 5107, 'mukabla': 5108, 'kijiyega': 5109, 'nahiiii': 5110, 'meetha': 5111, 'parda': 5112, 'hitter': 5113, 'roka': 5114, 'dheela': 5115, 'lutne': 5116, 'payege': 5117, 'kahaan': 5118, 'dhire': 5119, 'sikchhit': 5120, 'bhawnao': 5121, 'awareness': 5122, 'jaisay': 5123, 'darate': 5124, 'picha': 5125, 'book': 5126, 'room': 5127, 'deewaron': 5128, 'sufism': 5129, 'jaativad': 5130, 'pehchaan': 5131, 'himesh': 5132, 'horaha': 5133, 'faad': 5134, 'bah': 5135, 'bhonkne': 5136, 'mathula': 5137, 'bachaya': 5138, 'pahchaan': 5139, 'filthy': 5140, 'kale': 5141, 'throw': 5142, 'khatron': 5143, 'jhoot': 5144, 'munna': 5145, 'vida': 5146, 'hasti': 5147, 'mitati': 5148, 'diwas': 5149, 'bin': 5150, 'payenge': 5151, 'maang': 5152, 'sansar': 5153, 'rana': 5154, 'anhoni': 5155, 'bhang': 5156, 'purana': 5157, 'ms': 5158, 'secular': 5159, 'vishay': 5160, 'tyala': 5161, 'after': 5162, 'nabi': 5163, 'kaur': 5164, 'keu': 5165, 'karney': 5166, 'fenkne': 5167, 'darhi': 5168, 'gaa': 5169, 'sabke': 5170, 'io': 5171, 'mulkon': 5172, 'won': 5173, 'ankho': 5174, 'doosro': 5175, 'title': 5176, 'something': 5177, 'changed': 5178, 'harshvardhan': 5179, 'musharraf': 5180, 'towards': 5181, 'nate': 5182, 'aay': 5183, 'jaspit': 5184, 'bhumra': 5185, 'kameena': 5186, 'pakar': 5187, 'diyo': 5188, 'dikhati': 5189, 'rehman': 5190, 'chiti': 5191, 'kaheen': 5192, 'surname': 5193, 'jaankar': 5194, 'retirement': 5195, 'eng': 5196, 'bulaya': 5197, 'st': 5198, 'lallan': 5199, 'dheere': 5200, 'dont': 5201, 'baraste': 5202, 'sikhaoge': 5203, 'naseeb': 5204, 'friends': 5205, 'mjhe': 5206, 'taraqi': 5207, 'script': 5208, 'dhoom': 5209, 'visited': 5210, 'dd': 5211, 'according': 5212, 'mitane': 5213, 'doglapan': 5214, 'cheezon': 5215, 'katuwe': 5216, 'ramdev': 5217, 'ese': 5218, 'trailer': 5219, '1962': 5220, 'gadu': 5221, 'waste': 5222, 'path': 5223, 'devendra': 5224, 'kos': 5225, 'aafat': 5226, 'medaan': 5227, 'muza': 5228, 'muj': 5229, 'shariya': 5230, 'dhong': 5231, 'gol': 5232, 'tarh': 5233, 'deals': 5234, 'bazar': 5235, 'awards': 5236, 'maah': 5237, 'gaayab': 5238, 'itta': 5239, 'jaada': 5240, 'hv': 5241, 'attract': 5242, 'bowling': 5243, 'general': 5244, 'idary': 5245, 'dikhti': 5246, 'needs': 5247, 'hur': 5248, 'fela': 5249, 'waje': 5250, 'sat': 5251, 'aadi': 5252, 'mot': 5253, 'kero': 5254, 'chhori': 5255, '71': 5256, 'traf': 5257, 'ahishta': 5258, 'gaytards': 5259, 'moo': 5260, 'america': 5261, '2007': 5262, 'cahiye': 5263, 'manate': 5264, 'jeb': 5265, 'result': 5266, 'bahno': 5267, 'secure': 5268, 'evergreen': 5269, 'chhodo': 5270, 'harana': 5271, 'yahe': 5272, 'beh': 5273, 'tank': 5274, 'ammo': 5275, 'style': 5276, 'ankhon': 5277, 'nirdosh': 5278, '42': 5279, 'waqiyaat': 5280, 'blah': 5281, 'karka': 5282, 'hatana': 5283, 'kushal': 5284, 'dayaa': 5285, 'sahuliyat': 5286, 'adat': 5287, 'velaq': 5288, 'velak': 5289, 'aayo': 5290, 'ashleel': 5291, 'tankhwa': 5292, 'chahtey': 5293, 'darling': 5294, 'machhar': 5295, 'wajha': 5296, 'phursat': 5297, 'shaan': 5298, 'jano': 5299, 'shaheb': 5300, 'gujju': 5301, 'gore': 5302, 'bhrashtachar': 5303, 'gan': 5304, 'khiladiyo': 5305, 'aiims': 5306, 'maulavi': 5307, 'recommend': 5308, 'sansad': 5309, 'ghabrata': 5310, 'chalnewali': 5311, 'anshan': 5312, 'lagayega': 5313, 'ghatya': 5314, 'bulle': 5315, 'chaupat': 5316, 'kiyaa': 5317, 'paaye': 5318, 'kejrival': 5319, 'babua': 5320, 'ess': 5321, 'ashish': 5322, 'nehra': 5323, 'ambani': 5324, 'baithna': 5325, 'chamkane': 5326, 'jama': 5327, 'karli': 5328, 'dhau': 5329, 'fa': 5330, 'jhagda': 5331, 'gharma': 5332, 'sehri': 5333, 'sonay': 5334, 'area': 5335, 'thrones': 5336, 'mayweather': 5337, 'prabhudeva': 5338, 'deva': 5339, 'sagarika': 5340, 'layiye': 5341, 'banwaiye': 5342, 'district': 5343, 'bag': 5344, 'aha': 5345, 'bolchhi': 5346, 'gals': 5347, 'milgai': 5348, 'likhai': 5349, 'farooq': 5350, 'chupi': 5351, 'berehmi': 5352, 'atyachaar': 5353, 'nangapan': 5354, 'paon': 5355, 'jatey': 5356, 'airtel': 5357, 'come': 5358, 'ghotalebaaj': 5359, 'ekathe': 5360, 'kyki': 5361, 'kathm': 5362, 'enka': 5363, 'hawale': 5364, 'fr': 5365, 'badalta': 5366, 'business': 5367, 'dhiru': 5368, 'ru': 5369, 'bashirhat': 5370, 'bachaa': 5371, 'your': 5372, 'welcome': 5373, 'bhayya': 5374, 'suvidha': 5375, 'ush': 5376, 'rma': 5377, 'borders': 5378, 'rahye': 5379, 'rukne': 5380, 'mahangi': 5381, 'rupaye': 5382, 'badhwane': 5383, 'bandhwate': 5384, 'btana': 5385, 'jitane': 5386, 'bahutdete': 5387, 'angrezi': 5388, '25kmpl': 5389, 'chhamta': 5390, 'yaisa': 5391, 'sarir': 5392, 'sif': 5393, 'awal': 5394, 'daar': 5395, 'wesa': 5396, 'natalie': 5397, 'molavion': 5398, 'bakvas': 5399, 'payenga': 5400, 'originality': 5401, 'recycle': 5402, 'viewership': 5403, 'ashes': 5404, 'imfact': 5405, 'wc': 5406, 'boards': 5407, '330': 5408, 'chitt': 5409, 'patt': 5410, 'gamvir': 5411, 'durgati': 5412, 'bakcho': 5413, 'ghisi': 5414, 'piti': 5415, 'ghasit': 5416, 'ismain': 5417, 'baly': 5418, 'talokaat': 5419, 'saka': 5420, 'fattuon': 5421, 'tarek': 5422, 'swachhta': 5423, 'elegy': 5424, 'kaab': 5425, 'khujkiwal': 5426, 'masoomo': 5427, 'gaav': 5428, 'baburaav': 5429, 'naav': 5430, 'samje': 5431, 'bhanja': 5432, 'jk': 5433, 'lk': 5434, 'wirodhi': 5435, 'lagaate': 5436, 'wagaira': 5437, 'fix': 5438, 'milibhagat': 5439, 'jina': 5440, 'raas': 5441, 'ghatnayein': 5442, 'hardin': 5443, 'sex': 5444, 'chaand': 5445, 'vyvastha': 5446, 'reporting': 5447, 'nirichhan': 5448, 'videocon': 5449, 'd2h': 5450, 'lajwaab': 5451, 'ayesha': 5452, 'gulu': 5453, 'haneef': 5454, 'drug': 5455, 'andr': 5456, 'asshole': 5457, 'saas': 5458, 'rago': 5459, 'dava': 5460, 'gilli': 5461, 'danda': 5462, 'galiyon': 5463, 'ladhai': 5464, 'topiwaloki': 5465, 'badhali': 5466, 'badimaanki': 5467, 'mazlum': 5468, 'darindegiki': 5469, 'tatte': 5470, 'minakshi': 5471, 'llb': 5472, 'aayengi': 5473, 'kisika': 5474, 'hanan': 5475, 'aarkshan': 5476, 'gatib': 5477, 'genral': 5478, 'vardan': 5479, 'waiqi': 5480, 'insaniyt': 5481, 'ehtesab': 5482, 'hathiyar': 5483, 'giya': 5484, 'hareef': 5485, 'istamal': 5486, 'followers': 5487, 'agly': 5488, 'maich': 5489, 'hemari': 5490, 'geyi': 5491, 'nuqsaan': 5492, 'burey': 5493, 'tabtak': 5494, 'lagne': 5495, 'ajinkya': 5496, 'khamoosh': 5497, 'ulte': 5498, 'chuthiye': 5499, 'assurance': 5500, 'ghanto': 5501, 'window': 5502, 'thoalvigalin': 5503, 'shaamat': 5504, 'torha': 5505, 'kochi': 5506, 'aval': 5507, 'banay': 5508, 'jub': 5509, 'mazloomeyat': 5510, 'darama': 5511, 'khalee': 5512, 'guwa': 5513, 'rupee': 5514, 'bagher': 5515, 'bekaar': 5516, 'pheeki': 5517, 'karengein': 5518, 'majaak': 5519, 'aaur': 5520, 'pvt': 5521, '153': 5522, 'fansi': 5523, 'kardete': 5524, 'rapiest': 5525, 'karwaana': 5526, 'prachaar': 5527, 'tmhari': 5528, 'tmhe': 5529, 'aaegi': 5530, 'achhae': 5531, 'burae': 5532, 'zyan': 5533, 'namaaz': 5534, 'gurbani': 5535, 'ilaha': 5536, 'illillah': 5537, 'prokabbadi': 5538, 'ghairat': 5539, 'emaani': 5540, 'jagay': 5541, 'holes': 5542, 'jute': 5543, 'bhigo': 5544, 'kaata': 5545, 'lulu': 5546, 'darana': 5547, 'furogh': 5548, 'chauka': 5549, 'attha': 5550, 'faansi': 5551, 'dhikaar': 5552, 'anura': 5553, 'gb': 5554, 'had': 5555, 'oriya': 5556, 'shaher': 5557, 'sahayata': 5558, 'gaw': 5559, 'rakkhako': 5560, 'gundai': 5561, 'license': 5562, 'kahun': 5563, 'rishvat': 5564, 'khori': 5565, 'guzara': 5566, 'tareeka': 5567, 'seekhiye': 5568, 'vasundhra': 5569, 'jato': 5570, 'lose': 5571, 'bachoon': 5572, 'naak': 5573, 'kariyoo': 5574, 'ganza': 5575, 'mairay': 5576, 'tweeting': 5577, 'gayai': 5578, 'pliz': 5579, 'khichdi': 5580, 'dharms': 5581, 'badtameej': 5582, 'thar': 5583, 'banata': 5584, 'partha': 5585, 'rodiwadi': 5586, 'badha': 5587, 'andhi': 5588, 'avashyak': 5589, 'amrapali': 5590, 'buyers': 5591, 'status': 5592, 'faide': 5593, 'mrwa': 5594, 'weshya': 5595, 'swatantrata': 5596, 'kark': 5597, 'importance': 5598, 'shivpal': 5599, 'baatne': 5600, 'sudhaaro': 5601, 'masum': 5602, 'katlustani': 5603, 'chinua': 5604, 'iphone': 5605, 'chakker': 5606, 'fully': 5607, 'poocha': 5608, 'ghazwa': 5609, 'auratein': 5610, 'saarey': 5611, 'apradhiyon': 5612, 'small': 5613, 'niklo': 5614, 'sajen': 5615, 'hongae': 5616, 'saleem': 5617, 'bataien': 5618, 'pakhtun': 5619, 'baiti': 5620, 'khailti': 5621, 'doosri': 5622, 'lliye': 5623, 'rastey': 5624, 'skate': 5625, 'padey': 5626, 'sorry': 5627, 'paak': 5628, 'shasan': 5629, 'lmbi': 5630, 'gaddaron': 5631, 'murdabad': 5632, 'poina': 5633, 'kathha': 5634, 'choona': 5635, 'mahina': 5636, 'raheta': 5637, 'shauhar': 5638, 'wasoolna': 5639, 'baghwan': 5640, 'jeisa': 5641, 'ucchal': 5642, 'waaat': 5643, 'poochha': 5644, 'fielding': 5645, 'satisfied': 5646, 'apply': 5647, 'niiii': 5648, 'bhaliii': 5649, 'jaga': 5650, 'trains': 5651, 'patari': 5652, 'sudharna': 5653, 'opening': 5654, 'shrab': 5655, 'shakl': 5656, 'fek': 5657, 'manzilon': 5658, 'lut': 5659, 'dilon': 5660, 'kaarwan': 5661, 'kashtiyan': 5662, 'saahil': 5663, '365': 5664, 'muhar': 5665, 'humarr': 5666, 'flat': 5667, 'pahlaj': 5668, 'nihalani': 5669, 'sacked': 5670, 'consistent': 5671, 'korean': 5672, 'agay': 5673, 'nithish': 5674, 'fair': 5675, 'sans': 5676, 'bakri': 5677, 'pelna': 5678, 'biryani': 5679, 'abhinna': 5680, 'ang': 5681, 'une': 5682, 'bulaaya': 5683, 'yuwa': 5684, 'massumo': 5685, 'taa': 5686, 'valon': 5687, 'peti': 5688, 'kaskar': 5689, 'ishwar': 5690, 'younger': 5691, 'saakh': 5692, 'buraiyaan': 5693, 'chhip': 5694, 'ant': 5695, 'zyaadatar': 5696, 'almost': 5697, 'settled': 5698, 'rim': 5699, 'jhim': 5700, 'saavan': 5701, 'bheege': 5702, 'aagan': 5703, 'bhram': 5704, 'censorship': 5705, 's7': 5706, 'duur': 5707, 'nakaratmak': 5708, 'vicharo': 5709, 'baksa': 5710, 'mahidaya': 5711, 'nikaliye': 5712, 'suprecourt': 5713, 'aitihasik': 5714, 'sunakar': 5715, 'kayam': 5716, 'fashadiya': 5717, 'babako': 5718, 'sarif': 5719, 'tajurbe': 5720, 'jarrorath': 5721, 'marenge': 5722, 'rokenge': 5723, 'hizde': 5724, 'ghariyali': 5725, 'aasu': 5726, 'chanels': 5727, 'parinam': 5728, 'sabbarwal': 5729, 'chudwaa': 5730, 'tjhe': 5731, 'bhii': 5732, 'matherchod': 5733, 'ganje': 5734, 'popular': 5735, 'utsuk': 5736, 'pichwade': 5737, 'peeta': 5738, 'shuruwat': 5739, 'daroga': 5740, 'lekha': 5741, 'jokha': 5742, 'awan': 5743, 'degre': 5744, 'haam': 5745, 'rusva': 5746, 'dengu': 5747, 'yey': 5748, '09': 5749, 'tuze': 5750, 'unwanted': 5751, 'useless': 5752, 'firta': 5753, 'niteshji': 5754, 'besaram': 5755, 'khuwab': 5756, 'tehreek': 5757, '1994': 5758, 'ziaulhaq': 5759, 'moive': 5760, 'living': 5761, 'shirf': 5762, 'samajhten': 5763, 'adme': 5764, 'imam': 5765, 'manne': 5766, 'iccha': 5767, 'kayse': 5768, '4shadi': 5769, 'sooar': 5770, '40bachche': 5771, 'bewafa': 5772, 'bri': 5773, '1709': 5774, 'shayra': 5775, 'likhgi': 5776, 'azeeb': 5777, 'khilone': 5778, 'jazbaton': 5779, 'sham': 5780, 'charity': 5781, 'conf': 5782, 'rence': 5783, 'dkshroff': 5784, 'gharwalone': 5785, 'rekh': 5786, 'rishtey': 5787, 'father': 5788, 'goal': 5789, 'con': 5790, 'parrott': 5791, 'thahrata': 5792, 'mitra': 5793, 'kehke': 5794, 'anath': 5795, 'batati': 5796, 'shubham': 5797, 'hojao': 5798, 'bahaney': 5799, 'lengey': 5800, 'jyadatar': 5801, 'tuzh': 5802, 'presidents': 5803, 'statement': 5804, 'hindumuslim': 5805, 'netas': 5806, 'milme': 5807, 'nazariya': 5808, 'beton': 5809, 'spaceship': 5810, 'dekhene': 5811, 'poucho': 5812, 'harso': 5813, 'bhaot': 5814, 'hamarye': 5815, 'establishment': 5816, 'mujhy': 5817, 'iraq': 5818, 'bharke': 5819, 'stephen': 5820, 'kesha': 5821, 'nowplaying': 5822, 'diware': 5823, 'aid': 5824, 'mahashay': 5825, 'amnesia': 5826, 'research': 5827, 'vibag': 5828, 'spast': 5829, 'irctc': 5830, 'bhugto': 5831, 'jeeo': 5832, 'chakra': 5833, 'baadshaah': 5834, 'kyunkiyeh': 5835, 'auryahan': 5836, 'hartarahki': 5837, 'chhoot': 5838, 'doosredeshme': 5839, 'nahimilti': 5840, 'logonkepyarepakistanmebhi': 5841, 'banaanewaale': 5842, 'jayogi': 5843, 'jiti': 5844, 'athe': 5845, 'ayenghe': 5846, 'income': 5847, 'badgehi': 5848, 'pakistain': 5849, 'shriram': 5850, 'jwan': 5851, 'lives': 5852, 'dies': 5853, 'bavisyu': 5854, 'bagvan': 5855, 'salamat': 5856, 'rake': 5857, 'chadarmod': 5858, 'masjido': 5859, 'fatwe': 5860, 'nikalwa': 5861, 'dhaya': 5862, 'ghattiya': 5863, 'logonn': 5864, 'aqel': 5865, 'chahiyay': 5866, 'werna': 5867, 'choordiyann': 5868, 'pehun': 5869, 'lainn': 5870, 'womans': 5871, 'haaq': 5872, 'bravehadiaya': 5873, 'lolabe': 5874, 'product': 5875, 'maulanao': 5876, '1500': 5877, 'halaka': 5878, 'vivek': 5879, 'leg': 5880, 'spinner': 5881, 'qadir': 5882, 'khisyani': 5883, 'khambha': 5884, 'noche': 5885, 'prime': 5886, 'chowkidar': 5887, 'chini': 5888, 'pehan': 5889, 'vigyapan': 5890, 'shreef': 5891, 'dako': 5892, 'peechein': 5893, 'divorced': 5894, 'nowhat': 5895, 'hoker': 5896, 'ekadhikar': 5897, 'bhiol': 5898, 'cheeje': 5899, 'perils': 5900, 'pezulm': 5901, 'khdeda': 5902, 'berozgaari': 5903, 'budhawe': 5904, 'lota': 5905, 'hadso': 5906, 'hateli': 5907, 'qazzafi': 5908, 'muth': 5909, 'sperm': 5910, 'chomu': 5911, 'fasi': 5912, 'adharm': 5913, 'acchai': 5914, 'naitikta': 5915, 'anaitikta': 5916, 'kamjor': 5917, 'aatankvadiyo': 5918, 'yaj': 5919, 'kebas': 5920, 'dept': 5921, 'chotey': 5922, 'karey': 5923, 'haya': 5924, 'dupata': 5925, 'orha': 5926, 'maryam': 5927, 'aisha': 5928, 'gulalai': 5929, 'ayan': 5930, 'lohe': 5931, 'heere': 5932, 'copied': 5933, 'rum': 5934, 'sarc': 5935, 'asm': 5936, 'billo': 5937, 'bilwal': 5938, 'dharial': 5939, 'yum': 5940, 'humlog': 5941, 'sikhaate': 5942, 'children': 5943, 'pap': 5944, 'tumahre': 5945, 'haiwan': 5946, 'akeli': 5947, 'kl': 5948, 'palo': 5949, 'vapas': 5950, 'paanch': 5951, 'agustus': 5952, '1991': 5953, 'kurbaani': 5954, 'pariwaar': 5955, '50k': 5956, 'quantity': 5957, 'ulti': 5958, 'pregnant': 5959, 'naye': 5960, 'ruka': 5961, 'deceased': 5962, 'dosiyo': 5963, 'lipa': 5964, 'leny': 5965, 'khayberpakhtoon': 5966, 'khof': 5967, 'chakaar': 5968, 'floods': 5969, 'atty': 5970, 'rehtay': 5971, 'darbhanga': 5972, 'sportsman': 5973, 'paristhitiy': 5974, 'vishwash': 5975, 'khareed': 5976, 'insb': 5977, 'baukha': 5978, 'khidki': 5979, 'tukade': 5980, 'rehate': 5981, 'mamale': 5982, 'lada': 5983, 'karawaya': 5984, 'gend': 5985, 'balley': 5986, 'karvaana': 5987, 'karwaiye': 5988, 'karneko': 5989, 'madile': 5990, 'fames': 5991, 'filhaal': 5992, 'training': 5993, 'chanda': 5994, 'ikkatha': 5995, 'vigrah': 5996, 'group': 5997, 'zakhm': 5998, 'ghaav': 5999, 'upadhi': 6000, 'bulati': 6001, 'pariwarvad': 6002, 'grasit': 6003, 'eske': 6004, 'vipachh': 6005, 'mashvrey': 6006, 'akrem': 6007, 'khde': 6008, 'bouncer': 6009, 'seam': 6010, 'hanso': 6011, 'sharmila': 6012, 'farooqi': 6013, 'lagtee': 6014, 'jogging': 6015, 'jaaya': 6016, 'wajan': 6017, 'ikatha': 6018, 'ladkiyaan': 6019, 'ladkon': 6020, 'gaaliyaan': 6021, 'trendy': 6022, 'rahin': 6023, 'janmabhumi': 6024, 'hanin': 6025, 'lele': 6026, 'gentleman': 6027, 'loadshedding': 6028, 'taaqi': 6029, 'pooche': 6030, 'maange': 6031, 'sanmaan': 6032, 'ollat': 6033, 'pdh': 6034, 'doctor': 6035, 'nni': 6036, 'wapsee': 6037, 'kijeye': 6038, 'anirodh': 6039, 'rajneta': 6040, 'gaurakhsha': 6041, 'rehene': 6042, 'doo': 6043, 'literature': 6044, 'mullaon': 6045, 'pde': 6046, 'lounda': 6047, 'archana': 6048, 'vijaya': 6049, 'sadbuddhi': 6050, 'married': 6051, 'saavdhan': 6052, 'jhand': 6053, 'sushmaji': 6054, 'hajoom': 6055, 'punishment': 6056, 'chhahiye': 6057, 'sja': 6058, 'doon': 6059, 'dm': 6060, 'cooperation': 6061, 'vive': 6062, 'dan': 6063, 'abto': 6064, 'kran': 6065, 'khovay': 6066, 'hovay': 6067, 'rajputana': 6068, 'samasyao': 6069, 'dhundh': 6070, 'layi': 6071, 'samasyae': 6072, 'dilaiye': 6073, '3sal': 6074, 'hora': 6075, 'shr': 6076, 'pateechar': 6077, 'ghosit': 6078, 'agge': 6079, 'taking': 6080, 'choonawala': 6081, 'doodhta': 6082, 'ph': 6083, 'positivity': 6084, 'bhigi': 6085, 'saand': 6086, 'vahan': 6087, 'contribution': 6088, 'charam': 6089, 'jale': 6090, 'buje': 6091, 'badhsha': 6092, 'samim': 6093, 'patriotism': 6094, 'sakafati': 6095, 'taareekhi': 6096, 'maag': 6097, 'naalo': 6098, 'budhayo': 6099, 'janhn': 6100, 'naalay': 6101, 'san': 6102, 'thaahi': 6103, 'wai': 6104, 'hhahaha': 6105, 'vish': 6106, 'houta': 6107, 'energy': 6108, 'bachegii': 6109, 'netaji': 6110, 'crass': 6111, 'jore': 6112, 'dwaar': 6113, 'presidential': 6114, 'elections2017': 6115, 'advani': 6116, 'cheeteh': 6117, 'bajirao': 6118, 'talwar': 6119, 'sandeh': 6120, 'desakti': 6121, 'cheer': 6122, 'aesi': 6123, 'tantr': 6124, 'intejaam': 6125, 'bandobast': 6126, 'ayse': 6127, 'mutallaka': 6128, 'khasha': 6129, 'akdam': 6130, 'mamshahab': 6131, 'sisters': 6132, 'dikhae': 6133, 'mukht': 6134, 'walou': 6135, 'logou': 6136, 'deraviolence': 6137, 'dalle': 6138, 'pronounce': 6139, 'jaaegi': 6140, 'haqeeqi': 6141, 'peda': 6142, 'bohran': 6143, 'faeda': 6144, 'zimedar': 6145, 'seekhi': 6146, 'btaye': 6147, 'kerti': 6148, 'aadesh': 6149, 'tahe': 6150, 'istekbaal': 6151, 'yarana': 6152, 'samjti': 6153, 'bharatki': 6154, 'bhaand': 6155, 'safed': 6156, 'ujaale': 6157, 'aintee': 6158, 'uthanay': 6159, 'kheltay': 6160, 'taraash': 6161, 'kharidein': 6162, 'chiffon': 6163, 'saree': 6164, 'daaman': 6165, 'sambandh': 6166, 'rapat': 6167, 'nirale': 6168, 'supne': 6169, 'gale': 6170, 'vidva': 6171, 'ronaken': 6172, 'gaddafi': 6173, 'ment': 6174, 'wafadar': 6175, 'tali': 6176, 'thoko': 6177, 'apatti': 6178, 'youva': 6179, 'naara': 6180, 'chana': 6181, 'khilaye': 6182, 'jayanti': 6183, 'dhoti': 6184, 'bewkoof': 6185, 'andher': 6186, 'begum': 6187, 'banner': 6188, 'ratan': 6189, 'sharda': 6190, 'zeenat': 6191, 'shaukat': 6192, 'nasir': 6193, 'pathan': 6194, 'narsimhan': 6195, 'tanvi': 6196, 'madat': 6197, 'suite': 6198, 'pasie': 6199, 'dikhne': 6200, 'aandhi': 6201, 'toofan': 6202, 'jagta': 6203, 'superman': 6204, 'saara': 6205, 'boldene': 6206, 'bahoton': 6207, 'fadanvis': 6208, 'sitaaro': 6209, 'khatter': 6210, 'ghatyaa': 6211, 'juree': 6212, 'detaie': 6213, 'sambhav': 6214, 'sahoyg': 6215, 'fokti': 6216, 'ruko': 6217, 'sharukhan': 6218, 'bakcho8i': 6219, 'raan8': 6220, 'lmao': 6221, 'docomo': 6222, 'destruction': 6223, 'holy': 6224, 'differences': 6225, 'names': 6226, 'orphan': 6227, 'nijad': 6228, 'dilayegi': 6229, 'neevu': 6230, 'yaavaaga': 6231, 'barodu': 6232, 'antha': 6233, 'aagthaa': 6234, 'untu': 6235, 'rangaswamige': 6236, 'koyee': 6237, 'manushya': 6238, 'swarth': 6239, 'upyog': 6240, 'pehlukhan': 6241, 'naqvi': 6242, 'cuter': 6243, 'kid': 6244, 'aankhon': 6245, 'aansu': 6246, 'santoshi': 6247, 'meenakshi': 6248, 'darzi': 6249, 'mill': 6250, 'kherat': 6251, 'wja': 6252, 'awwww': 6253, 'jhaeelo': 6254, 'humane': 6255, 'dayara': 6256, 'vadapau': 6257, 'kin': 6258, 'utpathang': 6259, 'latay': 6260, 'shahbano': 6261, 'divorces': 6262, 'buddhijeevi': 6263, 'edhar': 6264, 'thoalvihalin': 6265, 'sanskar': 6266, 'jaison': 6267, 'jyatatar': 6268, 'failaaya': 6269, 'saaamne': 6270, 'milate': 6271, 'candidate': 6272, 'margae': 6273, 'literally': 6274, 'shuroo': 6275, '7bje': 6276, 'shaeqeen': 6277, 'betab': 6278, '2sra': 6279, 'tohfa': 6280, 'hmne': 6281, 'dhanda': 6282, 'hadith': 6283, 'khalnayak': 6284, 'krij': 6285, 'keli': 6286, 'mech': 6287, 'hons': 6288, 'deri': 6289, 'mude': 6290, 'prithviraj': 6291, 'shammi': 6292, 'shashi': 6293, 'randhir': 6294, 'karishma': 6295, 'inlogo': 6296, 'ayyaashi': 6297, 'ooooo': 6298, 'these': 6299, 'weakening': 6300, 'fabric': 6301, 'chora': 6302, 'newz': 6303, 'collingwoo': 6304, 'wajib': 6305, 'thapar': 6306, 'karky': 6307, 'kehty': 6308, 'soye': 6309, 'graphy': 6310, 'goti': 6311, '100rs': 6312, 'hd': 6313, 'chhup': 6314, 'bindaas': 6315, 'sirr': 6316, 'basbaki': 6317, 'fuski': 6318, 'uthya': 6319, 'bhatkaya': 6320, 'dey': 6321, 'aajaye': 6322, 'jawaani': 6323, 'lawyer': 6324, 'krk': 6325, 'hindubhai': 6326, 'ramrahi': 6327, 'ghao': 6328, 'lagarahi': 6329, 'kaas': 6330, 'kalbe': 6331, 'sudhrege': 6332, 'stoning': 6333, 'kaudi': 6334, 'hin': 6335, 'pirates': 6336, 'antankiyo': 6337, 'karvayenge': 6338, 'netagiri': 6339, 'taweel': 6340, 'intezar': 6341, 'darmiyan': 6342, 'amad': 6343, 'pizza': 6344, 'guraantee': 6345, 'idiots': 6346, 'dialogs': 6347, 'ghamnd': 6348, 'mukdma': 6349, 'faisala': 6350, 'trha': 6351, 'pants': 6352, 'failata': 6353, 'mehrom': 6354, 'sweep': 6355, 'mama': 6356, 'ilaaz': 6357, 'puchogi': 6358, 'inteshar': 6359, 'opp': 6360, 'phasaya': 6361, 'jodke': 6362, 'chutye': 6363, 'dhika': 6364, 'aaeen': 6365, 'isalam': 6366, 'toothpick': 6367, 'danton': 6368, 'phans': 6369, 'idaraa': 6370, 'reaction': 6371, 'transfer': 6372, 'jindagi': 6373, 'bharsth': 6374, 'sooooooo': 6375, 'saamnay': 6376, 'faash': 6377, 'kindlyadjust': 6378, 'naram': 6379, 'zuban': 6380, 'suba': 6381, '2000': 6382, 'firse': 6383, 'saniya': 6384, 'mailk': 6385, 'autkat': 6386, 'yy': 6387, 'pb': 6388, 'aksmiq': 6389, 'ghatana': 6390, 'afghan': 6391, 'dehshatgard': 6392, 'legends': 6393, 'pedal': 6394, 'marta': 6395, 'sekne': 6396, 'gurus': 6397, 'intzaar': 6398, 'chelo': 6399, 'tyaar': 6400, 'guruo': 6401, 'pahuchne': 6402, 'maharajo': 6403, 'karte7lejao': 6404, 'mall': 6405, 'del': 6406, 'hike': 6407, 'daru': 6408, 'shortlist': 6409, 'nakara': 6410, 'worship': 6411, 'khelkar': 6412, 'anzham': 6413, 'glti': 6414, 'seeti': 6415, 'bajey': 6416, 'sajay': 6417, 'taali': 6418, 'jamey': 6419, 'khabhi': 6420, 'bhahut': 6421, 'aachi': 6422, 'mahzabi': 6423, 'pori': 6424, 'gaddar': 6425, 'parthar': 6426, 'ghutno': 6427, 'models': 6428, 'shehla': 6429, 'human': 6430, 'chachha': 6431, 'sabkuj': 6432, 'samajta': 6433, 'sunnewala': 6434, 'cartoon': 6435, 'servant': 6436, 'khelata': 6437, 'tumharahi': 6438, 'bhava': 6439, 'janme': 6440, 'janmi': 6441, 'hmare': 6442, 'phela': 6443, 'abadi': 6444, 'jarurt': 6445, 'aabadi': 6446, 'waapis': 6447, 'cheezo': 6448, 'banake': 6449, 'manmarji': 6450, 'dallo': 6451, 'ande': 6452, 'irfankhan': 6453, 'legendary': 6454, 'alvidaai': 6455, 'unho': 6456, 'keese': 6457, 'meene': 6458, '32': 6459, 'maarae': 6460, 'gayae': 6461, 'kijiyae': 6462, 'memo': 6463, 'ratragit': 6464, 'gaate': 6465, 'golf': 6466, 'badon': 6467, 'childrens': 6468, 'inhon': 6469, 'mashalllah': 6470, 'hopes': 6471, '1v': 6472, 'erest': 6473, 'pale': 6474, 'hatwaya': 6475, 'bezati': 6476, 'aisehi': 6477, 'pareshaan': 6478, 'kahein': 6479, 'parhai': 6480, 'reminds': 6481, 'nomis': 6482, 'navami': 6483, 'migrants': 6484, 'sarr': 6485, 'marvi': 6486, 'sirmed': 6487, 'mazaaq': 6488, 'poochtay': 6489, 'pareshaaniyan': 6490, 'azaab': 6491, 'achy': 6492, 'salook': 6493, 'moat': 6494, 'qaim': 6495, 'bhaagty': 6496, 'hovy': 6497, 'ghuss': 6498, 'vichardhara': 6499, 'aadhipatya': 6500, 'apuraniya': 6501, 'saral': 6502, 'that': 6503, 'iger': 6504, 'khisiyahi': 6505, 'kambha': 6506, 'nochee': 6507, 'lakar': 6508, 'bihaar': 6509, 'tar': 6510, 'kalsoom': 6511, 'honesty': 6512, 'jst': 6513, 'sunayi': 6514, 'waapas': 6515, 'hell': 6516, 'doors': 6517, 'heaven': 6518, 'ganwaro': 6519, 'pados': 6520, 'molunat': 6521, 'manaiviyin': 6522, 'suhayeenam': 6523, 'anikkana': 6524, 'mudal': 6525, 'poottihali': 6526, 'dahwan': 6527, 'vilayaada': 6528, 'maattar': 6529, 'dauray': 6530, 'khilarion': 6531, 'elaan': 6532, 'junaid': 6533, 'siayasat': 6534, 'bewakofon': 6535, 'shahbash': 6536, 'nibha': 6537, 'romeo': 6538, 'gaurakshako': 6539, 'bhudhijivi': 6540, 'secret': 6541, 'gud': 6542, 'rajnikanth': 6543, 'broker': 6544, 'atankvaad': 6545, 'roji': 6546, 'waho': 6547, 'nic': 6548, 'advance': 6549, 'alhamdulillah': 6550, 'manchester': 6551, 'bhos': 6552, 'ike': 6553, 'porkisstan': 6554, 'haradiya': 6555, 'laas': 6556, 'maveer': 6557, 'rohan': 6558, 'winner': 6559, 'manveer': 6560, 'kohre': 6561, 'goeventz': 6562, 'dabaaw': 6563, 'khwabon': 6564, 'titli': 6565, 'talk': 6566, 'sassy': 6567, 'waahhh': 6568, 'jegah': 6569, 'sirmujhe': 6570

In [ ]:

# adding 1 because of reserved 0 index for padding vocab_size = len(word_index) + 1

maxlen = 120

X_train = pad_sequences(X_train, padding=’post’, maxlen=maxlen) X_test = pad_sequences(X_test, padding=’post’, maxlen=maxlen)

In [ ]:

# use GloVe embeddings to create the feature matrix# load the GloVe word embeddings and create a dictionary that will contain words as keys and their corresponding embedding list as values embeddings_dictionary = dict() glove_file = open(‘/content/drive/My Drive/Data Files/depression analysis data/glove.6B.300d.txt’, encoding=”utf8″)

for line in glove_file: records = line.split() word = records[0] vector_dimensions = asarray(records[1:], dtype=’float32′) embeddings_dictionary[word] = vector_dimensions glove_file.close()

In [ ]:

# create an embedding matrix where each row number will correspond to the index of the word in the corpus # the matrix will have 300 columns where each column will contain the GloVe word embeddings for the words in our corpusEMBEDDING_DIM = 300 embedding_matrix = [[0]*EMBEDDING_DIM]*vocab_size for word, index in tokenizer.word_index.items(): embedding_vector = embeddings_dictionary.get(word) if embedding_vector is not None: embedding_matrix[index] = embedding_vector

In [ ]:

# attention Layerdef dot_product(x, kernel): “”” Wrapper for dot product operation, in order to be compatible with both Theano and Tensorflow Args: x (): input kernel (): weights Returns: “”” if K.backend() == ‘tensorflow’: return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1) else: return K.dot(x, kernel)

class AttentionWithContext(Layer): “”” Attention operation, with a context/query vector, for temporal data. Supports Masking. Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf] “Hierarchical Attention Networks for Document Classification” by using a context vector to assist the attention # Input shape 3D tensor with shape: (samples, steps, features). # Output shape 2D tensor with shape: (samples, features). How to use: Just put it on top of an RNN Layer (GRU/LSTM/SimpleRNN) with return_sequences=True. The dimensions are inferred based on the output shape of the RNN. Note: The layer has been tested with Keras 2.0.6 Example: model.add(LSTM(64, return_sequences=True)) model.add(AttentionWithContext()) # next add a Dense layer (for classification/regression) or whatever… “””def __init__(self, W_regularizer=None, u_regularizer=None, b_regularizer=None, W_constraint=None, u_constraint=None, b_constraint=None, bias=True, **kwargs): self.supports_masking = True self.init = initializers.get('glorot_uniform') self.W_regularizer = regularizers.get(W_regularizer) self.u_regularizer = regularizers.get(u_regularizer) self.b_regularizer = regularizers.get(b_regularizer) self.W_constraint = constraints.get(W_constraint) self.u_constraint = constraints.get(u_constraint) self.b_constraint = constraints.get(b_constraint) self.bias = bias super(AttentionWithContext, self).__init__(**kwargs) def build(self, input_shape): assert len(input_shape) == 3 self.W = self.add_weight(shape=(input_shape[-1], input_shape[-1],), initializer=self.init, name='{}_W'.format(self.name), regularizer=self.W_regularizer, constraint=self.W_constraint) if self.bias: self.b = self.add_weight(shape=(input_shape[-1],), initializer='zero', name='{}_b'.format(self.name), regularizer=self.b_regularizer, constraint=self.b_constraint) self.u = self.add_weight(shape=(input_shape[-1],), initializer=self.init, name='{}_u'.format(self.name), regularizer=self.u_regularizer, constraint=self.u_constraint) super(AttentionWithContext, self).build(input_shape) def compute_mask(self, input, input_mask=None): *# do not pass the mask to the next layers*return None def call(self, x, mask=None): uit = dot_product(x, self.W) if self.bias: uit += self.b uit = K.tanh(uit) ait = dot_product(uit, self.u) a = K.exp(ait) *# apply mask after the exp. will be re-normalized next*if mask is not None: *# Cast the mask to floatX to avoid float64 upcasting in theano* a *= K.cast(mask, K.floatx()) *# in some cases especially in the early stages of training the sum may be almost zero# and this results in NaN's. A workaround is to add a very small positive number ε to the sum.# a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())* a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx()) a = K.expand_dims(a) weighted_input = x * a return K.sum(weighted_input, axis=1) def compute_output_shape(self, input_shape): return input_shape[0], input_shape[-1]

In [ ]:

# bidirectional lstm with attention layer modeldef model_lstm(embedding_matrix): inp = Input(shape=(maxlen,)) x = Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix], trainable=True)(inp) x = Bidirectional(LSTM(256, dropout=0.2, recurrent_dropout=0.2, return_sequences=True))(x) # x = AttentionWithContext()(x) x = Dense(128, activation=”relu”)(x) x = Dropout(0.30)(x) x = Dense(128, activation=”relu”)(x) x = Dropout(0.30)(x) x = Dense(1, activation=”sigmoid”)(x) model = Model(inputs=inp, outputs=x) model.compile(loss=’binary_crossentropy’, optimizer=’adam’, metrics=[‘accuracy’]) model.summary() return model

model_lstm = model_lstm(np.array(embedding_matrix)) model_lstm.summary()

`Model: “model_1”


Layer (type) Output Shape Param #

input_3 (InputLayer) [(None, 120)] 0


embedding_2 (Embedding) (None, 120, 300) 4029300


bidirectional_1 (Bidirection (None, 120, 512) 1140736


dense_3 (Dense) (None, 120, 128) 65664


dropout_2 (Dropout) (None, 120, 128) 0


dense_4 (Dense) (None, 120, 128) 16512


dropout_3 (Dropout) (None, 120, 128) 0


dense_5 (Dense) (None, 120, 1) 129

Total params: 5,252,341 Trainable params: 5,252,341 Non-trainable params: 0


Model: “model_1”


Layer (type) Output Shape Param #

input_3 (InputLayer) [(None, 120)] 0


embedding_2 (Embedding) (None, 120, 300) 4029300


bidirectional_1 (Bidirection (None, 120, 512) 1140736


dense_3 (Dense) (None, 120, 128) 65664


dropout_2 (Dropout) (None, 120, 128) 0


dense_4 (Dense) (None, 120, 128) 16512


dropout_3 (Dropout) (None, 120, 128) 0


dense_5 (Dense) (None, 120, 1) 129

Total params: 5,252,341 Trainable params: 5,252,341 Non-trainable params: 0 _________________________________________________________________`

In [ ]:

# save the best model and early stopping# saveBestModel = keras.callbacks.ModelCheckpoint(path+’/model/best_model.hdf5′, monitor=’val_acc’, verbose=0, save_best_only=True, save_weights_only=False, mode=’auto’, period=1) earlyStopping = EarlyStopping(monitor=’val_loss’, min_delta=0, patience=3, verbose=0, mode=’auto’)

# Fit the model history = model_lstm.fit(X_train, y_train, batch_size=1000, epochs=2, validation_split=0.15, callbacks=[earlyStopping])

Epoch 1/2 4/4 [==============================] - 73s 18s/step - loss: 0.6234 - accuracy: 0.8761 - val_loss: 0.4469 - val_accuracy: 0.9030 Epoch 2/2 4/4 [==============================] - 72s 18s/step - loss: 0.3998 - accuracy: 0.9014 - val_loss: 0.3612 - val_accuracy: 0.9030

In [ ]:

# Final evaluation of the model metrics = model_lstm.evaluate(X_test, y_test) print(“Loss”, metrics[0]) print(“Accuracy:”, metrics[1])

25/25 [==============================] - 8s 309ms/step - loss: 0.3319 - accuracy: 0.9175 Loss 0.331880122423172 Accuracy: 0.9175127148628235

In [ ]:

y_pred_test = model.predict(X_test, batch_size=128) y_pred_bool_test = y_pred_test > 0.5

print(classification_report(y_test, y_pred_bool_test))

In [ ]:

# cnn modeldef model_cnn(embedding_matrix): inp = Input(shape=(maxlen,)) x = Embedding(vocab_size, EMBEDDING_DIM, weights=[embedding_matrix], trainable=True)(inp) x = SpatialDropout(0.3)(x) x = Convolution1D(100, 3, activation=”relu”)(x) x = GlobalMaxPool1D()(x) # x = AttentionWithContext()(x) x = Dense(128, activation=”relu”)(x) x = Dropout(0.30)(x) x = Dense(128, activation=”relu”)(x) x = Dropout(0.30)(x) x = Dense(1, activation=”sigmoid”)(x) model = Model(inputs=inp, outputs=x) model.compile(loss=’binary_crossentropy’, optimizer=’adam’, metrics=[‘accuracy’]) model.summary() return model

model_cnn = model_cnn(np.array(embedding_matrix)) model_cnn.summary()

In [ ]:

# save the best model and early stopping# saveBestModel = keras.callbacks.ModelCheckpoint(path+’/model/best_model.hdf5′, monitor=’val_acc’, verbose=0, save_best_only=True, save_weights_only=False, mode=’auto’, period=1) earlyStopping = EarlyStopping(monitor=’val_loss’, min_delta=0, patience=3, verbose=0, mode=’auto’)

# Fit the model history = model_cnn.fit(X_train, y_train, batch_size=1000, epochs=2, validation_split=0.15, callbacks=[earlyStopping])

In [ ]:

# Final evaluation of the model metrics = model_cnn.evaluate(X_test, y_test) print(“Loss”, metrics[0]) print(“Accuracy:”, metrics[1])

In [ ]:

y_pred_test = model.predict(X_test, batch_size=128) y_pred_bool_test = y_pred_test > 0.5

print(classification_report(y_test, y_pred_bool_test))

Character Embeddings

In [ ]:

# character level tokenizer tokenizer = Tokenizer(num_words=None, char_level=True, oov_token=’UNK’) tokenizer.fit_on_texts(X_train)

# if you already have a character list, then replace the tokenizer.word_index# construct a new vocabularyalphabet=”abcdefghijklmnopqrstuvwxyz0123456789 ,;.!?:’\”/\|_@#$%^&*~`+-=<>()[]{}” char_dict = {} for i, char in enumerate(alphabet): char_dict[char] = i + 1

# use char_dict to replace the tokenizer.word_index tokenizer.word_index = char_dict.copy() # add ‘UNK’ to the vocabulary tokenizer.word_index[tokenizer.oov_token] = max(char_dict.values()) + 1

# convert string to index X_train = tokenizer.texts_to_sequences(X_train) X_test = tokenizer.texts_to_sequences(X_test)

# padding X_train = pad_sequences(X_train, maxlen=1014, padding=’post’) X_test = pad_sequences(X_test, maxlen=1014, padding=’post’)

# convert to numpy array X_train = np.array(X_train, dtype=’float32′) X_test = np.array(X_test, dtype=’float32′)

print(tokenizer.word_index)

vocab_size = len(tokenizer.word_index) + 1 print(vocab_size)

In [ ]:

embedding_weights = [] #(71, 70) embedding_weights.append(np.zeros(vocab_size)) # first row is padfor char, i in tokenizer.word_index.items(): # from index 1 to 70 onehot = np.zeros(vocab_size) onehot[i-1] = 1 embedding_weights.append(onehot) embedding_weights = np.array(embedding_weights)

print(embedding_weights.shape) # first row all 0 for PAD, 69 char, last row for UNKprint(embedding_weights)

In [ ]:

# char embeddings with cnn input_size = 1014 embedding_size = 69 conv_layers = [[256, 7, 3], [256, 7, 3], [256, 3, -1], [256, 3, -1], [256, 3, -1], [256, 3, 3]] fully_connected_layers = [1024, 1024]

def model_char_embeddings_cnn(): inputs = Input(shape=(input_size,), name=’input’, dtype=’int64′)  # shape=(?, 1014) x = Embedding(vocab_size, embedding_size, input_length=input_size, weights=[embedding_weights])(inputs) for filter_num, filter_size, pooling_size in conv_layers: x = Conv1D(filter_num, filter_size)(x) x = Activation(‘relu’)(x) if pooling_size != -1: x = MaxPooling1D(pool_size=pooling_size)(x) # final shape=(none, 34, 256) x = Flatten()(x) *# (none, 8704)*for dense_size in fully_connected_layers: x = Dense(dense_size, activation=’relu’)(x) # dense_size == 1024x = Dropout(0.5)(x) predictions = Dense(1, activation=’sigmoid’)(x)*# build model* model = Model(inputs=inputs, outputs=predictions) model = model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) *# adam, categorical_crossentropy*return model

model_char_embeddings_cnn = model_char_embeddings_cnn() model_char_embeddings_cnn.summary()

In [ ]:

# save the best model and early stopping# saveBestModel = keras.callbacks.ModelCheckpoint(path+’/model/best_model.hdf5′, monitor=’val_acc’, verbose=0, save_best_only=True, save_weights_only=False, mode=’auto’, period=1) earlyStopping = EarlyStopping(monitor=’val_loss’, min_delta=0, patience=3, verbose=0, mode=’auto’)

# Fit the model history = model_char_embeddings_cnn.fit(X_train, y_train, batch_size=1000, epochs=2, validation_split=0.15, callbacks=[earlyStopping])

In [ ]:

# final evaluation of the model metrics = model_cnn.evaluate(X_test, y_test) print(“Loss”, metrics[0]) print(“Accuracy:”, metrics[1])

In [ ]:

# CMSA architecture# 1. sub-word level representations with CNN# 2. parallel encoder network consisting of two BiLSTMs with attention mechanism# 3. feature network

Sub-word Embeddings

In [ ]:

# sub-word level representation# convert string to subword, this process may take several minutes bpe = BPE(“/content/drive/My Drive/Data Files/code-mixed analysis data/en.wiki.bpe.op25000.vocab.txt”) X_train = [bpe.encode(sentence) for sentence in X_train] X_test = [bpe.encode(sentence) for sentence in X_test]

# build vocab, {token: index} vocab = {} for i, token in enumerate(bpe.words): vocab[token] = i + 1

# convert subword to index, function version def subword2index(texts, vocab): sentences = [] for s in texts: s = s.split() one_line = [] for word in s: if word not in vocab.keys(): one_line.append(vocab[‘unk’]) else: one_line.append(vocab[word]) sentences.append(one_line) return sentences

# sonvert train and test X_train = subword2index(X_train, vocab) X_test = subword2index(X_test, vocab)

# padding X_train = pad_sequences(X_train, maxlen=1014, padding=’post’) X_test = pad_sequences(X_test, maxlen=1014, padding=’post’)

# convert to numpy array X_train = np.array(X_train) X_test = np.array(X_test)

In [ ]:

from gensim.models import KeyedVectors

model = KeyedVectors.load_word2vec_format(“/content/drive/My Drive/Data Files/code-mixed analysis data/en.wiki.bpe.op25000.d50.w2v.bin”, binary=True)

In [ ]:

embedding_dim = 50 embedding_weights = np.zeros((len(vocab) + 1, embedding_dim)) *# (25001, 50)*for subword, i in vocab.items(): if subword in model.vocab: embedding_vector = model[subword] if embedding_vector is not None: embedding_weights[i] = embedding_vector else: # print(subword) # print the subword in vocab but not in modelcontinue

In [ ]:

# sub-word embeddings with cnn input_size = 1014 embedding_size = 50 vocab_size = len(vocab)+1 conv_layers = [[256, 7, 3], [256, 7, 3], [256, 3, -1], [256, 3, -1], [256, 3, -1], [256, 3, 3]] fully_connected_layers = [1024, 1024]

def model_subword_embeddings_cnn(): inputs = Input(shape=(input_size,), name=’input’, dtype=’int64′)  # shape=(?, 1014) x = Embedding(vocab_size, embedding_size, input_length=input_size, weights=[embedding_weights])(inputs) for filter_num, filter_size, pooling_size in conv_layers: x = Conv1D(filter_num, filter_size)(x) x = Activation(‘relu’)(x) if pooling_size != -1: x = MaxPooling1D(pool_size=pooling_size)(x) # final shape=(none, 34, 256) x = Flatten()(x) *# (none, 8704)*for dense_size in fully_connected_layers: x = Dense(dense_size, activation=’relu’)(x) # dense_size == 1024x = Dropout(0.5)(x) predictions = Dense(1, activation=’sigmoid’)(x)*# build model* model = Model(inputs=inputs, outputs=predictions) model = model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) *# adam, categorical_crossentropy*return model

model_subword_embeddings_cnn = model_subword_embeddings_cnn() model_subword_embeddings_cnn.summary()