Analysing YouTube Comments — Stuff Made Here



Word-count table from Stuff Made Here comments
Wordcloud from Stuff Made Here comments

Interesting Words

Making the visualizations

# Count how often each word appears in the comment files under rawdata/.
# The result, `data`, maps each lower-cased alphabetic non-stop-word to its
# number of occurrences, ordered from most to least frequent.
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import json_lines
import os
# NOTE(review): the module name was missing from this import in the source
# ("from import Bar"); `Bar(..., max=...)` matches the `progress` package,
# so `progress.bar` is assumed here -- confirm.
from progress.bar import Bar
import json

files = os.listdir("rawdata")
# Rebinds the imported `stopwords` name to the English stop-word collection
# (as the original did); a set makes the per-token membership test O(1).
stopwords = set(stopwords.words('english'))

bar = Bar('Progress: ', max=len(files))
data = {}
for file in files:
    # Each file is parsed with json_lines; every record is expected to carry
    # the comment body under the 'text' key.
    with open(os.path.join('rawdata', file), 'r') as raw_file:
        for comment in json_lines.reader(raw_file):
            for word in word_tokenize(comment['text']):
                word = word.lower()
                # Keep purely alphabetic tokens that are not stop words.
                if word not in stopwords and word.isalpha():
                    # Increment, starting from 0 the first time a word is
                    # seen (previously the count was reset to 1 every time).
                    data[word] = data.get(word, 0) + 1
    # Advance the progress bar once per processed file.
    bar.next()
bar.finish()

# Re-create the dict in descending order of count; ties keep first-seen order.
data = {k: v for k, v in sorted(data.items(), key=lambda item: item[1], reverse=True)}
# NOTE(review): the plotting snippet later in this file loads "wordcount.json",
# but nothing here writes it -- a json.dump of `data` presumably belongs here.
# Prepare a WordCloud renderer shaped by mask.png, together with the word
# counts loaded from wordcount.json.
import os
from wordcloud import WordCloud
import numpy as np
from PIL import Image
import json

# WordCloud expects the mask as a pixel array, not a file name, so the image
# is opened with PIL and converted to a NumPy array.
mask = np.array(Image.open("mask.png"))
# Load the word -> count mapping; the context manager closes the file handle.
with open("wordcount.json", "r") as wordcount_file:
    data = json.load(wordcount_file)

# random_state is fixed so the layout is reproducible between runs.
# NOTE(review): per the wordcloud documentation, width/height are ignored
# when a mask is supplied -- confirm whether these values are still wanted.
wc = WordCloud(
    width=3888,
    height=5180,
    background_color="white",
    max_words=6000,
    mask=mask,
    max_font_size=1000,
    random_state=32,
)
# NOTE(review): the snippet ends here; rendering `data` with `wc` and saving
# the image are not shown in this fragment.




Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store