# Imports first
from os import path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator


# Helper function
def save_wordcloud_from_text(text, file):
    """Render a word cloud of ``text`` and save it as an image to ``file``.

    The cloud is generated at 1920x1080 with the font at ``/Ubuntu-Th.ttf``,
    drawn on a black-background matplotlib figure with the axes hidden, and
    written with ``plt.savefig``. Progress is reported on stdout.

    Args:
        text: The text handed to ``WordCloud.generate``.
        file: Destination passed to ``plt.savefig``; also used in the
            progress messages.
    """
    print( "INFO: about to generate the WordCloud for {}".format(file) )
    wordcloud = WordCloud(font_path="/Ubuntu-Th.ttf", width=1920, height=1080).generate(text)

    print( "INFO: setting the figure size for {}".format(file) )
    figure = plt.figure(figsize=(200,100), facecolor='k')
    try:
        print( "INFO: setting tight layout for {}".format(file) )
        # NOTE(review): this runs before any axes exist on the figure, so it
        # presumably has no effect; kept in place to preserve the rendered
        # output -- confirm before reordering.
        plt.tight_layout(pad=0)

        print( "INFO: performing the 'imshow' for {}".format(file) )
        plt.imshow(wordcloud, interpolation="bilinear")

        print( "INFO: turning off the axes for {}".format(file) )
        plt.axis("off")

        print( "INFO: saving file {}".format(file) )
        plt.savefig(file, facecolor='k', bbox_inches='tight')
    finally:
        # pyplot keeps every figure it creates alive until it is closed
        # explicitly; without this, each call would leave its very large
        # figure in memory for the rest of the process.
        plt.close(figure)


# Main script
file_input = "snake_public.csv"
file_users = "users.png"
file_messages = "messages.png"

print( "INFO: about to read in CSV file." )
df = pd.read_csv(file_input, low_memory=False)

print( "INFO: about to extract users column." )
# Drop missing entries, then join the remaining values one per line so the
# whole column becomes a single text blob for the word cloud.
users = df.ut.dropna(how='any')
text_users = "\n".join(users.astype(str))
save_wordcloud_from_text( text_users, file_users )

print( "INFO: about to extract messages column." )
# Same treatment for the messages column.
messages = df.m.dropna(how='any')
text_messages = "\n".join(messages.astype(str))
save_wordcloud_from_text( text_messages, file_messages )