Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions DiscourseAnalysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from pattern.en import sentiment, modality

def replace_all(text, dic):
    """Return *text* with every occurrence of each key in *dic* replaced by its value.

    Replacements are applied one key at a time, in the dictionary's iteration
    order, so the result of one replacement is visible to the ones after it.
    """
    # items() exists on both Python 2 and Python 3; iteritems() is Python 2 only.
    for key, value in dic.items():
        text = text.replace(key, value)
    return text

def sort_data(data):
    """Group the lines of a transcript CSV by speaker.

    ``data`` is the path of a CSV file whose first column is the speaker's
    name and whose remainder is what they said.  Returns a dictionary mapping
    each lower-cased, stripped name to the list of that speaker's lines, in
    file order.  Lines that contain no comma (for example blank lines) are
    skipped.
    """
    # Characters that do not survive the export to CSV, mapped to what replaces
    # them.  Line terminators are removed outright so they do not end up inside
    # the stored utterance.
    # NOTE(review): "\x85" is mapped to an apostrophe here; confirm that is the
    # intended substitute for that byte.
    bad_text = {"\x92": "'", "\x85": "'", "\r": "", "\n": ""}
    d = {}
    # The with-block guarantees the file is closed even if a line fails.
    with open(data) as fin:
        for line in fin:
            text = replace_all(line, bad_text)
            # Split on the first comma only: everything before it is the name,
            # everything after it (without the comma itself) is the speech.
            name, comma, speech = text.partition(',')
            if not comma:
                continue
            d.setdefault(name.lower().strip(), []).append(speech)
    return d

def get_sentiment(d):
    """Map each key of *d* to the sentiment rating of every line stored under it.

    ``d`` maps a name to a list of lines.  Each line is passed to pattern.en's
    ``sentiment``, which is documented to return a polarity (-1.0 to 1.0,
    positive meaning positive language) and a subjectivity (0.0 to 1.0,
    1.0 meaning fully subjective).  The ratings keep the order of the lines.
    """
    return {
        speaker: [sentiment(utterance) for utterance in utterances]
        for speaker, utterances in d.items()
    }

def get_modality(d):
    """Map each key of *d* to the modality rating of every line stored under it.

    ``d`` maps a name to a list of lines.  Each line is passed to pattern.en's
    ``modality``, which is documented to rate certainty from -1.0 (uncertain)
    to 1.0 (certain), with values above 0.5 representing facts.  The ratings
    keep the order of the lines.
    """
    return {
        speaker: [modality(utterance) for utterance in utterances]
        for speaker, utterances in d.items()
    }

def average_modality(data):
    """Return the mean modality of each speaker's lines.

    ``data`` is a dictionary of name -> list of lines.  The result maps each
    name to a one-element list holding the mean of that speaker's modality
    ratings.
    """
    ratings_by_speaker = get_modality(data)
    averages = {}
    for speaker in ratings_by_speaker:
        ratings = ratings_by_speaker[speaker]
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating
        # The mean is wrapped in a list because callers expect that shape.
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def average_polarity(data):
    """Return the mean polarity of each speaker's lines.

    ``data`` is a dictionary of name -> list of lines.  The result maps each
    name to a one-element list holding the mean of the first component
    (polarity) of that speaker's sentiment ratings.
    """
    ratings_by_speaker = get_sentiment(data)
    averages = {}
    for speaker in ratings_by_speaker:
        ratings = ratings_by_speaker[speaker]
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating[0]  # index 0 holds the polarity
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def average_subjectivity(data):
    """Return the mean subjectivity of each speaker's lines.

    ``data`` is a dictionary of name -> list of lines.  The result maps each
    name to a one-element list holding the mean of the second component
    (subjectivity) of that speaker's sentiment ratings.
    """
    ratings_by_speaker = get_sentiment(data)
    averages = {}
    for speaker in ratings_by_speaker:
        ratings = ratings_by_speaker[speaker]
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating[1]  # index 1 holds the subjectivity
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def pattern_results(data):
    """Print each participant's average modality, polarity and subjectivity.

    ``data`` is the path of a CSV transcript (names in the first column,
    speech in the second).  The file is parsed once and the resulting
    dictionary is shared by the three averaging functions.
    """
    # Placeholder labels that show up in transcripts and are not real
    # participants; the keys being compared are already lower-cased.
    ignored = ("male participant", "female participant", "non-team member", "")
    d = sort_data(data)
    avg_mod = average_modality(d)
    avg_pol = average_polarity(d)
    avg_sub = average_subjectivity(d)
    for key in d:
        if key in ignored:
            continue
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print(key + ":")
        print("Average Modality: " + str(avg_mod[key])
              + ", Average Polarity: " + str(avg_pol[key])
              + ", Average Subjectivity: " + str(avg_sub[key]))
        print("\n")
89 changes: 89 additions & 0 deletions MiniProject1-master/Holmes_Mini_Project_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
from pattern.en import sentiment
from pattern.en import modality
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can import multiple things simultaneously, e.g.
from pattern.en import sentiment, modality


def sort_data(data):
    """Group the lines of a transcript CSV by speaker.

    Put in the path of a CSV file whose first column is the speaker's name and
    whose remainder is what they said; get out a dictionary mapping each
    stripped name to the list of that speaker's lines, in file order.  Lines
    that contain no comma (for example blank lines) are skipped.
    """
    d = {}
    # The with-block guarantees the file is closed even if a line fails.
    with open(data) as fin:
        for line in fin:
            # Drop the line terminator whether it is "\r\n" or a bare "\n".
            text = line.rstrip("\r\n")
            # "\x92" is replaced by an apostrophe and "\x85" is dropped; these
            # bytes do not carry over cleanly into the CSV export.
            text = text.replace("\x92", "'").replace("\x85", "")
            # Split on the first comma only: the name is before it, the speech
            # is after it (without the comma itself).
            name, comma, speech = text.partition(",")
            if not comma:
                continue
            # The export pads rows with empty trailing columns, which show up
            # as a long run of commas; strip them whatever their number rather
            # than matching one hard-coded run length.
            speech = speech.rstrip(",")
            # NOTE(review): keys stay case-sensitive on purpose, because the
            # labels filtered out when printing results are capitalised.
            d.setdefault(name.strip(), []).append(speech)
    return d

def get_sentiment(data):
    """Return the sentiment rating of every line in a transcript, per speaker.

    ``data`` is passed straight to ``sort_data``; the result maps each speaker
    to the list of ``sentiment`` ratings of their lines, in the same order.
    """
    lines_by_speaker = sort_data(data)
    return dict(
        (speaker, [sentiment(line) for line in lines])
        for speaker, lines in lines_by_speaker.items()
    )

def get_modality(data):
    """Return the modality rating of every line in a transcript, per speaker.

    ``data`` is passed straight to ``sort_data``; the result maps each speaker
    to the list of ``modality`` ratings of their lines, in the same order.
    """
    lines_by_speaker = sort_data(data)
    return dict(
        (speaker, [modality(line) for line in lines])
        for speaker, lines in lines_by_speaker.items()
    )

def average_modality(data):
    """Return the mean modality of each speaker in the transcript *data*.

    The result maps each speaker to a one-element list holding the mean of
    their modality ratings.
    """
    averages = {}
    for speaker, ratings in get_modality(data).items():
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def average_polarity(data):
    """Return the mean polarity of each speaker in the transcript *data*.

    The result maps each speaker to a one-element list holding the mean of
    the first component of their sentiment ratings.
    """
    averages = {}
    for speaker, ratings in get_sentiment(data).items():
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating[0]
        # Store the one-element list directly instead of creating an empty
        # list and appending to it.
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def average_subjectivity(data):
    """Return the mean subjectivity of each speaker in the transcript *data*.

    The result maps each speaker to a one-element list holding the mean of
    the second component of their sentiment ratings.
    """
    averages = {}
    for speaker, ratings in get_sentiment(data).items():
        running_sum = 0
        for rating in ratings:
            running_sum = running_sum + rating[1]
        averages[speaker] = [running_sum / len(ratings)]
    return averages

def pattern_results(data):
    """Print each participant's average modality, polarity and subjectivity.

    ``data`` is the path of a CSV transcript (names in the first column,
    speech in the second).
    """
    # Placeholder labels that are not real participants and are not reported.
    skipped = ("Male Participant", "Female Participant", "Non-team member")
    d = sort_data(data)
    # NOTE(review): each average_* call below re-reads and re-sorts the file,
    # because these functions take the file path rather than the sorted
    # dictionary.  Passing ``d`` down would avoid the repeated parsing but
    # requires changing their signatures.
    avg_mod = average_modality(data)
    avg_pol = average_polarity(data)
    avg_sub = average_subjectivity(data)
    for key in d:
        if key in skipped:
            continue
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print(key + ":")
        print("Average Modality: " + str(avg_mod[key])
              + ", Average Polarity: " + str(avg_pol[key])
              + ", Average Subjectivity: " + str(avg_sub[key]))
        print("\n")
Binary file added MiniProject1-master/MiniProject1WriteUp.pdf
Binary file not shown.
11 changes: 11 additions & 0 deletions MiniProject1-master/transcript_section.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Philip Melter,"Like having some of this linear like, right. I mean we could put some information there and it'd maybe be useful, but… "
Elizabeth Homer,"True. I think it’s so nice and concise the way that it is now, with… "
Philip Melter,Yeah.
Elizabeth Homer,The before and after.
Philip Melter,Shouldn’t we try to pack more info into it?
Elizabeth Homer,Right.
Mike Lands,"I think you should zoom in. Like a lot of stuff… well, or I guess the least I’m saying is like a lot of our vignettes around regionals are all really interesting, right. There’s the foghorn goes off while they’re running in the middle of the woods and there’s the like… we had to go around the lake, can anyone hear me on the radio? They’re seeing someone through the scope like… "
Philip Melter,Yeah. Yeah.
Mike Lands,"There’s not like really… they’re really detailed, but I don’t know if they belong in the broad interactions map... Or if anymore details needs to be put in. "
Philip Melter,Okay.
Mike Lands,"Like I don’t know if this is what Mary had in mind, but I think it works. "
Binary file added MiniProject1WriteUp.pdf
Binary file not shown.
11 changes: 11 additions & 0 deletions transcript_section.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Philip Melter,"Like having some of this linear like, right. I mean we could put some information there and it'd maybe be useful, but… "
Elizabeth Homer,"True. I think it’s so nice and concise the way that it is now, with… "
Philip Melter,Yeah.
Elizabeth Homer,The before and after.
Philip Melter,Shouldn’t we try to pack more info into it?
Elizabeth Homer,Right.
Mike Lands,"I think you should zoom in. Like a lot of stuff… well, or I guess the least I’m saying is like a lot of our vignettes around regionals are all really interesting, right. There’s the foghorn goes off while they’re running in the middle of the woods and there’s the like… we had to go around the lake, can anyone hear me on the radio? They’re seeing someone through the scope like… "
Philip Melter,Yeah. Yeah.
Mike Lands,"There’s not like really… they’re really detailed, but I don’t know if they belong in the broad interactions map... Or if anymore details needs to be put in. "
Philip Melter,Okay.
Mike Lands,"Like I don’t know if this is what Mary had in mind, but I think it works. "