sd16fall · AndrewHolmes · Oct 6, 2016 · Oct 9, 2016 · matthewruehle · Oct 6, 2016
diff --git a/DiscourseAnalysis.py b/DiscourseAnalysis.py
@@ -0,0 +1,87 @@
+from pattern.en import sentiment, modality
+
+def replace_all(text, dic): #Looks at text and replaces all occurances of each key in dic with the given value
+    for key, value in dic.iteritems(): #Iterates through all the keys and values of dic
+        text = text.replace(key, value) #replaces key with value in text
+    return text
+
+def sort_data(data): #Takes a CSV file for a transcript (assumes names are in the first column and speech in the second) and outputs the data sorted into a dictionary with the names as keys and everything the individual says as the value.
+    fin = open(data) #Opens the specified file
+    d = {} #Creates empty dictionary that data will be sorted into
+    bad_text = {"\x92": "'", "\x85": "'", "\r": "", "\n": "'"} #Dictionary of codes for formatting and characters that do not carry over into a CSV file. Values associated with each key will replace the "bad" characters
+    for line in fin:
+        text = replace_all(line, bad_text) #Uses the replace all function to remove characters in bad_text and replace them with their associated values
+        key = text[0:text.index(',')].lower().strip() #Creates the keys for the sorted dictionary. Takes the characters from the beginning of each line to the first comma to get just the name in the first column.
+        if key in d: #if the key is already in the dictionary, the text is appended to the associated name in the dictionary
+            d[key].append(text[text.index(',')+1:])
+        else:
+            d[key] = [text[text.index(','):]] #If the key has not been added to the dictionary yet it adds the name and the associated text on that line
+    return d
+
+def get_sentiment(d): #Takes a dictionary and returns a new dictionary of values for sentiment for every line in the original dictionary
+    #The sentiment function returns two values in a list. The first rates the polarity of a sentence (positive or negative) from -1.0 to 1.0, where postive correlates with positive language. The second value is the subjectivity of a sentence from 0.0 to 1.0, where sujective language scores a 1.0.
+    sent = {}
+    for key, value in d.items():
+        sent[key] = [] #Creates an empty list for each key in the dictionary
+        for x in value:
+            sent[key].append(sentiment(x)) #Adds the sentiment rating for each line in the transcript for each key
+    return sent
+
+def get_modality(d): #Takes a dictionary and creates a new dictionary of values for modality for every line in the original dictionary.
+    #Modality is a rating of how certain somebody is on a range from -1.0 to 1.0, where negative values indicate uncertainty and positive values represent certainty. Values greater than 0.5 represent facts.
+    mod = {}
+    for key, value in d.items():
+        mod[key] = [] #Creates an empty list for each key in the dictionary
+        for x in value:
+            mod[key].append(modality(x)) #Adds the sentiment rating for each line in the transcript for each key
+    return mod
+
+def average_modality(data): #Takes a dictionary, calculates the average modality for each value, and stores it with the same key
+    mod = get_modality(data)
+    avg = {}
+    for key, value in mod.items():
+        summation = 0 #Initializes the sum of the modality rating for each line
+        total = 0 #Initializes a counter for the total number of lines for a given person
+        avg[key] = [] #Creates an empty list to store each result
+        for x in value:
+            summation += x #Adds the modality data to the value
+            total += 1 #Increments the total number by 1 to represent the total number of spoken lines
+        avg[key].append(summation/total) #Divides the sum by the total to provide an average modality for each person
+    return avg
+
+def average_polarity(data): #Takes a dictionary, calculates the average polarity for each value, and stores it with the same key
+    sent = get_sentiment(data)
+    avg = {}
+    for key, value in sent.items():
+        summation = 0 #Initializes the sum of the polarity rating for each line
+        total = 0 #Initializes a counter for the total number of lines for a given person
+        avg[key] = []
+        for x in value:
+            summation += x[0] #Adds the polarity data to the value. The sentiment function returns a list with polarity in the 0th position
+            total += 1 #Increments the total number by 1 to represent the total number of spoken lines
+        avg[key].append(summation/total) #Divides the sum by the total to provide an average polarity for each person
+    return avg
+
+def average_subjectivity(data): #Takes a CSV transcript (assumes names are in the first column and speech in the second) and returns the average subjectivity for each person
+    sent = get_sentiment(data)
+    avg = {}
+    for key, value in sent.items():
+        summation = 0 #Initializes the sum of the polarity rating for each line
+        total = 0 #Initializes a counter for the total number of lines for a given person
+        avg[key] = []
+        for x in value:
+            summation += x[1] #Adds the subjectivity data to the value. The sentiment function returns a list with polarity in the 1th position
+            total += 1 #Increments the total number by 1 to represent the total number of spoken lines
+        avg[key].append(summation/total) #Divides the sum by the total to provide an average subjectivity for each person
+    return avg
+
+def pattern_results(data): #Takes a CSV file as input (assumes names are in the first column and speech in the second) and prints the names of the participants along with the average calculations
+    d = sort_data(data)
+    avg_mod = average_modality(d)
+    avg_pol = average_polarity(d)
+    avg_sub = average_subjectivity(d)
+    for key, value in d.items():
+        if key != "male participant" and key != "female participant" and key != "non-team member" and key != '': #Ignores a number of common names that pop up in transscripts and are to be ignored
+            print key + ":"
+            print "Average Modality: " + str(avg_mod[key]) + ", Average Polarity: " + str(avg_pol[key]) + ", Average Subjectivity" + str(avg_sub[key])
+            print "\n"
diff --git a/MiniProject1-master/Holmes_Mini_Project_1.py b/MiniProject1-master/Holmes_Mini_Project_1.py
@@ -0,0 +1,89 @@
+from pattern.en import sentiment
+from pattern.en import modality
+
+def sort_data(data):
+    fin = open(data)
+    d = {}
+    names = []
+    speech = []
+    for line in fin:
+        line1 = line.replace(",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,","",1)
+        line2 = line1.replace("\x92", "'")
+        line3 = line2.replace("\x85", "")
+        text = line3.replace("\r\n", "")
+        key = text[0:text.index(',')]
+        if key.strip() in d:
+            d[key.strip()].append(text[text.index(',')+1:len(text)])
+        else:
+            d[key.strip()] = [text[text.index(','):len(text)]]
+    return d
+
+def get_sentiment(data):
+    d = sort_data(data)
+    sent = {}
+
+    for key, value in d.items():
+        sent[key] = []
+        for x in value:
+            sent[key].append(sentiment(x))
+    return sent
+
+def get_modality(data):
+    d = sort_data(data)
+    mod = {}
+
+    for key, value in d.items():
+        mod[key] = []
+        for x in value:
+            mod[key].append(modality(x))
+    return mod
+
+def average_modality(data):
+    mod = get_modality(data)
+    avg = {}
+    for key, value in mod.items():
+        summation = 0
+        total = 0
+        avg[key] = []
+        for x in value:
+            summation += x
+            total += 1
+        avg[key].append(summation/total)
+    return avg
+
+def average_polarity(data):
+    sent = get_sentiment(data)
+    avg = {}
+    for key, value in sent.items():
+        summation = 0
+        total = 0
+        avg[key] = []
+        for x in value:
+            summation += x[0]
+            total += 1
+        avg[key].append(summation/total)
+    return avg
+
+def average_subjectivity(data):
+    sent = get_sentiment(data)
+    avg = {}
+    for key, value in sent.items():
+        summation = 0
+        total = 0
+        avg[key] = []
+        for x in value:
+            summation += x[1]
+            total += 1
+        avg[key].append(summation/total)
+    return avg
+
+def pattern_results(data):
+    d = sort_data(data)
+    avg_mod = average_modality(data)
+    avg_pol = average_polarity(data)
+    avg_sub = average_subjectivity(data)
+    for key, value in d.items():
+        if key != "Male Participant" and key != "Female Participant" and key != "Non-team member":
+            print key + ":"
+            print "Average Modality: " + str(avg_mod[key]) + ", Average Polarity: " + str(avg_pol[key]) + ", Average Subjectivity" + str(avg_sub[key])
+            print "\n"
diff --git a/MiniProject1-master/MiniProject1WriteUp.pdf b/MiniProject1-master/MiniProject1WriteUp.pdf
diff --git a/MiniProject1-master/transcript_section.csv b/MiniProject1-master/transcript_section.csv
@@ -0,0 +1,11 @@
+Philip Melter,"Like  having some of this linear  like,  right.   I mean  we could put some information there  and it'd maybe be useful,  but… "
+Elizabeth Homer,"True.  I think it’s so nice and concise  the way that it is now,  with… "
+Philip Melter,Yeah.
+Elizabeth Homer,The before and after.
+Philip Melter,Shouldn’t we try to pack more info into it?
+Elizabeth Homer,Right.
+Mike Lands,"I think you should zoom in.   Like  a lot of stuff… well,  or I guess  the least I’m saying  is like  a lot of our vignettes around regionals  are all really interesting,  right.   There’s the foghorn goes off while they’re running in the middle of the woods  and there’s the  like… we had to go around the lake,  can anyone hear me on the radio?   They’re seeing someone through the scope  like… "
+Philip Melter,Yeah.  Yeah.
+Mike Lands,"There’s not  like  really… they’re really detailed,  but I don’t know if they belong  in the broad interactions map...  Or if anymore details needs to be put in. "
+Philip Melter,Okay.
+Mike Lands,"Like  I don’t know if this is what Mary had in mind,  but I think it works. "
diff --git a/MiniProject1WriteUp.pdf b/MiniProject1WriteUp.pdf
diff --git a/transcript_section.csv b/transcript_section.csv
@@ -0,0 +1,11 @@
+Philip Melter,"Like  having some of this linear  like,  right.   I mean  we could put some information there  and it'd maybe be useful,  but… "
+Elizabeth Homer,"True.  I think it’s so nice and concise  the way that it is now,  with… "
+Philip Melter,Yeah.
+Elizabeth Homer,The before and after.
+Philip Melter,Shouldn’t we try to pack more info into it?
+Elizabeth Homer,Right.
+Mike Lands,"I think you should zoom in.   Like  a lot of stuff… well,  or I guess  the least I’m saying  is like  a lot of our vignettes around regionals  are all really interesting,  right.   There’s the foghorn goes off while they’re running in the middle of the woods  and there’s the  like… we had to go around the lake,  can anyone hear me on the radio?   They’re seeing someone through the scope  like… "
+Philip Melter,Yeah.  Yeah.
+Mike Lands,"There’s not  like  really… they’re really detailed,  but I don’t know if they belong  in the broad interactions map...  Or if anymore details needs to be put in. "
+Philip Melter,Okay.
+Mike Lands,"Like  I don’t know if this is what Mary had in mind,  but I think it works. "