### Thread: Help with Sentiment classification in python

1. No Profile Picture
Registered User
Devshed Newbie (0 - 499 posts)

Join Date
Nov 2013
Posts
2
Rep Power
0

#### Help with Sentiment classification in python

I am still learning Python. I need to write a loop to calculate:
a) Score(Positive) = p(Positive) *p(w1|Positive)*...*p(wn|Positive)
b) Score(Negative) = p(Negative) *p(w1|Negative)*...*p(wn|Negative)

#then, using the above calculations,
c) calculate a variable "prob" that holds the likelihood of the sentence being Positive: p(Positive|w1,...wn).

I have given the entire code below, and I would appreciate any help.

Code:
```#!/usr/bin/env python
import re, random, math, collections, itertools

#------------- Function Definitions ---------------------

def _readLines(path):
    """Return the stripped, non-blank lines of the text file at *path*."""
    # The with-block closes the file as soon as its contents have been read.
    with open(path, 'r') as txt:
        return [line.strip() for line in txt if line.strip()]


#reading pre-labeled movie reviews and splitting into lines
posSentences = _readLines('Data/Movies/rt-polarity.pos')
negSentences = _readLines('Data/Movies/rt-polarity.neg')

#reading pre-labeled Nokia reviews and splitting into lines
posSentencesNokia = _readLines('Data/Nokia/nokia-pos.txt')
negSentencesNokia = _readLines('Data/Nokia/nokia-neg.txt')

# NOTE(review): assumes the sentiment word lists hold one word per line -
# TODO confirm against the actual files.
posWordList = _readLines('Data/SentimentDictionary/positive-words.txt')
negWordList = _readLines('Data/SentimentDictionary/negative-words.txt')

#Create single sentiment dictionary, where words have value 1 if positive and -1 if negative:

sentimentDictionary = {}  #initialise dictionary

for i in posWordList:
    sentimentDictionary[i] = 1
for i in negWordList:
    sentimentDictionary[i] = -1

#create Training and Test Datasets
#create 90-10 split of training and test data from movie reviews, with sentiment labels
# These dictionaries map sentence -> "positive"/"negative".
sentencesTrain = {}
sentencesTest = {}

for i in posSentences:
    # randint(1,10) is below 2 only when it returns 1, i.e. about one
    # sentence in ten goes to the test set.
    if random.randint(1, 10) < 2:
        sentencesTest[i] = "positive"
    else:
        sentencesTrain[i] = "positive"

for i in negSentences:
    if random.randint(1, 10) < 2:
        sentencesTest[i] = "negative"
    else:
        sentencesTrain[i] = "negative"

#create Nokia Dataset, with sentiment attached to sentences:
sentencesNokia = {}
for i in posSentencesNokia:
    sentencesNokia[i] = "positive"
for i in negSentencesNokia:
    sentencesNokia[i] = "negative"

# NOTE(review): the main script at the bottom of this file rebinds
# sentimentDictionary, sentencesTrain, sentencesTest and sentencesNokia to
# empty dictionaries before using them; those rebindings must go for the data
# loaded here to reach the classifiers.

#----------------------------End of data initialisation ----------------#

#calculates p(W|Positive), p(W|Negative) and p(W) for all words in training data

def trainBayes(sentencesTrain, pWordPos, pWordNeg, pWord):
    """Estimate p(word|positive), p(word|negative) and p(word) from training data.

    Args:
        sentencesTrain: dictionary mapping each sentence to its sentiment
            label. The label "positive" counts towards the positive class;
            any other label is counted as negative.
        pWordPos: dictionary filled in place with p(word|positive).
        pWordNeg: dictionary filled in place with p(word|negative).
        pWord: dictionary filled in place with p(word).

    Returns:
        None. The three probability dictionaries are updated in place.

    Raises:
        ZeroDivisionError: if the training data contains words but one of the
            two classes has no words at all (its total word count is zero).
    """
    freqPositive = {}   # word -> occurrences in positive sentences
    freqNegative = {}   # word -> occurrences in negative sentences
    dictionary = {}     # every distinct word seen in the training data
    posWordsTot = 0
    negWordsTot = 0
    allWordsTot = 0

    #iterate through each sentence/sentiment pair in the training data
    # (.items() and dict.get() behave the same on Python 2 and Python 3,
    # unlike iteritems()/has_key(), which exist only on Python 2)
    for sentence, sentiment in sentencesTrain.items():
        for word in re.findall(r"[\w']+", sentence):  # get word list
            allWordsTot += 1  # keeps count of total words in dataset
            dictionary[word] = 1
            if sentiment == "positive":
                posWordsTot += 1  # keeps count of total words in positive class
                #keep count of each word in positive context
                freqPositive[word] = freqPositive.get(word, 0) + 1
            else:
                negWordsTot += 1  # keeps count of total words in negative class
                #keep count of each word in negative context
                freqNegative[word] = freqNegative.get(word, 0) + 1

    for word in dictionary:
        #do some smoothing so that minimum count of a word is 1
        # (the class totals are deliberately left untouched by the smoothing)
        freqNegative.setdefault(word, 1)
        freqPositive.setdefault(word, 1)

        # Calculate p(word|positive)
        pWordPos[word] = freqPositive[word] / float(posWordsTot)

        # Calculate p(word|negative)
        pWordNeg[word] = freqNegative[word] / float(negWordsTot)

        # Calculate p(word)
        pWord[word] = (freqPositive[word] + freqNegative[word]) / float(allWordsTot)

#----------------End Training ----------------------------------

#implement naive bayes algorithm
#INPUTS:
#  sentencesTest is a dictionary with sentences associated with sentiment
#  dataName is a string (used only for printing output)
#  pWordPos is dictionary storing p(word|positive) for each word
#     i.e., pWordPos["apple"] will return a real value for p("apple"|positive)
#  pWordNeg is dictionary storing p(word|negative) for each word
#  pWord is dictionary storing p(word)
#  pPos is a real number containing the fraction of positive reviews in the dataset

def testBayes(sentencesTest, dataName, pWordPos, pWordNeg, pWord, pPos):
pNeg=1-pPos

#for each sentence, sentiment pair in the dataset

for sentence, sentiment in sentencesTest.iteritems():
Words = re.findall(r"[\w']+", sentence)#collect all words

#-------------Write Code Here -----------------------------#
# At this point, Words contains the list of words in the sentence.
# You need to implement Naive Bayes to decide the sentiment of the
# sentence based on the list of words.
# Write the loop to calculate:

#   Score(Positive) = p(Positive) *p(w1|Positive)*...*p(wn|Positive)
#   Score(Negative) = p(Negative) *p(w1|Negative)*...*p(wn|Negative)

# Now, using the above calculations, calculate a variable "prob" that holds the likelihood of the sentence being Positive:
# p(Positive|w1,...wn) = Score(Positive)/(Score(Positive)+Score(Negative))
#In short, you need to:
# a) Write the loop to calculate the Scores
# b) create a variable called "prob" that stores p(Positive|w1,...,wn)

#------------------------Finish Code--------------------------#

# the code below will keep track of classification accuracy. Do not change the indentation, the code below is part of the for loop.
wordList=[]
pPosW=pPos
pNegW=pNeg
for word in wordList:
if pWord.has_key(word):
if pWord[word]>0.00000001:
#repeated multiplication can make pPosW and pNegW very small
#So I multiply them by a large number to keep the arithmatic
#sensible. It doesn't change the maths when you
#calculate "prob"
pPosW *=pWordPos[word]*100000
pNegW *=pWordNeg[word]*100000

prob=pPosW/float(pPosW+pNegW)
total+=1
if sentiment=="positive":
totalpos+=1
if prob>0.5:
correct+=1
correctpos+=1
else:
correct+=0
else:
totalneg+=1
if prob<=0.5:
correct+=1
correctneg+=1
else:
correct+=0

acc=correct/float(total)
print dataName + " Accuracy (All)=%0.2f" % acc + " (%d" % correct + "/%d" % total + ")"
accpos=correctpos/float(totalpos)
accneg=correctneg/float(totalneg)
print dataName + " Accuracy (Pos)=%0.2f" % accpos + " (%d" % correctpos + "/%d" % totalpos + ")"
print dataName + " Accuracy (Neg)=%0.2f" % accneg + " (%d" % correctneg + "/%d" % totalneg + ")\n"

# This is a simple classifier that uses a sentiment dictionary to classify a sentence. For each word in the sentence, if the word is in the positive dictionary, it adds 1, if it is in the negative dictionary, it subtracts 1.
# If the final score is above a threshold, it classifies as "Positive", otherwise as "Negative"

def testDictionary(sentencesTest, dataName, sentimentDictionary, threshold):
total=0
correct=0
totalpos=0
totalneg=0
correctpos=0
correctneg=0
for sentence, sentiment in sentencesTest.iteritems():
Words = re.findall(r"[\w']+", sentence)
score=0
for word in Words:
if sentimentDictionary.has_key(word):
score+=sentimentDictionary[word]

total+=1
if sentiment=="positive":
totalpos+=1
if score>=threshold:
correct+=1
correctpos+=1
else:
correct+=0
else:
totalneg+=1
if score<threshold:
correct+=1
correctneg+=1
else:
correct+=0

acc=correct/float(total)
print dataName + " Accuracy (All)=%0.2f" % acc + " (%d" % correct + "/%d" % total + ")"
accpos=correctpos/float(totalpos)
accneg=correctneg/float(totalneg)
print dataName + " Accuracy (Pos)=%0.2f" % accpos + " (%d" % correctpos + "/%d" % totalpos + ")"
print dataName + " Accuracy (Neg)=%0.2f" % accneg + " (%d" % correctneg + "/%d" % totalneg + ")\n"

#---------- Main Script --------------------------

sentimentDictionary={} # {} initialises a dictionary [hash function]
sentencesTrain={}
sentencesTest={}
sentencesNokia={}

#initialise datasets and dictionaries

pWordPos={} # p(W|Positive)
pWordNeg={} # p(W|Negative)
pWord={}    # p(W)

#build conditional probabilities using training data

trainBayes(sentencesTrain, pWordPos, pWordNeg, pWord)

#run naive bayes classifier on datasets

print "Naive Bayes"
testBayes(sentencesTrain,  "Films (Train Data)\t", pWordPos, pWordNeg, pWord,0.5)
testBayes(sentencesTest,  "Films  (Test Data)\t", pWordPos, pWordNeg, pWord,0.5)
testBayes(sentencesNokia, "Nokia   (All Data)\t", pWordPos, pWordNeg, pWord,0.7)

#run sentiment dictionary based classifier on datasets

print "Sentiment Dictionary"
testDictionary(sentencesTrain,  "Films (Train Data)\t", sentimentDictionary, -4)
testDictionary(sentencesTest,  "Films  (Test Data)\t",  sentimentDictionary, -4)
testDictionary(sentencesNokia, "Nokia   (All Data)\t",  sentimentDictionary, -3)```
2. Since I'm lazy and your project is already past due, I stall a little longer by explaining I cannot run your program because I don't have the input files.