Browse Source

添加情感分析

master
Yener 5 years ago
parent
commit
3826eb8f3e
3 changed files with 141 additions and 0 deletions
  1. +9
    -0
      jiagu/normal/README.md
  2. +132
    -0
      jiagu/sentiment/bayes.py
  3. BIN
      jiagu/sentiment/model/1.model

+ 9
- 0
jiagu/normal/README.md View File

@@ -0,0 +1,9 @@

文本归一化

包含 中文转拼音

全角半角等




+ 132
- 0
jiagu/sentiment/bayes.py View File

@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
import sys
import gzip
import marshal
from math import log, exp

class BaseProb(object):
    """Count table with a fallback count for keys that were never stored.

    Attributes:
        d: dict mapping a key to its stored count.
        total: denominator used by ``freq``; starts at 0.0.
        none: count reported by ``get`` for keys absent from ``d``.
    """

    def __init__(self):
        self.d = {}
        self.total = 0.0
        self.none = 0

    def exists(self, key):
        """Return True when ``key`` has an entry in the table."""
        return key in self.d

    def getsum(self):
        """Return the current value of ``total``."""
        return self.total

    def get(self, key):
        """Look up ``key``.

        Returns:
            ``(True, count)`` when the key is stored, otherwise
            ``(False, self.none)``.
        """
        if self.exists(key):
            return True, self.d[key]
        return False, self.none

    def freq(self, key):
        """Return the count for ``key`` (or the fallback) divided by ``total``."""
        _found, count = self.get(key)
        return float(count) / self.total

class AddOneProb(BaseProb):
    """Count table where every key starts from a count of one.

    Unseen keys report a count of 1 (``none``), and the first time a key is
    added it is seeded with 1 before the supplied value is accumulated.
    """

    def __init__(self):
        # Reuse the base initialisation, then override the fallback count.
        super(AddOneProb, self).__init__()
        self.none = 1

    def add(self, key, value):
        """Accumulate ``value`` for ``key``, seeding new keys with a count of 1.

        ``total`` grows by ``value``, plus 1 more the first time ``key`` is seen.
        """
        self.total += value
        first_time = not self.exists(key)
        if first_time:
            self.d[key] = 1
            self.total += 1
        self.d[key] += value

class Bayes(object):
    """Naive Bayes classifier built on per-class word-count tables.

    Attributes:
        d: dict mapping a class label to the ``AddOneProb`` table holding the
            word counts seen for that class.
        total: sum of ``getsum()`` over all class tables (set by ``train``
            and ``load``).
    """

    def __init__(self):
        self.d = {}
        self.total = 0

    def save(self, fname, iszip=True):
        """Serialize the model to ``fname`` with ``marshal``.

        Args:
            fname: destination path.
            iszip: when true (default) the marshalled bytes are written
                through ``gzip``; otherwise they are written as-is.
        """
        payload = {
            'total': self.total,
            # Each table is stored as its plain attribute dict.
            'd': {k: v.__dict__ for k, v in self.d.items()},
        }

        # Context managers guarantee the handle is closed even on error.
        if not iszip:
            with open(fname, 'wb') as f:
                marshal.dump(payload, f)
        else:
            with gzip.open(fname, 'wb') as f:
                f.write(marshal.dumps(payload))

    def load(self, fname, iszip=True):
        """Restore a model previously written by ``save``.

        Args:
            fname: path of the model file.
            iszip: when true (default) the file is read through ``gzip``,
                falling back to a plain read if that raises ``IOError``;
                when false the file is read as plain marshalled data.

        NOTE(security): ``marshal`` is not safe against malicious or
        malformed input; only load model files from a trusted source.
        """
        if not iszip:
            with open(fname, 'rb') as f:
                d = marshal.load(f)
        else:
            try:
                with gzip.open(fname, 'rb') as f:
                    d = marshal.loads(f.read())
            except IOError:
                # Not gzip data (or unreadable via gzip): retry as a plain file.
                with open(fname, 'rb') as f:
                    d = marshal.loads(f.read())

        self.total = d['total']
        self.d = {}
        for k, v in d['d'].items():
            self.d[k] = AddOneProb()
            self.d[k].__dict__ = v

    def train(self, data):
        """Accumulate word counts from labelled samples.

        Args:
            data: iterable of items where ``item[0]`` is the class label and
                ``item[1]`` is an iterable of words belonging to that sample.
        """
        for sample in data:
            label = sample[0]
            if label not in self.d:
                self.d[label] = AddOneProb()
            for word in sample[1]:
                self.d[label].add(word, 1)
        self.total = sum(table.getsum() for table in self.d.values())

    def classify(self, x):
        """Return the most probable class for the words in ``x``.

        Args:
            x: iterable of words. It is materialized once so one-shot
                iterators (e.g. generators) are scored against every class.

        Returns:
            ``(label, probability)`` for the best class, or ``(0, 0)`` when
            the model has no classes.
        """
        words = list(x)

        # Log-score per class: log prior plus the sum of log word frequencies.
        tmp = {}
        for k in self.d:
            tmp[k] = log(self.d[k].getsum()) - log(self.total)
            for word in words:
                tmp[k] += log(self.d[k].freq(word))

        ret, prob = 0, 0
        for k in self.d:
            now = 0
            try:
                # Normalize in log space: 1 / sum(exp(score_other - score_k)).
                for otherk in self.d:
                    now += exp(tmp[otherk] - tmp[k])
                now = 1 / now
            except OverflowError:
                # Another class dominates so strongly that exp() overflowed.
                now = 0
            if now > prob:
                ret, prob = k, now
        return (ret, prob)

if __name__ == '__main__':
    # Demo: load the bundled sentiment model and classify one sentence.
    import os

    classifier = Bayes()

    # Predict. Resolve the model relative to this file so the script works
    # regardless of the current working directory.
    model_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'model', '1.model')
    classifier.load(model_path)

    import jiagu

    words = jiagu.seg('今天真的开心')
    ret, prob = classifier.classify(words)
    print(ret, prob)

BIN
jiagu/sentiment/model/1.model View File


Loading…
Cancel
Save