|
|
@@ -0,0 +1,59 @@ |
|
|
|
|
|
|
|
import pandas as pd |
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Disabled earlier variant -- kept for reference only; nothing here executes.
#
# It compared './result-艺术-4字.txt' with './result-艺术-7字.txt', matching
# rows on the (keyword, count) pair with `keyword` loaded as the index.
# It used to live in a bare triple-quoted string; it is written as real
# comments so that it is not evaluated as a string expression.
# DataFrame.append was removed in pandas 2.0, so the difference step is
# spelled with pd.concat, which stacks the two tables the same way.
#
# data1 = pd.read_table('./result-艺术-4字.txt',
#                       header=None,                 # file has no header row
#                       names=['keyword', 'count'],  # column names supplied here
#                       sep=',',                     # fields are comma-separated
#                       engine='python',
#                       index_col='keyword')
#
# data2 = pd.read_table('./result-艺术-7字.txt',
#                       header=None,                 # file has no header row
#                       names=['keyword', 'count'],  # column names supplied here
#                       sep=',',                     # fields are comma-separated
#                       engine='python',
#                       index_col='keyword')
#
# # Intersection: rows whose keyword and count both match.
# jiaoji = pd.merge(data1, data2, on=['keyword', 'count'])
# jiaoji.to_csv('./compare-jiaoji-result.txt', header=False)
#
# # Union: every (keyword, count) row from either table.
# bingji = pd.merge(data1, data2, on=['keyword', 'count'], how='outer')
# bingji.to_csv('./compare-bingji-result.txt', header=False)
#
# # Difference: keywords whose index label occurs exactly once after stacking.
# chaji = pd.concat([data1, data2])
# chaji = pd.Series(chaji.index).value_counts()
# chaji = chaji[chaji == 1]
# chaji.to_csv('./compare-chaji-result.txt', header=False)
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
# Reader options shared by both input files: there is no header row, so the
# two column names are supplied here, and fields are split on ','.
# NOTE(review): the previous comment on `sep` mentioned '::' as the
# separator, but the value actually passed is ',' -- confirm against the
# input files.
_READ_OPTIONS = {
    'header': None,
    'names': ['keyword', 'count'],
    'sep': ',',
    'engine': 'python',
}

# Load the two keyword tables that the rest of the script compares.
data1 = pd.read_table('./result-艺术-7字-标点符号混乱化.txt', **_READ_OPTIONS)
data2 = pd.read_table('./result-艺术-7字-去重.txt', **_READ_OPTIONS)

# Only the keywords are compared below, so the `count` column is removed
# from both tables in place.
data1.drop(columns=['count'], inplace=True)
data2.drop(columns=['count'], inplace=True)
|
|
|
|
|
|
|
# Intersection (jiaoji): an inner join keeps only the keywords that are
# present in both tables.
jiaoji = pd.merge(left=data1, right=data2, how='inner', on='keyword')

# Written without a header row; the row index is still emitted as the first
# column (index=True is the pandas default, spelled out here).
jiaoji.to_csv('./compare-jiaoji-result.txt', header=False, index=True)
|
|
|
|
|
|
|
# Union (bingji): an outer join keeps every keyword found in either table.
bingji = pd.merge(left=data1, right=data2, how='outer', on='keyword')

# Written without a header row; the row index is still emitted as the first
# column (index=True is the pandas default, spelled out here).
bingji.to_csv('./compare-bingji-result.txt', header=False, index=True)
|
|
|
|
|
|
|
# Symmetric difference (chaji): stack both tables, then discard every
# keyword that occurs more than once, leaving the keywords found in only
# one of the two tables.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows in
# the same order and keeps the original index labels, as append did.
chaji = pd.concat([data2, data1])

# keep=False drops *all* rows of a repeated keyword, so a keyword repeated
# inside a single input table is discarded as well.
chaji = chaji.drop_duplicates(subset=['keyword'], keep=False)

chaji.to_csv('./compare-chaji-result.txt', header=False)
|
|
|
|
|
|
|
|
|
|
|
# One-sided difference (chaji): stack the intersection on top of data1 and
# discard every keyword that occurs more than once; what remains are the
# data1 keywords that have no match in the intersection.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows in
# the same order and keeps the original index labels, as append did.
chaji = pd.concat([jiaoji, data1])

# keep=False drops *all* rows of a repeated keyword, so a keyword repeated
# within data1 itself is discarded as well.
chaji = chaji.drop_duplicates(subset=['keyword'], keep=False)

# NOTE(review): this is the same path the symmetric-difference step earlier
# in this script writes to, so that earlier result is overwritten -- confirm
# whether a separate file name is wanted.
chaji.to_csv('./compare-chaji-result.txt', header=False)