diff --git a/自然语言处理/文本聚类与关键字抽取/文本聚类/main_tfidf.py b/自然语言处理/文本聚类与关键字抽取/文本聚类/main_tfidf.py
index 809d8de..bfa2219 100644
--- a/自然语言处理/文本聚类与关键字抽取/文本聚类/main_tfidf.py
+++ b/自然语言处理/文本聚类与关键字抽取/文本聚类/main_tfidf.py
@@ -72,6 +72,7 @@ class NumberNormalizingVectorizer(TfidfVectorizer):
 
 stwlist=[line.strip() for line in open('dataset/stopwords_zh.txt', 'r',encoding='utf-8').readlines()]
 
+'''
 def loadData(filepath):
     fopen = open(filepath, 'r', encoding='utf-8')
     fileread = fopen.read()
@@ -85,13 +86,25 @@ for item in items:
     title = item.get("title")
     content = item.get("content")
 
-    '''
-    if title is not None and len(title) > 0:
-        docArr.append(title)
-    '''
+    # if title is not None and len(title) > 0:
+    #     docArr.append(title)
 
     if content is not None and len(content) > 0:
         docArr.append(content)
+'''
+
+def loadData(filepath):
+    fopen = open(filepath, 'r', encoding='utf-8')
+
+    arr = []
+    for line in fopen.readlines():
+        if len(line) > 0:
+            arr.append(line)
+
+    fopen.close()
+    return arr
+
+docArr = loadData("./data-艺术.txt")
 
 docs = [" ".join(jieba.lcut(doc)) for doc in docArr]
 # pprint(docs[:10])  # 展示靠前的十篇文章的分词效果,注意,每篇文章变成了有一连串词汇组成的list(列表)
@@ -190,6 +203,7 @@ km.fit(X)
 print("完成所耗费时间:%0.3fs" % (time() - t0))
 print()
 
+'''
 print("Homogeneity值: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
 print("Completeness值: %0.3f" % metrics.completeness_score(labels, km.labels_))
 print("V-measure值: %0.3f" % metrics.v_measure_score(labels, km.labels_))
@@ -197,8 +211,8 @@ print("Adjusted Rand-Index值: %.3f"
       % metrics.adjusted_rand_score(labels, km.labels_))
 print("Silhouette Coefficient值: %0.3f"
       % metrics.silhouette_score(X, km.labels_, sample_size=1000))
-
 print()
+'''
 
 #用训练好的聚类模型反推文档的所属的主题类别
 label_prediction = km.predict(X)
diff --git a/自然语言处理/文本聚类与关键字抽取/文本聚类/main_word2vec_1.py b/自然语言处理/文本聚类与关键字抽取/文本聚类/main_word2vec_1.py
index 552254a..0662cfe 100644
--- a/自然语言处理/文本聚类与关键字抽取/文本聚类/main_word2vec_1.py
+++ b/自然语言处理/文本聚类与关键字抽取/文本聚类/main_word2vec_1.py
@@ -47,6 +47,7 @@ def loadData(filepath):
     fopen.close()
     return json.loads(fileread)
 
+'''
 items = loadData("./dataset/data.json")
 
 docArr = []
@@ -54,25 +55,49 @@ for item in items:
     title = item.get("title")
     content = item.get("content")
 
-    '''
-    if title is not None and len(title) > 0:
-        docArr.append(title)
-    '''
+    # if title is not None and len(title) > 0:
+    #     docArr.append(title)
 
     if content is not None and len(content) > 0:
         docArr.append(content)
+'''
+
+def loadData(filepath):
+    fopen = open(filepath, 'r', encoding='utf-8')
+
+    arr = []
+    for line in fopen.readlines():
+        if len(line) > 0:
+            arr.append(line)
+
+    fopen.close()
+    return arr
+
+docArr = loadData("./data-艺术.txt")
 
 docs = [" ".join(jieba.lcut(doc)) for doc in docArr]
 # pprint(docs[:10])  # 展示靠前的十篇文章的分词效果,注意,每篇文章变成了有一连串词汇组成的list(列表)
-
 print("%d 个文档" % len(docs))
 print()
 
+sentences = [[word for word in document.strip().split() if word not in stwlist] for document in docs]  # 过滤语句中的停用词
 
 
-import gensim
-sentences = [[word for word in document.strip().split() if word not in stwlist] for document in docs]  # 过滤语句中的停用词
+'''
+from smart_open import smart_open
+class MySentences(object):
+    def __init__(self, filename):
+        self.filename = filename
+    def __iter__(self):
+        for line in smart_open(self.filename, 'r', encoding='utf-8'):
+            line = line.lower()  #对每一行文本中的英文词汇小写化
+            yield [i.strip() for i in jieba.lcut(line) if i not in stwlist and len(i) > 1]  #在载入文本的同时,对其中的语句进行分词处理,且去掉停用词和长度小于1的语句
+
+sentences = MySentences('./data-艺术.txt')  # 内存友好的迭代器
+'''
+
+import gensim
 
 
 # 在这些语句上训练word2vec模型
 model = gensim.models.Word2Vec(sentences, vector_size=200, window=5, min_count=5, workers=2)
@@ -82,7 +107,7 @@ model = gensim.models.Word2Vec(sentences, vector_size=200, window=5, min_count=5
 
 #获取model里面的说有关键词
 keys=model.wv.index_to_key
-print(keys)
+# print(keys)
 print(len(keys))
 
 #获取词对于的词向量
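Note: the patch keeps the smart_open-based MySentences iterator commented out and trains Word2Vec on the in-memory sentences list instead. For reference, below is a minimal, self-contained sketch of the streaming variant under the same assumptions as the diff (a one-document-per-line corpus at ./data-艺术.txt, the dataset/stopwords_zh.txt stopword list, and the same hyperparameters); the SentenceStream and load_stopwords names are illustrative and not part of the repository.

```python
# Hypothetical sketch of a memory-friendly word2vec pipeline, mirroring the
# commented-out MySentences block in the diff. File paths and hyperparameters
# are assumptions taken from the patch, not a confirmed final script.
import jieba
from gensim.models import Word2Vec


def load_stopwords(path):
    # One stopword per line, matching how stwlist is built in the scripts.
    with open(path, 'r', encoding='utf-8') as f:
        return set(line.strip() for line in f)


class SentenceStream:
    """Yield one tokenized document per line without loading the whole corpus."""

    def __init__(self, filename, stopwords):
        self.filename = filename
        self.stopwords = stopwords

    def __iter__(self):
        with open(self.filename, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip().lower()  # lowercase any English tokens
                if not line:
                    continue
                # Tokenize with jieba; drop stopwords and single-character tokens.
                yield [tok for tok in jieba.lcut(line)
                       if tok not in self.stopwords and len(tok) > 1]


if __name__ == '__main__':
    stopwords = load_stopwords('dataset/stopwords_zh.txt')
    sentences = SentenceStream('./data-艺术.txt', stopwords)
    # Same hyperparameters as the in-memory version in the diff.
    model = Word2Vec(sentences, vector_size=200, window=5, min_count=5, workers=2)
    print(len(model.wv.index_to_key))
```

Using a class with __iter__ rather than a plain generator matters here: gensim walks the corpus once to build the vocabulary and again for each training epoch, so the corpus object must be re-iterable.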