TypeError: expected string or bytes-like object(Python 3,WordCloud)


import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import wordcloud
from wordcloud import WordCloud,STOPWORDS

# Build and display a word cloud of the remarks file, shaped by a mask image.

# Read the whole text as ONE string. WordCloud.generate() tokenizes the text
# itself with re.findall(), so handing it the list produced by .split()
# raises "TypeError: expected string or bytes-like object". The default
# token pattern r"\w[\w']+" also leaves punctuation out of the words, so no
# manual punctuation stripping is needed.
# NOTE: open() uses the platform default encoding; pass encoding=... if the
# file is stored in a different one.
with open(r'C:\Users\marmar\Remarks.txt') as remarks_file:
    remarks = remarks_file.read()

# Create words over an image: the picture is loaded as an array and used as
# the mask of the cloud.
mask = np.array(Image.open(r'C:\users\marmar\Documents\cloud.png'))

# Set the stopwords list, starting from the defaults shipped with wordcloud.
stopwords = set(STOPWORDS)

# Append new words to the stopwords list. Splitting IS wanted here, because
# the stopwords argument is a collection of individual words. They are
# lower-cased because wordcloud compares word.lower() against the stopwords.
with open(r'C:\Users\marmar\Documents\comments.txt') as comments_file:
    new_words = comments_file.read().split()
new_stopwords = stopwords.union(word.lower() for word in new_words)

# Generate the word cloud with parameters.
wc = WordCloud(
    background_color="white",
    max_words=2000,
    mask=mask,
    min_font_size=15,
    max_font_size=40,
    relative_scaling=0.5,
    stopwords=new_stopwords,
    normalize_plurals=True,
)
wc.generate(remarks)

# Show the wordcloud
plt.figure(figsize=(25, 25))
plt.imshow(wc, interpolation="bilinear")
plt.show()

如果我把读取备注(Remarks)文本文件那一行里的 .read().split() 去掉,代码其实可以运行并返回词云。但是,我希望能够对文本进行分词(tokenize),让词云更准确(目前词云没有把单词分开显示)。然而,每次加上 .read().split() 之后,都会收到下面这个错误:

TypeError                                 Traceback (most recent call last)
<ipython-input-7-76f0df420fc2> in <module>()
 19 wc = WordCloud(background_color="white", max_words=2000, 
 mask=mask,min_font_size =15, max_font_size=40, relative_scaling = 0.5, 
 20                 normalize_plurals= True)
 ---> 21 wc.generate(remarks)
 22 plt.figure(figsize=(25,25))
 23 plt.imshow(wc, interpolation="bilinear")

 packages\wordcloud\wordcloud.py in generate(self, text)
  563         self
 564         """
 --> 565         return self.generate_from_text(text)
 567     def _check_generated(self):

packages\wordcloud\wordcloud.py in generate_from_text(self, text)
544         self
545         """
--> 546         words = self.process_text(text)
547         self.generate_from_frequencies(words)
548         return self

in process_text(self, text)
511         regexp = self.regexp if self.regexp is not None else r"\w[\w']+"
--> 513         words = re.findall(regexp, text, flags)
514         # remove stopwords
515         words = [word for word in words if word.lower() not in 

~\AppData\Local\Continuum\anaconda3\lib\re.py in findall(pattern, string, 
221     Empty matches are included in the result."""
--> 222     return _compile(pattern, flags).findall(string)
224 def finditer(pattern, string, flags=0):

TypeError: expected string or bytes-like object

我想做的其实是去掉备注文本文件中的所有标点符号,并把文本转换为 Unicode。评论(comments)文本文件可以正常处理,但不知道为什么备注文件不行。




