web123456

pyLDAvis generates LDA topics and visualizes them

# Pick the number of LDA topics by plotting c_v coherence against the topic
# count. The input file holds one pre-segmented document per line, with tokens
# separated by whitespace.

# The script uses these names but never imported them; import them here so it
# can run on its own.
import matplotlib
import matplotlib.pyplot as plt
from gensim import corpora
from gensim.models import CoherenceModel, LdaModel

# Prepare data
PATH = "C:\\Users\\mat\\Desktop\\data\\Key phrase extraction of each stage\\stage1_Key phrase.txt"

# Read the whole file and split it on '\n'. The context manager closes the
# handle; errors='ignore' drops undecodable bytes instead of raising.
with open(PATH, encoding='utf-8', errors='ignore') as source_file:
    file_object2 = source_file.read().split('\n')

# One token list per input line (blank lines become empty documents).
data_set = [line.split() for line in file_object2]

dictionary = corpora.Dictionary(data_set)
# Build document-term matrix
corpus = [dictionary.doc2bow(text) for text in data_set]


# Calculate perplexity
def perplexity(num_topics: int) -> float:
    """Train an LDA model with *num_topics* topics and score its perplexity.

    Reads the module-level ``corpus`` and ``dictionary``.

    Args:
        num_topics: Number of topics to fit.

    Returns:
        The value of ``LdaModel.log_perplexity`` evaluated on the training
        corpus.
    """
    # random_state matches coherence() so repeated runs are comparable.
    ldamodel = LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        passes=30,
        random_state=1,
    )
    return ldamodel.log_perplexity(corpus)


# Calculate coherence
def coherence(num_topics: int) -> float:
    """Train an LDA model with *num_topics* topics and score its coherence.

    Reads the module-level ``corpus``, ``dictionary`` and ``data_set``.

    Args:
        num_topics: Number of topics to fit.

    Returns:
        The 'c_v' coherence reported by ``CoherenceModel.get_coherence``.
    """
    ldamodel = LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        passes=30,
        random_state=1,
    )
    ldacm = CoherenceModel(
        model=ldamodel,
        texts=data_set,
        dictionary=dictionary,
        coherence='c_v',
    )
    return ldacm.get_coherence()


# Font setup. 'font.sans-serif' is the rcParams key for the sans-serif font
# list; the previous key '-serif' is not a valid rc parameter and raised
# KeyError.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Render minus signs with an ASCII hyphen rather than the Unicode minus glyph.
matplotlib.rcParams['axes.unicode_minus'] = False

# Draw the coherence line chart: one model is trained per topic count in x.
# (perplexity(i) can be plotted over the same range in the same way.)
x = range(1, 15)
y = [coherence(i) for i in x]
plt.plot(x, y)
plt.xlabel('Number of topics')
plt.ylabel('coherence size')
plt.title('Theme-coherence changes')
plt.show()