Bar Chart of the Frequency of Modals in Different Sections of the Brown Corpus

2023-01-05 18:31:40

Natural Language Processing with Python

Chapter 4.8

 # One-letter color codes, in order: red, green, blue, cyan, magenta, yellow,
 # black. bar_chart() indexes this string by category position modulo its
 # length, so the palette repeats when there are more than seven categories.
 colors = 'rgbcmyk' # red, green, blue, cyan, magenta, yellow, black

 def bar_chart(categories, words, counts):
     """Show a grouped bar chart with one bar series per category.

     Args:
         categories: Sequence of category labels. Each label is used as a
             key into ``counts`` and as a legend entry.
         words: Sequence of x-axis tick labels, one per group of bars.
         counts: Mapping from category label to the bar heights handed to
             ``pylab.bar`` (presumably one height per entry of ``words``,
             in the same order -- confirm against the caller).

     Returns:
         None. The figure is displayed with ``pylab.show()``.

     The y-axis label and the title are hard-coded strings.
     """
     import pylab

     n_categories = len(categories)
     positions = pylab.arange(len(words))

     # Kept arithmetically identical to the original ``0.1*1 / (n + 1)*10``
     # (multiplying by 1 is exact); the value is approximately 1 / (n + 1),
     # computed in floating point.
     width = 0.1 / (n_categories + 1) * 10

     series_list = []
     for offset, category in enumerate(categories):
         # Shift each category's bars right by one bar width per preceding
         # category; the color wraps around when the palette is exhausted.
         series = pylab.bar(
             positions + offset * width,
             counts[category],
             width,
             color=colors[offset % len(colors)],
         )
         series_list.append(series)

     # Tick labels sit one bar width to the right of each group's base position.
     pylab.xticks(positions + width, words)

     # The first bar of every series serves as that category's legend handle.
     legend_handles = [series[0] for series in series_list]
     pylab.legend(legend_handles, categories, loc='upper left')

     pylab.ylabel('Frequency')
     pylab.title('Frequency of Six Modal Verbs by Genre')
     pylab.show()

 def test_bar_char():
     """Plot counts of six modal verbs across five Brown Corpus genres.

     Builds an ``nltk.ConditionalFreqDist`` keyed by genre from every Brown
     Corpus word that is one of the listed modals, turns it into a
     ``{genre: [count per modal]}`` mapping, and passes that to
     ``bar_chart``. Takes no arguments and returns None.
     """
     genres = ['news', 'religion', 'hobbies', 'government', 'adventure']
     modals = ['can', 'could', 'may', 'might', 'must', 'will']

     # Lazily yield a (genre, word) pair for every modal occurrence; the
     # membership test is an exact, case-sensitive string comparison.
     genre_modal_pairs = (
         (genre, word)
         for genre in genres
         for word in nltk.corpus.brown.words(categories=genre)
         if word in modals
     )
     cfdist = nltk.ConditionalFreqDist(genre_modal_pairs)

     # One list of counts per genre, ordered like ``modals``.
     counts = {
         genre: [cfdist[genre][word] for word in modals]
         for genre in genres
     }

     bar_chart(genres, modals, counts)

修改了 width，结果为：

码农公寓

相关文章