1
2
3
4
5
6
7
8
9 """
10 Read tokens from the Stopwords Corpus.
11 """
12
13 from nltk_lite.corpora import get_basedir
14 import os
15
# Identifiers of the languages available in the stopwords corpus,
# one entry per language.
items = [
    'danish',
    'dutch',
    'english',
    'french',
    'german',
    'italian',
    'norwegian',
    'portuguese',
    'russian',
    'spanish',
    'swedish',
]

# Human-readable title for every identifier in `items`: the capitalised
# language name followed by " stopwords" (e.g. 'danish' -> 'Danish stopwords').
item_name = dict(
    (language, language.capitalize() + ' stopwords') for language in items
)
32
33 -def raw(files = 'english'):
40
51
# Run demo() only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    demo()
54