Package nltk_lite :: Package corpora :: Module words
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.corpora.words

 1  # Natural Language Toolkit: Wordlist Corpus Reader 
 2  # 
 3  # Copyright (C) 2001-2007 University of Pennsylvania 
 4  # Author: Steven Bird <sb@ldc.upenn.edu> 
 5  #         Edward Loper <edloper@gradient.cis.upenn.edu> 
 6  # URL: <http://nltk.sf.net> 
 7  # For license information, see LICENSE.TXT 
 8   
 9  """ 
10  Read tokens from the Wordlist Corpus. 
11  """        
12   
13  from nltk_lite.corpora import get_basedir 
14  import os 
15   
# Wordlist file names that raw() reads when called without arguments.
items = ['en']

# Descriptive title for each entry of `items`.
item_name = {'en': 'English Wordlist'}
21   
def raw(files=items):
    """
    Generate the words of one or more wordlist files, one at a time.

    @param files: the name of a single wordlist file, or a sequence of
        such names.  Each name is joined onto the "words" directory
        below the corpus base directory returned by get_basedir().
        Defaults to C{items}.
    @return: a generator yielding every line of every file, in order,
        with leading and trailing whitespace stripped (a blank line is
        yielded as an empty string).
    """
    # A bare string is one file name, not a sequence of one-letter names.
    if isinstance(files, str):
        files = (files,)

    for name in files:
        path = os.path.join(get_basedir(), "words", name)
        # Stream the file lazily instead of loading it with readlines(),
        # and close the handle once the file is exhausted or the
        # generator itself is closed.
        with open(path) as stream:
            for line in stream:
                yield line.strip()
29
def demo():
    """Pretty-print the first twenty words produced by words.raw()."""
    from nltk_lite.corpora import words
    from itertools import islice
    from pprint import pprint

    # islice with a single bound takes the leading 20 items, the same
    # slice as the explicit (0, 20) range.
    first_words = list(islice(words.raw(), 20))
    pprint(first_words)
# Run the demonstration only when this module is executed as a script.
if __name__ == '__main__':
    demo()