1 from fsa import FSA
2 import yaml
3 from featurelite import unify
4
7 self.yamlstr = yamlstr
8 self._cache = None
13
15 """
16 Return an object that combines the feature labels a and b.
17
18 For now, this only does string concatenation; it can be extended
19 to unify 'featurelite' style dictionaries.
20 """
def override_features(a, b):
    # Handed to unify() as its ``fail`` callback by the enclosing code;
    # whatever the first value is, the second one is the result.
    # NOTE(review): presumably unify() calls this when two feature values
    # conflict -- confirm against featurelite.
    return b
23
24 if isinstance(a, YAMLwrapper): a = a.value()
25 if isinstance(b, YAMLwrapper): b = b.value()
26 if isinstance(a, str) and isinstance(b, str):
27 return a+b
28 else:
29 d = {}
30 vars = {}
31
32 return unify(a, b, vars, fail=override_features)
33 return '%s%s' % (a, b)
34
def fsa(self):
    """Return the object stored in this instance's ``_fsa`` attribute."""
    return self._fsa
40 trans = self.fsa()._transitions[state]
41 for label in trans.keys():
42 if label is not None and label[0].startswith(word) and len(label[0]) > len(word):
43 next = label[0][len(word):]
44 for pair in alphabet:
45 if next.startswith(pair.input()): yield pair.input()
56
57 @staticmethod
@staticmethod
def from_text(text):
    """
    Build a KimmoMorphology from the text of a lexicon description.

    The text is processed line by line after stripping surrounding
    whitespace.  Blank lines and lines starting with ';' are skipped.
    Every other line takes one of these forms:

      STATE:                 subsequent word entries are inserted from STATE
      NAME: NEXT1 NEXT2 ...  an arc labeled None from NAME to each NEXT
      WORD NEXT FEATURES     an arc labeled (WORD, FEATURES) from the
                             current state to NEXT
      WORD NEXT              an arc labeled (WORD, '') from the current
                             state to NEXT

    The current state is 'Begin' until the first ``STATE:`` line.

    In the three-field form, a WORD wrapped in a matching pair of single
    or double quotes is decoded as a Python string literal.  FEATURES
    equal to 'None' becomes None; FEATURES starting with a quote or '{'
    is wrapped in a YAMLwrapper; anything else is kept as a plain string.
    In the two-field form only the exact word ``''`` is special-cased
    (it becomes the empty string).

    Lines that match none of the forms are reported on stdout and ignored.

    @param text: the lexicon description.
    @return: a KimmoMorphology wrapping the FSA that was built.
    @raise ValueError, SyntaxError: if a quoted WORD is not a valid
        string literal.
    """
    # Local import so this block does not depend on the file's import list.
    import ast

    # NOTE(review): 'Begin' / ['End'] are presumably the start state and
    # the final states of the FSA -- confirm against fsa.FSA.
    fsa = FSA([], {}, 'Begin', ['End'])
    state = 'Begin'
    for line in text.split('\n'):
        line = line.strip()
        if not line or line.startswith(';'):
            continue
        if line.endswith(':'):
            state = line[:-1]
            continue

        parts = line.split()
        if parts[0].endswith(':'):
            name = parts[0][:-1]
            for target in parts[1:]:
                fsa.insert_safe(name, None, target)
        elif len(parts) > 2:
            # Split at most twice so FEATURES keeps its internal whitespace.
            word, target, features = line.split(None, 2)
            # Only decode a word that is quoted at BOTH ends with the same
            # quote character.  The previous test
            # (`starts with " or starts with ' and ends with '`) let any
            # word beginning with a double quote through because `and`
            # binds tighter than `or`.
            if len(word) >= 2 and word[0] in '\'"' and word[-1] == word[0]:
                # SECURITY: this used to be eval(word), which would run
                # arbitrary expressions found in the lexicon text.
                # literal_eval accepts literals only.
                word = ast.literal_eval(word)
            if features:
                if features == 'None':
                    features = None
                elif features[0] in '\'"{':
                    features = YAMLwrapper(features)
            fsa.insert_safe(state, (word, features), target)
        elif len(parts) == 2:
            word, target = parts
            features = ''
            if word == "''":
                word = ''
            fsa.insert_safe(state, (word, features), target)
        else:
            # Single-argument call form: valid on both Python 2 and 3.
            print("Ignoring line in morphology: %r" % line)
    return KimmoMorphology(fsa)
99
102
# Run the demo only when this file is executed directly as a script.
if __name__ == '__main__':
    demo()
105