1
2
3
4
5
6
7
8 """
9 Tools for reading and writing dependency trees.
10 The input is assumed to be in U{Malt-TAB<http://w3.msi.vxu.se/~nivre/research/MaltXML.html>} format.
11
12 Currently only reads the first tree in a file.
13 """
14
15 from nltk_lite.parse import Tree
16 from pprint import pformat
17 import re
18
19
21 """
22 A container for the nodes and labelled edges of a dependency structure.
23 """
25 """
26 We place a dummy 'top' node in the first position
27 in the nodelist, since the root node is often assigned '0'
28 as its head. This also means that the indexing of the nodelist
29 corresponds directly to the Malt-TAB format, which starts at 1.
30 """
31 top = {'word':None, 'deps':[], 'rel': 'TOP'}
32 self.nodelist = [top]
33 self.root = None
34 self.stream = None
35
37 return pformat(self.nodelist)
38
39 - def load(self, file):
45
47 """
48 Deal with lines in which spaces are used rather than tabs.
49 """
50 import re
51 SPC = re.compile(' +')
52 return re.sub(SPC, '\t', line)
53
54 - def read(self, input):
96
97 - def _word(self, node, filter=True):
98 w = node['word']
99 if filter:
100 if w != ',': return w
101 return w
102
104 """
105 Recursive function for turning dependency graphs into
106 NLTK trees.
107 @type i: C{int}
108 @param i: index of a node in C{nodelist}
109 @return: either a word (if the indexed node
110 is a leaf) or a L{Tree}.
111 """
112
113 node = self.nodelist[i]
114 word = node['word']
115 deps = node['deps']
116
117 if len(deps) == 0:
118 return word
119 else:
120 return Tree(word, [self._deptree(j) for j in deps])
121
122
124 """
125 Starting with the C{root} node, build a dependency tree using the NLTK
126 L{Tree} constructor. Dependency labels are omitted.
127 """
128 node = self.root
129 word = node['word']
130 deps = node['deps']
131
132 return Tree(word, [self._deptree(i) for i in deps])
133
135 try:
136 return self.nodelist[i]['head']
137 except IndexError:
138 return None
139
141 try:
142 return self.nodelist[i]['rel']
143 except IndexError:
144 return None
145
147 """
148 Convert the data in a C{nodelist} into a networkx
149 labeled directed graph.
150 @rtype: C{XDigraph}
151 """
152 nx_nodelist = range(1, len(self.nodelist))
153 nx_edgelist = [(n, self._hd(n), self._rel(n))
154 for n in nx_nodelist if self._hd(n)]
155 self.nx_labels = {}
156 for n in nx_nodelist:
157 self.nx_labels[n] = self.nodelist[n]['word']
158
159 g = NX.XDiGraph()
160 g.add_nodes_from(nx_nodelist)
161 g.add_edges_from(nx_edgelist)
162
163 return g
164
165
166
168 """
169 A demonstration of the result of reading a dependency
170 version of the first sentence of the Penn Treebank.
171 """
172 dg = DepGraph().read("""Pierre NNP 2 NMOD
173 Vinken NNP 8 SUB
174 , , 2 P
175 61 CD 5 NMOD
176 years NNS 6 AMOD
177 old JJ 2 NMOD
178 , , 2 P
179 will MD 0 ROOT
180 join VB 8 VC
181 the DT 11 NMOD
182 board NN 9 OBJ
183 as IN 9 VMOD
184 a DT 15 NMOD
185 nonexecutive JJ 15 NMOD
186 director NN 12 PMOD
187 Nov. NNP 9 VMOD
188 29 CD 16 NMOD
189 . . 9 VMOD
190 """)
191 tree = dg.deptree()
192 print tree.pp()
193 if nx:
194
195 try:
196 import networkx as NX
197 import pylab as P
198 except ImportError:
199 raise
200 g = dg.nx_graph()
201 g.info()
202 pos = NX.spring_layout(g, dim=1)
203 NX.draw_networkx_nodes(g, pos, node_size=50)
204
205 NX.draw_networkx_labels(g, pos, dg.nx_labels)
206 P.xticks([])
207 P.yticks([])
208 P.savefig('deptree.png')
209 P.show()
210
211 if __name__ == '__main__':
212 demo()
213