1
2
3
4
5
6
7
8
9
10
11
12
13 """
14 Basic data classes for representing feature structures. A X{feature
15 structure} is a mapping from feature names to feature values, where:
16
17 - Each X{feature name} is a case sensitive string.
18 - Each X{feature value} can be a base value (such as a string), a
19 variable, or a nested feature structure.
20
21 Feature structures are typically used to represent partial information
22 about objects. A feature name that is not mapped to a value stands
23 for a feature whose value is unknown (I{not} a feature without a
24 value). Two feature structures that represent (potentially
25 overlapping) information about the same object can be combined by
26 X{unification}. When two inconsistant feature structures are unified,
27 the unification fails and returns C{None}.
28
29 Features are usually specified using X{feature paths}, or tuples of
30 feature names that specify path through the nested feature structures
31 to a value.
32
33 Feature structures may contain reentrant feature values. A
34 X{reentrant feature value} is a single feature value that can be
35 accessed via multiple feature paths. Unification preserves the
36 reentrance relations imposed by both of the unified feature
37 structures. After unification, any extensions to a reentrant feature
38 value will be visible using any of its feature paths.
39
40 Feature structure variables are encoded using the
41 L{FeatureVariable} class. Feature structure variables are
42 essentially just names; they do not directly contain values. Instead,
43 the mapping from variables to values is encoded externally to the
44 variable, as a set of X{bindings}. These bindings are stored using
45 the L{FeatureBindings} class.
46
47 @todo: more test cases
48 @sort: FeatureStructure, FeatureVariable, AliasedFeatureVariable,
49 FeatureBindings
50 @group Feature Structures: FeatureStructure
51 @group Variables: FeatureVariable, AliasedFeatureVariable,
52 FeatureBindings
53 @group Unit Tests: FeatureStructureTestCase
54 """
55
56
57
58
59
60
61
62
63
64
65
66 import re
67 from types import NoneType
68
69
70
71
72
74 """
75 An interface for classes that can perform substitutions for feature
76 variables.
77 """
79 """
80 @return: The object that is obtained by replacing
81 each variable bound by C{bindings} with its values.
82 @rtype: (any)
83 """
84 raise NotImplementedError
85
87 """
88 A variable that can stand for a single feature value in a feature
89 structure. Each variable is defined by a unique identifier, which
90 can be either a case-sensitive string (for X{named variables}) or
91 an integer (for X{numbered variables}).
92
93 Named variables are created by calling the C{FeatureVariable}
94 constructor with a string identifier. If multiple named variables
95 objects are created with the same identifier, then they represent
96 the same variable. Numbered variables are created by calling the
97 C{FeatureVariable} constructor with no arguments; a new identifier
98 will be automatically generated. Each new numbered variable object
99 is guaranteed to have a unique identifier.
100
101 Variables do not directly contain values; instead, the mapping
102 from variables to values is encoded externally as a set of
103 bindings, using L{FeatureBindings}. If a set of
104 bindings assigns a value to a variable, then that variable is said
105 to be X{bound} with respect to those bindings; otherwise, it is
106 said to be X{unbound}.
107
108 @see: L{FeatureStructure}
109 """
110 _next_numbered_id = 1
111
113 """
114 Construct a new feature structure variable.
115 @type identifier: C{string}
116 @param identifier: A unique identifier for this variable.
117 Any two C{FeatureVariable} objects with the
118 same identifier are treated as the same variable.
119 """
120 if identifier is None:
121 self._identifier = FeatureVariable._next_numbered_id
122 FeatureVariable._next_numbered_id += 1
123 else:
124 self._identifier = identifier
125
127 """
128 @return: This variable's unique identifier.
129 @rtype: C{string}
130 """
131 return self._identifier
132
134 """
135 @return: A string representation of this feature structure
136 variable. A feature structure variable with identifier
137 C{I{x}} is represented as C{'?I{x}'}.
138 """
139 return '?%s' % self._identifier
140
142 if not isinstance(other, FeatureVariable): return -1
143 return cmp(self._identifier, other._identifier)
144
147
148 - def alias(self, variable):
149 """
150 Return an aliased variable that constrains this variable to be
151 equal to C{variable}.
152 @rtype: L{AliasedFeatureVariable}
153 """
154 if self == variable: return self
155 return AliasedFeatureVariable(self, variable)
156
158 """
159 @return: The value that is bound to this variable if it appears in
160 @C{bindings} otherwise just return self.
161 @rtype: (any)
162 """
163 if bindings.is_bound(self):
164 return bindings.lookup(self)
165 else:
166 return self
167
168
170 """
171 Given a string that encodes a feature variable, return that
172 variable. This method can be used to parse both
173 C{FeatureVariables} and C{AliasedFeatureVariables}. However,
174 this method can not be used to parse numbered variables, since
175 doing so could violate the guarantee that each numbered
176 variable object has a unique identifier.
177 """
178
179 match = re.match(r'\?[a-zA-Z_][a-zA-Z0-9_]*$', s)
180 if match:
181 return FeatureVariable(s[1:])
182
183
184 match = re.match(r'\?<[a-zA-Z_][a-zA-Z0-9_]*'+
185 r'(=[a-zA-Z_][a-zA-Z0-9_]*)*>$', s)
186 if match:
187 idents = s[2:-1].split('=')
188 vars = [FeatureVariable(i) for i in idents]
189 return AliasedFeatureVariable(*vars)
190
191 raise ValueError('Bad FeatureVariable string')
192
193 parse=staticmethod(parse)
194
196 """
197 A set of variables that are constrained to be equal. An aliased
198 variable can be used in place of a simple variable. In
199 particular, an aliased variable stands for a single feature value,
200 and requires that each its aliases are bound to that same
201 value. Aliased variables can be categorized according to their
202 values in a set of bindings:
203
204 - An aliased variable is X{unbound} if none of its aliases
205 is assigned a value.
206
207 - An aliased variable is X{bound} if at least one of its
208 aliases is bound, and all of its bound aliases are
209 assigned the same value. (If at least one alias is
210 unbound, then the aliased variable is said to be X{partially
211 bound}.)
212
213 - An aliased variable is X{inconsistant} if two or more
214 aliases are bound to different values.
215
216 @ivar _aliases: The set of aliases contained by
217 this aliased variable. This set is encoded as a dictionary
218 whose keys are variables.
219 """
221 """
222 Construct a new feature structure variable that contains the
223 given aliases. If C{aliases} contains aliased
224 variables, then they are replaced by their lists of
225 aliases.
226 @raise ValueError: If no aliases are specified.
227 """
228 if len(aliases) == 0:
229 raise ValueError('Expected at least one alias')
230 self._aliases = {}
231 for subvar in aliases:
232 if isinstance(subvar, AliasedFeatureVariable):
233 self._aliases.update(subvar._aliases)
234 else:
235 self._aliases[subvar] = 1
236
238 """
239 Raise C{ValueError}, since aliased variables do not have a
240 single identifier.
241 """
242 raise ValueError('Aliased variables do not have identifiers')
243
245 """
246 @return: A list of the variables that are constrained to be
247 equal by this aliased variable.
248 """
249 return self._aliases.keys()
250
252 """
253 @return: A string representation of this feature structure
254 variable. A feature structure variable with identifiers
255 C{I{X1}, I{X2}, ..., I{Xn}} is represented as
256 C{'?<I{X1}=I{X2}=...=I{Xn}>'}.
257 """
258 idents = [v._identifier for v in self.aliases()]
259 idents.sort()
260 return '?<' + '='.join(idents) + '>'
261
263 if not isinstance(other, FeatureVariable): return -1
264 return cmp(self._aliases, other._identifier)
265
268
270 """
271 A partial mapping from feature variables to values. Simple
272 variables can be either X{bound} (i.e., assigned a value), or
273 X{unbound} (i.e., left unspecified). Aliased variables can
274 additionally be X{inconsistant} (i.e., assigned multiple
275 incompatible values).
276
277 @ivar _bindings: A dictionary mapping from bound variables
278 to their values.
279 """
280 - def __init__(self, initial_bindings=None):
281 """
282 Construct a new set of bindings.
283
284 @param initial_bindings: A dictionary from variables to
285 values, specifying the initial assignments for the bound
286 variables.
287 """
288
289 if initial_bindings is None: initial_bindings = {}
290 for val in initial_bindings.values():
291 if isinstance(val, FeatureVariable):
292 err = 'Variables cannot be bound to other variables'
293 raise ValueError(err)
294
295 self._bindings = initial_bindings.copy()
296
298 """
299 @return: A list of all simple variables that have been
300 assigned values.
301 @rtype: C{list} of L{FeatureVariable}
302 """
303 return self._bindings.keys()
304
306 """
307 @return: True if the given variable is bound. A simple
308 variable is bound if it has been assigned a value. An aliased
309 variable is bound if at least one of its aliases is bound
310 and all of its bound aliases are assigned the same value.
311
312 @rtype: C{bool}
313 """
314 if isinstance(variable, AliasedFeatureVariable):
315 bindings = [self._bindings.get(v)
316 for v in variable.aliases()
317 if self._bindings.has_key(v)]
318 if len(bindings) == 0: return 0
319 inconsistant = [val for val in bindings if val != bindings[0]]
320 if inconsistant: return 0
321 return 1
322
323 return self._bindings.has_key(variable)
324
325 - def lookup(self, variable, update_aliased_bindings=False):
326 """
327 @return: The value that it assigned to the given variable, if
328 it's bound; or the variable itself if it's unbound. The value
329 assigned to an aliased variable is defined as the value that's
330 assigned to its bound aliases.
331
332 @param update_aliased_bindings: If true, then looking up a
333 bound aliased variable will cause any unbound aliases
334 it has to be bound to its value. E.g., if C{?x} is bound
335 to C{1} and C{?y} is unbound, then looking up C{?x=y} will
336 cause C{?y} to be bound to C{1}.
337 @raise ValueError: If C{variable} is an aliased variable with an
338 inconsistant value (i.e., if two or more of its bound
339 aliases are assigned different values).
340 """
341
342
343
344 if isinstance(variable, AliasedFeatureVariable):
345
346 bindings = [self._bindings.get(v)
347 for v in variable.aliases()
348 if self._bindings.has_key(v)]
349
350 if len(bindings) == 0: return variable
351
352 val = bindings[0]
353 for binding in bindings[1:]:
354 if binding != val:
355 raise ValueError('inconsistant value')
356
357 if update_aliased_bindings:
358 for subvar in variable.aliases():
359 self._bindings[subvar] = val
360
361 return val
362
363 return self._bindings.get(variable, variable)
364
365 - def bind(self, variable, value):
366 """
367 Assign a value to a variable. If C{variable} is an aliased
368 variable, then the value is assigned to all of its
369 aliases. Variables can only be bound to values; they may
370 not be bound to other variables.
371
372 @raise ValueError: If C{value} is a variable.
373 """
374 if isinstance(value, FeatureVariable):
375 raise ValueError('Variables cannot be bound to other variables')
376
377 if isinstance(variable, AliasedFeatureVariable):
378 for subvar in variable.aliases():
379 self._bindings[subvar] = value
380 else:
381 self._bindings[variable] = value
382
384 """
385 @return: a copy of this set of bindings.
386 """
387 return FeatureBindings(self._bindings)
388
390 """
391 @return: a string representation of this set of bindings.
392 """
393 if self._bindings:
394 bindings = ['%r=%r' % (k,v) for (k,v) in self._bindings.items()]
395 return '<Bindings: %s>' % (', '.join(bindings))
396 else:
397 return '<Bindings (empty)>'
398
400 if not isinstance(other, FeatureVariable): return -1
401 return cmp((self._bindings, self._synonyms),
402 (other._bindings, other._synonyms))
403
404
406 """
407 A structured set of features. These features are represented as a
408 mapping from feature names to feature values, where each feature
409 value is either a basic value (such as a string or an integer), or
410 a nested feature structure.
411
412 A feature structure's feature values can be accessed via indexing:
413
414 >>> fstruct1 = FeatureStructure(number='singular', person='3rd')
415 >>> print fstruct1['number']
416 'singular'
417
418 >>> fstruct2 = FeatureStructure(subject=fstruct1)
419 >>> print fstruct2['subject']['person']
420 '3rd'
421
422 A nested feature value can be also accessed via a X{feature
423 paths}, or a tuple of feature names that specifies the paths to
424 the nested feature:
425
426 >>> fpath = ('subject','number')
427 >>> print fstruct2[fpath]
428 'singular'
429
430 Feature structures may contain reentrant feature values. A
431 X{reentrant feature value} is a single feature value that can be
432 accessed via multiple feature paths.
433
434 @note: Should I present them as DAGs instead? That would make it
435 easier to explain reentrancy.
436 @ivar _features: A dictionary mapping from feature names to values.
437
438 @ivar _forward: A pointer to another feature structure that
439 replaced this feature structure. This is used during the
440 unification process to preserve reentrance. In particular, if
441 we're unifying feature structures A and B, where:
442
443 - x and y are feature paths.
444 - A contains a feature structure A[x]
445 - B contains a reentrant feature structure B[x]=B[y]
446
447 Then we need to ensure that in the unified structure C,
448 C[x]=C[y]. (Here the equals sign is used to denote the object
449 identity relation, i.e., C{is}.)
450 """
452 self._features = features
453
455 if type(index) == str:
456 return self._features[index]
457 elif len(index) == 0:
458 return self
459 elif len(index) == 1:
460 return self._features[index[0]]
461 elif isinstance(self._features[index[0]], FeatureStructure):
462 return self._features[index[0]][index[1:]]
463 else:
464 raise IndexError('Bad feature path')
465
467 """
468 @return: A list of the names of the features whose values are
469 defined by this feature structure.
470 @rtype: C{list} of C{string}
471 """
472 return self._features.keys()
473
475 """
476 @return: True if C{self} and C{other} assign the same value to
477 to every feature. In particular, return true if
478 C{self[M{p}]==other[M{p}]} for every feature path M{p} such
479 that C{self[M{p}]} or C{other[M{p}]} is a base value (i.e.,
480 not a nested feature structure).
481
482 Note that this is a weaker equality test than L{==<__eq__>},
483 which tests for equal identity.
484
485 @param check_reentrance: If true, then any difference in the
486 reentrance relations between C{self} and C{other} will
487 cause C{equal_values} to return false.
488 """
489 if not isinstance(other, FeatureStructure): return 0
490 if check_reentrance: return `self` == `other`
491 if len(self._features) != len(other._features): return 0
492 for (fname, selfval) in self._features.items():
493 otherval = other._features[fname]
494 if isinstance(selfval, FeatureStructure):
495 if not selfval.equal_values(otherval): return 0
496 else:
497 if selfval != otherval: return 0
498 return 1
499
501 """
502 @return: True if C{self} is the same object as C{other}. This
503 very strict equality test is necessary because object identity
504 is used to distinguish reentrant objects from non-reentrant
505 ones.
506 """
507 return self is other
508
511
513 """
514 @return: a new copy of this feature structure.
515 @param memo: The memoization dicationary, which should
516 typically be left unspecified.
517 """
518
519 if memo is None: memo = {}
520 memo_copy = memo.get(id(self))
521 if memo_copy is not None: return memo_copy
522
523
524
525 newcopy = FeatureStructure()
526 memo[id(self)] = newcopy
527 features = newcopy._features
528
529
530 for (fname, fval) in self._features.items():
531 if isinstance(fval, FeatureStructure):
532 features[fname] = fval.deepcopy(memo)
533 else:
534 features[fname] = fval
535
536 return newcopy
537
539 """
540 @return: A list of all feature structures that can be reached
541 from C{self} by multiple feature paths.
542 @rtype: C{list} of L{FeatureStructure}
543 """
544 reentrance_dict = self._find_reentrances({})
545 return [struct for (struct, reentrant) in reentrance_dict.items()
546 if reentrant]
547
548
549
550
551
553 """
554 @return: The feature structure that is obtained by replacing
555 each variable bound by C{bindings} with its values. If
556 C{self} contains an aliased variable that is partially bound by
557 C{bindings}, then that variable's unbound aliases will be
558 bound to its value. E.g., if the bindings C{<?x=1>} are
559 applied to the feature structure C{[A = ?<x=y>]}, then the
560 bindings will be updated to C{<?x=1,?y=1>}.
561
562 @rtype: L{FeatureStructure}
563 """
564 selfcopy = self.deepcopy()
565 selfcopy._apply_bindings(bindings, {})
566 return selfcopy
567
569 """
570 @return: The feature structure that is obtained by replacing
571 each variable in this feature structure with a new variable
572 that has a unique identifier.
573
574 @param newvars: A dictionary that is used to hold the mapping
575 from old variables to new variables. For each variable M{v}
576 in this feature structure:
577
578 - If C{newvars} maps M{v} to M{v'}, then M{v} will be
579 replaced by M{v'}.
580 - If C{newvars} does not contain M{v}, then a new entry
581 will be added to C{newvars}, mapping M{v} to the new
582 variable that is used to replace it.
583
584 To consistantly rename the variables in a set of feature
585 structures, simply apply rename_variables to each one, using
586 the same dictionary:
587
588 >>> newvars = {} # Maps old vars to alpha-renamed vars
589 >>> new_fstruct1 = ftruct1.rename_variables(newvars)
590 >>> new_fstruct2 = ftruct2.rename_variables(newvars)
591 >>> new_fstruct3 = ftruct3.rename_variables(newvars)
592
593 If newvars is not specified, then an empty dictionary is used.
594
595 @type newvars: C{dictionary} from L{FeatureStructureVariable}
596 to L{FeatureStructureVariable}
597
598 @rtype: L{FeatureStructure}
599 """
600 if newvars is None: newvars = {}
601 selfcopy = self.deepcopy()
602 selfcopy._rename_variables(newvars, {})
603 return selfcopy
604
614
627
628
629
630
631
632
633
634
635
636
637
638 - def unify(self, other, bindings=None, trace=False):
639 """
640 Unify C{self} with C{other}, and return the resulting feature
641 structure. This unified feature structure is the minimal
642 feature structure that:
643 - contains all feature value assignments from both C{self}
644 and C{other}.
645 - preserves all reentrance properties of C{self} and
646 C{other}.
647
648 If no such feature structure exists (because C{self} and
649 C{other} specify incompatible values for some feature), then
650 unification fails, and C{unify} returns C{None}.
651
652 @param bindings: A set of variable bindings to be used and
653 updated during unification. Bound variables are
654 treated as if they were replaced by their values. Unbound
655 variables are bound if they are unified with values; or
656 aliased if they are unified with other unbound variables.
657 If C{bindings} is unspecified, then all variables are
658 assumed to be unbound.
659 """
660 if trace: print '\nUnification trace:'
661
662
663 if bindings is None: bindings = FeatureBindings()
664
665
666
667
668 memo = {}
669 selfcopy = self.deepcopy(memo)
670 othercopy = other.deepcopy(memo)
671
672
673
674 for var in bindings.bound_variables():
675 valid = id(bindings.lookup(var))
676 if memo.has_key(valid):
677 bindings.bind(var, memo[valid])
678
679
680 try: selfcopy._destructively_unify(othercopy, bindings, trace)
681 except FeatureStructure._UnificationFailureError: return None
682
683
684
685 selfcopy._apply_forwards_to_bindings(bindings)
686 selfcopy._apply_forwards(visited={})
687
688
689
690 selfcopy._rebind_aliased_variables(bindings, visited={})
691
692
693 selfcopy._apply_bindings(bindings, visited={})
694
695
696 return selfcopy
697
699 """ An exception that is used by C{_destructively_unify} to
700 abort unification when a failure is encountered. """
701
702
705 """
706 Attempt to unify C{self} and C{other} by modifying them
707 in-place. If the unification succeeds, then C{self} will
708 contain the unified value, and the value of C{other} is
709 undefined. If the unification fails, then a
710 _UnificationFailureError is raised, and the values of C{self}
711 and C{other} are undefined.
712 """
713 if trace:
714
715 self._apply_forwards({})
716 other._apply_forwards({})
717 print ' '+'| '*depth+' /'+`self`
718 print ' '+'| '*depth+'|\\'+ `other`
719
720
721 while hasattr(other, '_forward'): other = other._forward
722
723
724
725
726
727 if self is other:
728 if trace:
729 print ' '+'| '*depth+'|'
730 print ' '+'| '*depth+'| (identical objects)'
731 print ' '+'| '*depth+'|'
732 print ' '+'| '*depth+'+-->'+`self`
733 return
734
735
736
737 other._forward = self
738
739 for (fname, otherval) in other._features.items():
740 if trace:
741 trace_otherval = otherval
742 trace_selfval_defined = self._features.has_key(fname)
743 trace_selfval = self._features.get(fname)
744 if self._features.has_key(fname):
745 selfval = self._features[fname]
746
747
748 if isinstance(selfval, FeatureVariable):
749 selfval = bindings.lookup(selfval)
750 if isinstance(otherval, FeatureVariable):
751 otherval = bindings.lookup(otherval)
752
753 if trace:
754 print ' '+'| '*(depth+1)
755 print ' '+'%s| Unify %s feature:'%('| '*(depth),fname)
756
757
758 if (isinstance(selfval, FeatureStructure) and
759 isinstance(otherval, FeatureStructure)):
760 selfval._destructively_unify(otherval, bindings,
761 trace, depth+1)
762
763
764 elif (isinstance(selfval, FeatureVariable) and
765 isinstance(otherval, FeatureVariable)):
766 self._features[fname] = selfval.alias(otherval)
767
768
769 elif isinstance(selfval, FeatureVariable):
770 bindings.bind(selfval, otherval)
771 elif isinstance(otherval, FeatureVariable):
772 bindings.bind(otherval, selfval)
773
774
775 elif ci_str_cmp and \
776 isinstance(selfval, str) and isinstance(otherval, str)\
777 and selfval.upper() == otherval.upper():
778 pass
779
780
781 elif selfval != otherval:
782 if trace: print ' '+'| '*depth + 'X <-- FAIL'
783 raise FeatureStructure._UnificationFailureError()
784
785
786 else: pass
787
788 if trace and not isinstance(selfval, FeatureStructure):
789
790 if isinstance(trace_selfval, FeatureStructure):
791 trace_selfval._apply_forwards({})
792 if isinstance(trace_otherval, FeatureStructure):
793 trace_otherval._apply_forwards({})
794 print ' '+'%s| /%r' % ('| '*(depth), trace_selfval)
795 print ' '+'%s| |\\%r' % ('| '*(depth), trace_otherval)
796 print ' '+'%s| +-->%r' % ('| '*(depth),
797 self._features[fname])
798
799
800 else:
801 self._features[fname] = otherval
802
803 if trace:
804
805 self._apply_forwards({})
806 print ' '+'| '*depth+'|'
807 print ' '+'| '*depth+'+-->'+`self`
808 if len(bindings.bound_variables()) > 0:
809 print ' '+'| '*depth+' '+`bindings`
810
812 """
813 Replace any feature structure that has a forward pointer with
814 the target of its forward pointer (to preserve reentrancy).
815 """
816 for var in bindings.bound_variables():
817 value = bindings.lookup(var)
818 if (isinstance(value, FeatureStructure) and
819 hasattr(value, '_forward')):
820 while hasattr(value, '_forward'):
821 value = value._forward
822 bindings.bind(var, value)
823
825 """
826 Replace any feature structure that has a forward pointer with
827 the target of its forward pointer (to preserve reentrancy).
828 """
829
830 if visited.has_key(id(self)): return
831 visited[id(self)] = 1
832
833 for fname, fval in self._features.items():
834 if isinstance(fval, FeatureStructure):
835 while hasattr(fval, '_forward'):
836 fval = fval._forward
837 self._features[fname] = fval
838 fval._apply_forwards(visited)
839
850
852 """
853 Check if this feature structure subsumes another feature structure.
854 """
855 return other.equal_values(self.unify(other))
856
857
858
859
860
862 """
863 Display a single-line representation of this feature structure,
864 suitable for embedding in other representations.
865 """
866 return self._repr(self._find_reentrances({}), {})
867
869 """
870 Display a multi-line representation of this feature structure
871 as an FVM (feature value matrix).
872 """
873 return '\n'.join(self._str(self._find_reentrances({}), {}))
874
875 - def _repr(self, reentrances, reentrance_ids):
876 """
877 @return: A string representation of this feature structure.
878 @param reentrances: A dictionary that maps from the C{id} of
879 each feature value in self, indicating whether that value
880 is reentrant or not.
881 @param reentrance_ids: A dictionary mapping from the C{id}s
882 of feature values to unique identifiers. This is modified
883 by C{repr}: the first time a reentrant feature value is
884 displayed, an identifier is added to reentrance_ids for
885 it.
886 """
887 segments = []
888
889
890
891 if reentrances[id(self)]:
892 assert not reentrance_ids.has_key(id(self))
893 reentrance_ids[id(self)] = `len(reentrance_ids)+1`
894
895 items = self._features.items()
896 items.sort()
897
898 for (fname, fval) in items:
899 if not isinstance(fval, FeatureStructure):
900 segments.append('%s=%r' % (fname, fval))
901 elif reentrance_ids.has_key(id(fval)):
902 segments.append('%s->(%s)' % (fname,
903 reentrance_ids[id(fval)]))
904 else:
905 fval_repr = fval._repr(reentrances, reentrance_ids)
906 segments.append('%s=%s' % (fname, fval_repr))
907
908
909 if reentrances[id(self)]:
910 return '(%s)[%s]' % (reentrance_ids[id(self)],
911 ', '.join(segments))
912 else:
913 return '[%s]' % (', '.join(segments))
914
915 - def _str(self, reentrances, reentrance_ids):
916 """
917 @return: A list of lines composing a string representation of
918 this feature structure.
919 @param reentrances: A dictionary that maps from the C{id} of
920 each feature value in self, indicating whether that value
921 is reentrant or not.
922 @param reentrance_ids: A dictionary mapping from the C{id}s
923 of feature values to unique identifiers. This is modified
924 by C{repr}: the first time a reentrant feature value is
925 displayed, an identifier is added to reentrance_ids for
926 it.
927 """
928
929
930 if reentrances[id(self)]:
931 assert not reentrance_ids.has_key(id(self))
932 reentrance_ids[id(self)] = `len(reentrance_ids)+1`
933
934
935 if len(self._features) == 0:
936 if reentrances[id(self)]:
937 return ['(%s) []' % reentrance_ids[id(self)]]
938 else:
939 return ['[]']
940
941
942 maxfnamelen = max(len(k) for k in self.feature_names())
943
944 lines = []
945 items = self._features.items()
946 items.sort()
947
948 for (fname, fval) in items:
949 if not isinstance(fval, FeatureStructure):
950
951 lines.append('%s = %r' % (fname.ljust(maxfnamelen), fval))
952
953 elif reentrance_ids.has_key(id(fval)):
954
955
956 lines.append('%s -> (%s)' % (fname.ljust(maxfnamelen),
957 reentrance_ids[id(fval)]))
958
959 else:
960
961
962 if lines and lines[-1] != '': lines.append('')
963
964
965 fval_lines = fval._str(reentrances, reentrance_ids)
966
967
968 fval_lines = [(' '*(maxfnamelen+3))+l for l in fval_lines]
969
970
971 nameline = (len(fval_lines)-1)/2
972
973 fval_lines[nameline] = (
974 fname.ljust(maxfnamelen)+' ='+
975 fval_lines[nameline][maxfnamelen+2:])
976
977
978 lines += fval_lines
979
980
981 lines.append('')
982
983
984 if lines[-1] == '': lines = lines[:-1]
985
986
987 maxlen = max(len(line) for line in lines)
988 lines = ['[ %s%s ]' % (line, ' '*(maxlen-len(line))) for line in lines]
989
990
991 if reentrances[id(self)]:
992 idstr = '(%s) ' % reentrance_ids[id(self)]
993 lines = [(' '*len(idstr))+l for l in lines]
994 idline = (len(lines)-1)/2
995 lines[idline] = idstr + lines[idline][len(idstr):]
996
997 return lines
998
999
1000
1001
1003 """
1004 Find all of the feature values contained by self that are
1005 reentrant (i.e., that can be reached by multiple paths through
1006 feature structure's features). Return a dictionary
1007 C{reentrances} that maps from the C{id} of each feature value
1008 to a boolean value, indicating whether it is reentrant or not.
1009 """
1010 if reentrances.has_key(id(self)):
1011
1012 reentrances[id(self)] = True
1013 else:
1014
1015 reentrances[id(self)] = False
1016
1017
1018 for fval in self._features.values():
1019 if isinstance(fval, FeatureStructure):
1020 fval._find_reentrances(reentrances)
1021
1022 return reentrances
1023
1024
1025
1026
1027
1028
1030 """
1031 Convert a string representation of a feature structure (as
1032 displayed by repr) into a C{FeatureStructure}. This parse
1033 imposes the following restrictions on the string
1034 representation:
1035 - Feature names cannot contain any of the following:
1036 whitespace, parenthases, quote marks, equals signs,
1037 dashes, and square brackets.
1038 - Only the following basic feature value are supported:
1039 strings, integers, variables, C{None}, and unquoted
1040 alphanumeric strings.
1041 - For reentrant values, the first mention must specify
1042 a reentrance identifier and a value; and any subsequent
1043 mentions must use arrows (C{'->'}) to reference the
1044 reentrance identifier.
1045 """
1046 try:
1047 value, position = cls._parse(s, 0, {})
1048 except ValueError, e:
1049 estr = ('Error parsing field structure\n\n ' +
1050 s + '\n ' + ' '*e.args[1] + '^ ' +
1051 'Expected %s\n' % e.args[0])
1052 raise ValueError, estr
1053 if position != len(s): raise ValueError()
1054 return value
1055
1056
1057 _PARSE_RE = {'name': re.compile(r'\s*([^\s\(\)"\'\-=\[\]]+)\s*'),
1058 'ident': re.compile(r'\s*\((\d+)\)\s*'),
1059 'reentrance': re.compile(r'\s*->\s*'),
1060 'assign': re.compile(r'\s*=\s*'),
1061 'bracket': re.compile(r'\s*]\s*'),
1062 'comma': re.compile(r'\s*,\s*'),
1063 'none': re.compile(r'None(?=\s|\]|,)'),
1064 'int': re.compile(r'-?\d+(?=\s|\]|,)'),
1065 'var': re.compile(r'\?[a-zA-Z_][a-zA-Z0-9_]*'+'|'+
1066 r'\?<[a-zA-Z_][a-zA-Z0-9_]*'+
1067 r'(=[a-zA-Z_][a-zA-Z0-9_]*)*>'),
1068 'symbol': re.compile(r'\w+'),
1069 'stringmarker': re.compile("['\"\\\\]")}
1070
1071
1072 - def _parse(cls, s, position=0, reentrances=None):
1073 """
1074 Helper function that parses a feature structure.
1075 @param s: The string to parse.
1076 @param position: The position in the string to start parsing.
1077 @param reentrances: A dictionary from reentrance ids to values.
1078 @return: A tuple (val, pos) of the feature structure created
1079 by parsing and the position where the parsed feature
1080 structure ends.
1081 """
1082
1083 _PARSE_RE = cls._PARSE_RE
1084
1085
1086 if s[position] != '[': raise ValueError('open bracket', position)
1087 position += 1
1088
1089
1090
1091 match = _PARSE_RE['bracket'].match(s, position)
1092 if match is not None: return cls(), match.end()
1093
1094
1095
1096
1097
1098
1099 features = {}
1100 while position < len(s):
1101
1102 name = id = target = val = None
1103
1104
1105 match = _PARSE_RE['name'].match(s, position)
1106 if match is None: raise ValueError('feature name', position)
1107 name = match.group(1)
1108 position = match.end()
1109
1110
1111 match = _PARSE_RE['reentrance'].match(s, position)
1112 if match is not None:
1113 position = match.end()
1114 match = _PARSE_RE['ident'].match(s, position)
1115 if match is None: raise ValueError('identifier', position)
1116 target = match.group(1)
1117 position = match.end()
1118 try: features[name] = reentrances[target]
1119 except: raise ValueError('bound identifier', position)
1120
1121
1122 else:
1123 match = _PARSE_RE['assign'].match(s, position)
1124 if match is None: raise ValueError('equals sign', position)
1125 position = match.end()
1126
1127
1128 match = _PARSE_RE['ident'].match(s, position)
1129 if match is not None:
1130 id = match.group(1)
1131 if reentrances.has_key(id):
1132 raise ValueError('new identifier', position+1)
1133 position = match.end()
1134
1135 val, position = cls._parseval(s, position, reentrances)
1136 features[name] = val
1137 if id is not None:
1138 reentrances[id] = val
1139
1140
1141 match = _PARSE_RE['bracket'].match(s, position)
1142 if match is not None:
1143 return cls(**features), match.end()
1144
1145
1146 match = _PARSE_RE['comma'].match(s, position)
1147 if match is None: raise ValueError('comma', position)
1148 position = match.end()
1149
1150
1151 raise ValueError('close bracket', position)
1152
1153
1154 - def _parseval(cls, s, position, reentrances):
1155 """
1156 Helper function that parses a feature value. Currently
1157 supports: None, integers, variables, strings, nested feature
1158 structures.
1159 @param s: The string to parse.
1160 @param position: The position in the string to start parsing.
1161 @param reentrances: A dictionary from reentrance ids to values.
1162 @return: A tuple (val, pos) of the value created by parsing
1163 and the position where the parsed value ends.
1164 """
1165
1166 _PARSE_RE = cls._PARSE_RE
1167
1168
1169 if position == len(s): raise ValueError('value', position)
1170
1171
1172 if s[position] in "'\"":
1173 start = position
1174 quotemark = s[position:position+1]
1175 position += 1
1176 while 1:
1177 match = _PARSE_RE['stringmarker'].search(s, position)
1178 if not match: raise ValueError('close quote', position)
1179 position = match.end()
1180 if match.group() == '\\': position += 1
1181 elif match.group() == quotemark:
1182 return eval(s[start:position]), position
1183
1184
1185 if s[position] == '[':
1186 return cls._parse(s, position, reentrances)
1187
1188
1189 match = _PARSE_RE['var'].match(s, position)
1190 if match is not None:
1191 return FeatureVariable.parse(match.group()), match.end()
1192
1193
1194 match = _PARSE_RE['none'].match(s, position)
1195 if match is not None:
1196 return None, match.end()
1197
1198
1199 match = _PARSE_RE['int'].match(s, position)
1200 if match is not None:
1201 return int(match.group()), match.end()
1202
1203
1204 match = _PARSE_RE['symbol'].match(s, position)
1205 if match is not None:
1206 return match.group(), match.end()
1207
1208
1209 raise ValueError('value', position)
1210
1211 _parseval=classmethod(_parseval)
1212 _parse=classmethod(_parse)
1213 parse=classmethod(parse)
1214
1215
1216
1217
1218
1219 import unittest
1220
1221
1222
1223
1225 'Unit testing for FeatureStructure'
1226
1250
1252 'Reentrant unification tests'
1253
1254 fs1 = FeatureStructure.parse('[A=(1)[B=b], E=[F->(1)]]')
1255 fs2 = FeatureStructure.parse("[A=[C='c'], E=[F=[D='d']]]")
1256 fs3 = fs1.unify(fs2)
1257 fs3repr = "[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]"
1258 self.failUnlessEqual(repr(fs3), fs3repr)
1259 fs3 = fs2.unify(fs1)
1260 self.failUnlessEqual(repr(fs3), fs3repr)
1261
1262
1263 fs1 = FeatureStructure.parse("[a=[],b=[],c=[],d=[]]")
1264 fs2 = FeatureStructure.parse('[a=(1)[], b->(1), c->(1), d->(1)]')
1265 fs3 = fs1.unify(fs2)
1266 self.failUnlessEqual(repr(fs3), '[a=(1)[], b->(1), c->(1), d->(1)]')
1267
1268
1269 fs1 = FeatureStructure.parse('[x=(1)[], y->(1)]')
1270 fs2 = FeatureStructure.parse('[x=(1)[], y->(1)]')
1271 fs3 = fs1.unify(fs2)
1272
1274 'Bound variables should get forwarded appropriately'
1275 fs1 = FeatureStructure.parse('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]')
1276
1277 fs2y = FeatureStructure(Y='y')
1278 fs2z = FeatureStructure(Z='z')
1279 fs2 = FeatureStructure.parse('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]')
1280
1281 fs3 = fs1.unify(fs2)
1282 fs3repr = ("[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]")
1283 self.failUnlessEqual(repr(fs3), fs3repr)
1284
1286 'Cyclic structure tests'
1287
1288 fs1 = FeatureStructure.parse('[F=(1)[], G->(1)]')
1289 fs2 = FeatureStructure.parse('[F=[H=(2)[]], G->(2)]')
1290 fs3 = fs1.unify(fs2)
1291
1292
1293 self.failUnlessEqual(repr(fs3), '[F=(1)[H->(1)], G->(1)]')
1294
1295
1296 self.failUnless(fs3['F'] is fs3['G'])
1297 self.failUnless(fs3['F'] is fs3['G', 'H'])
1298 self.failUnless(fs3['F'] is fs3['G', 'H', 'H'])
1299 self.failUnless(fs3['F'] is fs3[('G',)+(('H',)*10)])
1300
1301
1302 x = FeatureVariable('x')
1303 fs1 = FeatureStructure(F=FeatureStructure(H=x))
1304 fs2 = FeatureStructure(F=x)
1305 fs3 = fs1.unify(fs2)
1306
1307
1308 self.failUnlessEqual(repr(fs3), '[F=(1)[H->(1)]]')
1309
1310
1311 self.failUnless(fs3['F'] is fs3['F','H'])
1312 self.failUnless(fs3['F'] is fs3['F','H','H'])
1313 self.failUnless(fs3['F'] is fs3[('F',)+(('H',)*10)])
1314
1315
1316 fs4 = FeatureStructure.parse('[F=[H=[H=[H=(1)[]]]], K->(1)]')
1317 fs5 = fs3.unify(fs4)
1318 self.failUnlessEqual(repr(fs5), '[F=(1)[H->(1)], K->(1)]')
1319
1320
1321 fs6 = fs4.unify(fs3)
1322 self.failUnlessEqual(repr(fs6), '[F=(1)[H->(1)], K->(1)]')
1323
1324
1325 fs7 = fs3.unify(fs3.deepcopy())
1326
1334
1336 'Aliased variable tests'
1337 fs1 = FeatureStructure.parse("[a=?x, b=?x]")
1338 fs2 = fs1.unify(FeatureStructure.parse("[b=?y, c=?y]"))
1339 self.failUnlessEqual(repr(fs2), '[a=?x, b=?<x=y>, c=?y]')
1340 fs3 = fs2.unify(FeatureStructure.parse("[a=1]"))
1341 self.failUnlessEqual(repr(fs3), '[a=1, b=1, c=1]')
1342
1343 fs1 = FeatureStructure.parse("[a=1]")
1344 fs2 = FeatureStructure.parse("[a=?x, b=?x]")
1345 fs3 = fs2.unify(fs1)
1346 self.failUnlessEqual(repr(fs3), '[a=1, b=1]')
1347
1351
1352 -def test(verbosity):
1355
1356
1357
1358
1359
1361
1362 fs1_lines = str(fs1).split('\n')
1363 fs2_lines = str(fs2).split('\n')
1364 if len(fs1_lines) > len(fs2_lines):
1365 blankline = '['+' '*(len(fs2_lines[0])-2)+']'
1366 fs2_lines += [blankline]*len(fs1_lines)
1367 else:
1368 blankline = '['+' '*(len(fs1_lines[0])-2)+']'
1369 fs1_lines += [blankline]*len(fs2_lines)
1370 for (fs1_line, fs2_line) in zip(fs1_lines, fs2_lines):
1371 print indent + fs1_line + ' ' + fs2_line
1372 print indent+'-'*len(fs1_lines[0])+' '+'-'*len(fs2_lines[0])
1373
1374 linelen = len(fs1_lines[0])*2+3
1375 print indent+'| |'.center(linelen)
1376 print indent+'+-----UNIFY-----+'.center(linelen)
1377 print indent+'|'.center(linelen)
1378 print indent+'V'.center(linelen)
1379
1380 bindings = FeatureBindings()
1381
1382 result = fs1.unify(fs2, bindings)
1383 if result is None:
1384 print indent+'(FAILED)'.center(linelen)
1385 else:
1386 print '\n'.join(indent+l.center(linelen)
1387 for l in str(result).split('\n'))
1388 if bindings and len(bindings.bound_variables()) > 0:
1389 print repr(bindings).center(linelen)
1390 return result
1391
1392 -def demo(trace=False):
1393 import random, sys
1394
1395 HELP = '''
1396 1-%d: Select the corresponding feature structure
1397 q: Quit
1398 t: Turn tracing on or off
1399 l: List all feature structures
1400 ?: Help
1401 '''
1402
1403 print '''
1404 This demo will repeatedly present you with a list of feature
1405 structures, and ask you to choose two for unification. Whenever a
1406 new feature structure is generated, it is added to the list of
1407 choices that you can pick from. However, since this can be a
1408 large number of feature structures, the demo will only print out a
1409 random subset for you to choose between at a given time. If you
1410 want to see the complete lists, type "l". For a list of valid
1411 commands, type "?".
1412 '''
1413 print 'Press "Enter" to continue...'
1414 sys.stdin.readline()
1415
1416 fstruct_strings = [
1417 '[agr=[number=sing, gender=masc]]',
1418 '[agr=[gender=masc, person=3rd]]',
1419 '[agr=[gender=fem, person=3rd]]',
1420 '[subj=[agr=(1)[]], agr->(1)]',
1421 '[obj=?x]', '[subj=?x]',
1422 '[/=None]', '[/=NP]',
1423 '[cat=NP]', '[cat=VP]', '[cat=PP]',
1424 '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]',
1425 '[gender=masc, agr=?C]',
1426 '[gender=?S, agr=[gender=?S,person=3rd]]'
1427 ]
1428
1429 all_fstructs = [(i, FeatureStructure.parse(fstruct_strings[i]))
1430 for i in range(len(fstruct_strings))]
1431
1432 def list_fstructs(fstructs):
1433 for i, fstruct in fstructs:
1434 print
1435 lines = str(fstruct).split('\n')
1436 print '%3d: %s' % (i+1, lines[0])
1437 for line in lines[1:]: print ' '+line
1438 print
1439
1440
1441 while 1:
1442
1443 MAX_CHOICES = 5
1444 if len(all_fstructs) > MAX_CHOICES:
1445 fstructs = random.sample(all_fstructs, MAX_CHOICES)
1446 fstructs.sort()
1447 else:
1448 fstructs = all_fstructs
1449
1450 print '_'*75
1451
1452 print 'Choose two feature structures to unify:'
1453 list_fstructs(fstructs)
1454
1455 selected = [None,None]
1456 for (nth,i) in (('First',0), ('Second',1)):
1457 while selected[i] is None:
1458 print ('%s feature structure (1-%d,q,t,l,?): '
1459 % (nth, len(all_fstructs))),
1460 try:
1461 input = sys.stdin.readline().strip()
1462 if input in ('q', 'Q', 'x', 'X'): return
1463 if input in ('t', 'T'):
1464 trace = not trace
1465 print ' Trace = %s' % trace
1466 continue
1467 if input in ('h', 'H', '?'):
1468 print HELP % len(fstructs); continue
1469 if input in ('l', 'L'):
1470 list_fstructs(all_fstructs); continue
1471 num = int(input)-1
1472 selected[i] = all_fstructs[num][1]
1473 print
1474 except:
1475 print 'Bad sentence number'
1476 continue
1477
1478 if trace:
1479 result = selected[0].unify(selected[1], trace=1)
1480 else:
1481 result = display_unification(selected[0], selected[1])
1482 if result is not None:
1483 for i, fstruct in all_fstructs:
1484 if `result` == `fstruct`: break
1485 else:
1486 all_fstructs.append((len(all_fstructs), result))
1487
1488 print '\nType "Enter" to continue unifying; or "q" to quit.'
1489 input = sys.stdin.readline().strip()
1490 if input in ('q', 'Q', 'x', 'X'): return
1491
1492
1493 if __name__ == '__main__':
1494 test(verbosity=0)
1495 demo()
1496