Package nltk_lite :: Module featurestructure
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.featurestructure

   1  # Natural Language Toolkit: Feature Structures 
   2  # 
   3  # Copyright (C) 2001-2007 University of Pennsylvania 
   4  # Author: Edward Loper <edloper@gradient.cis.upenn.edu>, 
   5  #         Steven Bird <sb@csse.unimelb.edu.au> 
   6  #         Rob Speer (original code) 
   7  # URL: <http://nltk.sourceforge.net> 
   8  # For license information, see LICENSE.TXT 
   9  # 
  10  # $Id: featurestructure.py 4107 2007-02-01 00:07:42Z stevenbird $ 
  11   
  12   
  13  """ 
  14  Basic data classes for representing feature structures.  A X{feature 
  15  structure} is a mapping from feature names to feature values, where: 
  16   
  17    - Each X{feature name} is a case sensitive string. 
  18    - Each X{feature value} can be a base value (such as a string), a 
  19      variable, or a nested feature structure. 
  20   
  21  Feature structures are typically used to represent partial information 
  22  about objects.  A feature name that is not mapped to a value stands 
  23  for a feature whose value is unknown (I{not} a feature without a 
  24  value).  Two feature structures that represent (potentially 
  25  overlapping) information about the same object can be combined by 
  26  X{unification}.  When two inconsistant feature structures are unified, 
  27  the unification fails and returns C{None}. 
  28   
  29  Features are usually specified using X{feature paths}, or tuples of 
  30  feature names that specify path through the nested feature structures 
  31  to a value. 
  32   
  33  Feature structures may contain reentrant feature values.  A 
  34  X{reentrant feature value} is a single feature value that can be 
  35  accessed via multiple feature paths.  Unification preserves the 
  36  reentrance relations imposed by both of the unified feature 
  37  structures.  After unification, any extensions to a reentrant feature 
  38  value will be visible using any of its feature paths. 
  39   
  40  Feature structure variables are encoded using the 
  41  L{FeatureVariable} class.  Feature structure variables are 
  42  essentially just names; they do not directly contain values.  Instead, 
  43  the mapping from variables to values is encoded externally to the 
  44  variable, as a set of X{bindings}.  These bindings are stored using 
  45  the L{FeatureBindings} class. 
  46   
  47  @todo: more test cases 
  48  @sort: FeatureStructure, FeatureVariable, AliasedFeatureVariable, 
  49         FeatureBindings 
  50  @group Feature Structures: FeatureStructure 
  51  @group Variables: FeatureVariable, AliasedFeatureVariable, 
  52                    FeatureBindings 
  53  @group Unit Tests: FeatureStructureTestCase 
  54  """ 
  55   
  56  # Open question: should there be a "feature" object? 
  57  # 
  58  # The current implementation doesn't actually include an object to 
  59  # encode a "feature" (i.e., a name/value pair).  This makes the code 
  60  # simpler -- one less class to deal with, and you can directly query 
  61  # for feature values, rather than going through a feature object.  But 
  62  # there might be use cases for adding a Feature object.  E.g., if we 
  63  # wanted to assign properties (like is_required) to features.  But I'd 
  64  # like to see some compelling use cases before we add it. 
  65   
  66  import re 
  67  from types import NoneType 
  68   
  69  #////////////////////////////////////////////////////////////////////// 
  70  # Variables and variable bindings 
  71  #////////////////////////////////////////////////////////////////////// 
  72   
73 -class SubstituteBindingsI:
74 """ 75 An interface for classes that can perform substitutions for feature 76 variables. 77 """
78 - def substitute_bindings(self, bindings):
79 """ 80 @return: The object that is obtained by replacing 81 each variable bound by C{bindings} with its values. 82 @rtype: (any) 83 """ 84 raise NotImplementedError
85
86 -class FeatureVariable(SubstituteBindingsI):
87 """ 88 A variable that can stand for a single feature value in a feature 89 structure. Each variable is defined by a unique identifier, which 90 can be either a case-sensitive string (for X{named variables}) or 91 an integer (for X{numbered variables}). 92 93 Named variables are created by calling the C{FeatureVariable} 94 constructor with a string identifier. If multiple named variables 95 objects are created with the same identifier, then they represent 96 the same variable. Numbered variables are created by calling the 97 C{FeatureVariable} constructor with no arguments; a new identifier 98 will be automatically generated. Each new numbered variable object 99 is guaranteed to have a unique identifier. 100 101 Variables do not directly contain values; instead, the mapping 102 from variables to values is encoded externally as a set of 103 bindings, using L{FeatureBindings}. If a set of 104 bindings assigns a value to a variable, then that variable is said 105 to be X{bound} with respect to those bindings; otherwise, it is 106 said to be X{unbound}. 107 108 @see: L{FeatureStructure} 109 """ 110 _next_numbered_id = 1 111
112 - def __init__(self, identifier=None):
113 """ 114 Construct a new feature structure variable. 115 @type identifier: C{string} 116 @param identifier: A unique identifier for this variable. 117 Any two C{FeatureVariable} objects with the 118 same identifier are treated as the same variable. 119 """ 120 if identifier is None: 121 self._identifier = FeatureVariable._next_numbered_id 122 FeatureVariable._next_numbered_id += 1 123 else: 124 self._identifier = identifier
125
126 - def identifier(self):
127 """ 128 @return: This variable's unique identifier. 129 @rtype: C{string} 130 """ 131 return self._identifier
132
133 - def __repr__(self):
134 """ 135 @return: A string representation of this feature structure 136 variable. A feature structure variable with identifier 137 C{I{x}} is represented as C{'?I{x}'}. 138 """ 139 return '?%s' % self._identifier
140
141 - def __cmp__(self, other):
142 if not isinstance(other, FeatureVariable): return -1 143 return cmp(self._identifier, other._identifier)
144
145 - def __hash__(self):
146 return self._identifier.__hash__()
147
148 - def alias(self, variable):
149 """ 150 Return an aliased variable that constrains this variable to be 151 equal to C{variable}. 152 @rtype: L{AliasedFeatureVariable} 153 """ 154 if self == variable: return self 155 return AliasedFeatureVariable(self, variable)
156
157 - def substitute_bindings(self, bindings):
158 """ 159 @return: The value that is bound to this variable if it appears in 160 @C{bindings} otherwise just return self. 161 @rtype: (any) 162 """ 163 if bindings.is_bound(self): 164 return bindings.lookup(self) 165 else: 166 return self
167 168 # [staticmethod]
169 - def parse(s):
170 """ 171 Given a string that encodes a feature variable, return that 172 variable. This method can be used to parse both 173 C{FeatureVariables} and C{AliasedFeatureVariables}. However, 174 this method can not be used to parse numbered variables, since 175 doing so could violate the guarantee that each numbered 176 variable object has a unique identifier. 177 """ 178 # Simple variable 179 match = re.match(r'\?[a-zA-Z_][a-zA-Z0-9_]*$', s) 180 if match: 181 return FeatureVariable(s[1:]) 182 183 # Aliased variable 184 match = re.match(r'\?<[a-zA-Z_][a-zA-Z0-9_]*'+ 185 r'(=[a-zA-Z_][a-zA-Z0-9_]*)*>$', s) 186 if match: 187 idents = s[2:-1].split('=') 188 vars = [FeatureVariable(i) for i in idents] 189 return AliasedFeatureVariable(*vars) 190 191 raise ValueError('Bad FeatureVariable string')
192 193 parse=staticmethod(parse)
194
195 -class AliasedFeatureVariable(FeatureVariable):
196 """ 197 A set of variables that are constrained to be equal. An aliased 198 variable can be used in place of a simple variable. In 199 particular, an aliased variable stands for a single feature value, 200 and requires that each its aliases are bound to that same 201 value. Aliased variables can be categorized according to their 202 values in a set of bindings: 203 204 - An aliased variable is X{unbound} if none of its aliases 205 is assigned a value. 206 207 - An aliased variable is X{bound} if at least one of its 208 aliases is bound, and all of its bound aliases are 209 assigned the same value. (If at least one alias is 210 unbound, then the aliased variable is said to be X{partially 211 bound}.) 212 213 - An aliased variable is X{inconsistant} if two or more 214 aliases are bound to different values. 215 216 @ivar _aliases: The set of aliases contained by 217 this aliased variable. This set is encoded as a dictionary 218 whose keys are variables. 219 """
220 - def __init__(self, *aliases):
221 """ 222 Construct a new feature structure variable that contains the 223 given aliases. If C{aliases} contains aliased 224 variables, then they are replaced by their lists of 225 aliases. 226 @raise ValueError: If no aliases are specified. 227 """ 228 if len(aliases) == 0: 229 raise ValueError('Expected at least one alias') 230 self._aliases = {} 231 for subvar in aliases: 232 if isinstance(subvar, AliasedFeatureVariable): 233 self._aliases.update(subvar._aliases) 234 else: 235 self._aliases[subvar] = 1
236
237 - def identifier(self):
238 """ 239 Raise C{ValueError}, since aliased variables do not have a 240 single identifier. 241 """ 242 raise ValueError('Aliased variables do not have identifiers')
243
244 - def aliases(self):
245 """ 246 @return: A list of the variables that are constrained to be 247 equal by this aliased variable. 248 """ 249 return self._aliases.keys()
250
251 - def __repr__(self):
252 """ 253 @return: A string representation of this feature structure 254 variable. A feature structure variable with identifiers 255 C{I{X1}, I{X2}, ..., I{Xn}} is represented as 256 C{'?<I{X1}=I{X2}=...=I{Xn}>'}. 257 """ 258 idents = [v._identifier for v in self.aliases()] 259 idents.sort() 260 return '?<' + '='.join(idents) + '>'
261
262 - def __cmp__(self):
263 if not isinstance(other, FeatureVariable): return -1 264 return cmp(self._aliases, other._identifier)
265
266 - def __hash__(self):
267 return self._aliases.__hash__()
268
269 -class FeatureBindings(object):
270 """ 271 A partial mapping from feature variables to values. Simple 272 variables can be either X{bound} (i.e., assigned a value), or 273 X{unbound} (i.e., left unspecified). Aliased variables can 274 additionally be X{inconsistant} (i.e., assigned multiple 275 incompatible values). 276 277 @ivar _bindings: A dictionary mapping from bound variables 278 to their values. 279 """
280 - def __init__(self, initial_bindings=None):
281 """ 282 Construct a new set of bindings. 283 284 @param initial_bindings: A dictionary from variables to 285 values, specifying the initial assignments for the bound 286 variables. 287 """ 288 # Check that variables are not used as values. 289 if initial_bindings is None: initial_bindings = {} 290 for val in initial_bindings.values(): 291 if isinstance(val, FeatureVariable): 292 err = 'Variables cannot be bound to other variables' 293 raise ValueError(err) 294 295 self._bindings = initial_bindings.copy()
296
297 - def bound_variables(self):
298 """ 299 @return: A list of all simple variables that have been 300 assigned values. 301 @rtype: C{list} of L{FeatureVariable} 302 """ 303 return self._bindings.keys()
304
305 - def is_bound(self, variable):
306 """ 307 @return: True if the given variable is bound. A simple 308 variable is bound if it has been assigned a value. An aliased 309 variable is bound if at least one of its aliases is bound 310 and all of its bound aliases are assigned the same value. 311 312 @rtype: C{bool} 313 """ 314 if isinstance(variable, AliasedFeatureVariable): 315 bindings = [self._bindings.get(v) 316 for v in variable.aliases() 317 if self._bindings.has_key(v)] 318 if len(bindings) == 0: return 0 319 inconsistant = [val for val in bindings if val != bindings[0]] 320 if inconsistant: return 0 321 return 1 322 323 return self._bindings.has_key(variable)
324
325 - def lookup(self, variable, update_aliased_bindings=False):
326 """ 327 @return: The value that it assigned to the given variable, if 328 it's bound; or the variable itself if it's unbound. The value 329 assigned to an aliased variable is defined as the value that's 330 assigned to its bound aliases. 331 332 @param update_aliased_bindings: If true, then looking up a 333 bound aliased variable will cause any unbound aliases 334 it has to be bound to its value. E.g., if C{?x} is bound 335 to C{1} and C{?y} is unbound, then looking up C{?x=y} will 336 cause C{?y} to be bound to C{1}. 337 @raise ValueError: If C{variable} is an aliased variable with an 338 inconsistant value (i.e., if two or more of its bound 339 aliases are assigned different values). 340 """ 341 342 # If it's an aliased variable, then we need to check that the 343 # bindings of all of its aliases are consistant. 344 if isinstance(variable, AliasedFeatureVariable): 345 # Get a list of all bindings. 346 bindings = [self._bindings.get(v) 347 for v in variable.aliases() 348 if self._bindings.has_key(v)] 349 # If it's unbound, return the (aliased) variable. 350 if len(bindings) == 0: return variable 351 # Make sure all the bindings are equal. 352 val = bindings[0] 353 for binding in bindings[1:]: 354 if binding != val: 355 raise ValueError('inconsistant value') 356 # Set any unbound aliases, if requested 357 if update_aliased_bindings: 358 for subvar in variable.aliases(): 359 self._bindings[subvar] = val 360 # Return the value. 361 return val 362 363 return self._bindings.get(variable, variable)
364
365 - def bind(self, variable, value):
366 """ 367 Assign a value to a variable. If C{variable} is an aliased 368 variable, then the value is assigned to all of its 369 aliases. Variables can only be bound to values; they may 370 not be bound to other variables. 371 372 @raise ValueError: If C{value} is a variable. 373 """ 374 if isinstance(value, FeatureVariable): 375 raise ValueError('Variables cannot be bound to other variables') 376 377 if isinstance(variable, AliasedFeatureVariable): 378 for subvar in variable.aliases(): 379 self._bindings[subvar] = value 380 else: 381 self._bindings[variable] = value
382
383 - def copy(self):
384 """ 385 @return: a copy of this set of bindings. 386 """ 387 return FeatureBindings(self._bindings)
388
389 - def __repr__(self):
390 """ 391 @return: a string representation of this set of bindings. 392 """ 393 if self._bindings: 394 bindings = ['%r=%r' % (k,v) for (k,v) in self._bindings.items()] 395 return '<Bindings: %s>' % (', '.join(bindings)) 396 else: 397 return '<Bindings (empty)>'
398
399 - def __cmp__(self, other):
400 if not isinstance(other, FeatureVariable): return -1 401 return cmp((self._bindings, self._synonyms), 402 (other._bindings, other._synonyms))
403 404 # Feature structures use identity-based-equality.
405 -class FeatureStructure(object):
406 """ 407 A structured set of features. These features are represented as a 408 mapping from feature names to feature values, where each feature 409 value is either a basic value (such as a string or an integer), or 410 a nested feature structure. 411 412 A feature structure's feature values can be accessed via indexing: 413 414 >>> fstruct1 = FeatureStructure(number='singular', person='3rd') 415 >>> print fstruct1['number'] 416 'singular' 417 418 >>> fstruct2 = FeatureStructure(subject=fstruct1) 419 >>> print fstruct2['subject']['person'] 420 '3rd' 421 422 A nested feature value can be also accessed via a X{feature 423 paths}, or a tuple of feature names that specifies the paths to 424 the nested feature: 425 426 >>> fpath = ('subject','number') 427 >>> print fstruct2[fpath] 428 'singular' 429 430 Feature structures may contain reentrant feature values. A 431 X{reentrant feature value} is a single feature value that can be 432 accessed via multiple feature paths. 433 434 @note: Should I present them as DAGs instead? That would make it 435 easier to explain reentrancy. 436 @ivar _features: A dictionary mapping from feature names to values. 437 438 @ivar _forward: A pointer to another feature structure that 439 replaced this feature structure. This is used during the 440 unification process to preserve reentrance. In particular, if 441 we're unifying feature structures A and B, where: 442 443 - x and y are feature paths. 444 - A contains a feature structure A[x] 445 - B contains a reentrant feature structure B[x]=B[y] 446 447 Then we need to ensure that in the unified structure C, 448 C[x]=C[y]. (Here the equals sign is used to denote the object 449 identity relation, i.e., C{is}.) 450 """
451 - def __init__(self, **features):
452 self._features = features
453
454 - def __getitem__(self, index):
455 if type(index) == str: 456 return self._features[index] 457 elif len(index) == 0: 458 return self 459 elif len(index) == 1: 460 return self._features[index[0]] 461 elif isinstance(self._features[index[0]], FeatureStructure): 462 return self._features[index[0]][index[1:]] 463 else: 464 raise IndexError('Bad feature path')
465
466 - def feature_names(self):
467 """ 468 @return: A list of the names of the features whose values are 469 defined by this feature structure. 470 @rtype: C{list} of C{string} 471 """ 472 return self._features.keys()
473
474 - def equal_values(self, other, check_reentrance=False):
475 """ 476 @return: True if C{self} and C{other} assign the same value to 477 to every feature. In particular, return true if 478 C{self[M{p}]==other[M{p}]} for every feature path M{p} such 479 that C{self[M{p}]} or C{other[M{p}]} is a base value (i.e., 480 not a nested feature structure). 481 482 Note that this is a weaker equality test than L{==<__eq__>}, 483 which tests for equal identity. 484 485 @param check_reentrance: If true, then any difference in the 486 reentrance relations between C{self} and C{other} will 487 cause C{equal_values} to return false. 488 """ 489 if not isinstance(other, FeatureStructure): return 0 490 if check_reentrance: return `self` == `other` 491 if len(self._features) != len(other._features): return 0 492 for (fname, selfval) in self._features.items(): 493 otherval = other._features[fname] 494 if isinstance(selfval, FeatureStructure): 495 if not selfval.equal_values(otherval): return 0 496 else: 497 if selfval != otherval: return 0 498 return 1
499
500 - def __eq__(self, other):
501 """ 502 @return: True if C{self} is the same object as C{other}. This 503 very strict equality test is necessary because object identity 504 is used to distinguish reentrant objects from non-reentrant 505 ones. 506 """ 507 return self is other
508
509 - def __hash__(self):
510 return id(self)
511
512 - def deepcopy(self, memo=None):
513 """ 514 @return: a new copy of this feature structure. 515 @param memo: The memoization dicationary, which should 516 typically be left unspecified. 517 """ 518 # Check the memoization dictionary. 519 if memo is None: memo = {} 520 memo_copy = memo.get(id(self)) 521 if memo_copy is not None: return memo_copy 522 523 # Create a new copy. Do this *before* we fill out its 524 # features, in case of cycles. 525 newcopy = FeatureStructure() 526 memo[id(self)] = newcopy 527 features = newcopy._features 528 529 # Fill out the features. 530 for (fname, fval) in self._features.items(): 531 if isinstance(fval, FeatureStructure): 532 features[fname] = fval.deepcopy(memo) 533 else: 534 features[fname] = fval 535 536 return newcopy
537
538 - def reentrances(self):
539 """ 540 @return: A list of all feature structures that can be reached 541 from C{self} by multiple feature paths. 542 @rtype: C{list} of L{FeatureStructure} 543 """ 544 reentrance_dict = self._find_reentrances({}) 545 return [struct for (struct, reentrant) in reentrance_dict.items() 546 if reentrant]
547 548 ################################################################# 549 ## Variables 550 ################################################################# 551
552 - def apply_bindings(self, bindings):
553 """ 554 @return: The feature structure that is obtained by replacing 555 each variable bound by C{bindings} with its values. If 556 C{self} contains an aliased variable that is partially bound by 557 C{bindings}, then that variable's unbound aliases will be 558 bound to its value. E.g., if the bindings C{<?x=1>} are 559 applied to the feature structure C{[A = ?<x=y>]}, then the 560 bindings will be updated to C{<?x=1,?y=1>}. 561 562 @rtype: L{FeatureStructure} 563 """ 564 selfcopy = self.deepcopy() 565 selfcopy._apply_bindings(bindings, {}) 566 return selfcopy
567
568 - def rename_variables(self, newvars=None):
569 """ 570 @return: The feature structure that is obtained by replacing 571 each variable in this feature structure with a new variable 572 that has a unique identifier. 573 574 @param newvars: A dictionary that is used to hold the mapping 575 from old variables to new variables. For each variable M{v} 576 in this feature structure: 577 578 - If C{newvars} maps M{v} to M{v'}, then M{v} will be 579 replaced by M{v'}. 580 - If C{newvars} does not contain M{v}, then a new entry 581 will be added to C{newvars}, mapping M{v} to the new 582 variable that is used to replace it. 583 584 To consistantly rename the variables in a set of feature 585 structures, simply apply rename_variables to each one, using 586 the same dictionary: 587 588 >>> newvars = {} # Maps old vars to alpha-renamed vars 589 >>> new_fstruct1 = ftruct1.rename_variables(newvars) 590 >>> new_fstruct2 = ftruct2.rename_variables(newvars) 591 >>> new_fstruct3 = ftruct3.rename_variables(newvars) 592 593 If newvars is not specified, then an empty dictionary is used. 594 595 @type newvars: C{dictionary} from L{FeatureStructureVariable} 596 to L{FeatureStructureVariable} 597 598 @rtype: L{FeatureStructure} 599 """ 600 if newvars is None: newvars = {} 601 selfcopy = self.deepcopy() 602 selfcopy._rename_variables(newvars, {}) 603 return selfcopy
604
605 - def _apply_bindings(self, bindings, visited):
606 # Visit each node only once: 607 if visited.has_key(id(self)): return 608 visited[id(self)] = 1 609 for (fname, fval) in self._features.items(): 610 if isinstance(fval, SubstituteBindingsI): 611 self._features[fname] = fval.substitute_bindings(bindings) 612 if isinstance(fval, FeatureStructure): 613 fval._apply_bindings(bindings, visited)
614
615 - def _rename_variables(self, newvars, visited):
616 # Visit each node only once: 617 if visited.has_key(id(self)): return 618 visited[id(self)] = 1 619 620 for (fname, fval) in self._features.items(): 621 if isinstance(fval, FeatureVariable): 622 if not newvars.has_key(fval): 623 newvars[fval] = FeatureVariable() 624 self._features[fname] = newvars[fval] 625 elif isinstance(fval, FeatureStructure): 626 fval._rename_variables(newvars, visited)
627 628 ################################################################# 629 ## Unification 630 ################################################################# 631 632 # The basic unification algorithm: 633 # 1. Make copies of self and other (preserving reentrance) 634 # 2. Destructively unify self and other 635 # 3. Apply forward pointers, to preserve reentrance. 636 # 4. Find any partially bound aliased variables, and bind them. 637 # 5. Replace bound variables with their values.
638 - def unify(self, other, bindings=None, trace=False):
639 """ 640 Unify C{self} with C{other}, and return the resulting feature 641 structure. This unified feature structure is the minimal 642 feature structure that: 643 - contains all feature value assignments from both C{self} 644 and C{other}. 645 - preserves all reentrance properties of C{self} and 646 C{other}. 647 648 If no such feature structure exists (because C{self} and 649 C{other} specify incompatible values for some feature), then 650 unification fails, and C{unify} returns C{None}. 651 652 @param bindings: A set of variable bindings to be used and 653 updated during unification. Bound variables are 654 treated as if they were replaced by their values. Unbound 655 variables are bound if they are unified with values; or 656 aliased if they are unified with other unbound variables. 657 If C{bindings} is unspecified, then all variables are 658 assumed to be unbound. 659 """ 660 if trace: print '\nUnification trace:' 661 662 # If bindings are unspecified, use an empty set of bindings. 663 if bindings is None: bindings = FeatureBindings() 664 665 # Make copies of self & other (since the unification algorithm 666 # is destructive). Use the same memo, to preserve reentrance 667 # links between self and other. 668 memo = {} 669 selfcopy = self.deepcopy(memo) 670 othercopy = other.deepcopy(memo) 671 672 # Preserve reentrance links from bound variables into either 673 # self or other. 674 for var in bindings.bound_variables(): 675 valid = id(bindings.lookup(var)) 676 if memo.has_key(valid): 677 bindings.bind(var, memo[valid]) 678 679 # Do the actual unification. If it fails, return None. 680 try: selfcopy._destructively_unify(othercopy, bindings, trace) 681 except FeatureStructure._UnificationFailureError: return None 682 683 # Replace any feature structure that has a forward pointer 684 # with the target of its forward pointer. 685 selfcopy._apply_forwards_to_bindings(bindings) 686 selfcopy._apply_forwards(visited={}) 687 688 # Find any partially bound aliased variables, and bind their 689 # unbound aliases. 690 selfcopy._rebind_aliased_variables(bindings, visited={}) 691 692 # Replace bound vars with values. 693 selfcopy._apply_bindings(bindings, visited={}) 694 695 # Return the result. 696 return selfcopy
697
698 - class _UnificationFailureError(Exception):
699 """ An exception that is used by C{_destructively_unify} to 700 abort unification when a failure is encountered. """
701 702 # unify a cyclic self with another structure???
703 - def _destructively_unify(self, other, bindings, trace=False, 704 ci_str_cmp=False, depth=0):
705 """ 706 Attempt to unify C{self} and C{other} by modifying them 707 in-place. If the unification succeeds, then C{self} will 708 contain the unified value, and the value of C{other} is 709 undefined. If the unification fails, then a 710 _UnificationFailureError is raised, and the values of C{self} 711 and C{other} are undefined. 712 """ 713 if trace: 714 # apply_forwards to get reentrancy links right: 715 self._apply_forwards({}) 716 other._apply_forwards({}) 717 print ' '+'| '*depth+' /'+`self` 718 print ' '+'| '*depth+'|\\'+ `other` 719 720 # Look up the "cannonical" copy of other. 721 while hasattr(other, '_forward'): other = other._forward 722 723 # If self is already identical to other, we're done. 724 # Note: this, together with the forward pointers, ensures 725 # that unification will terminate even for cyclic structures. 726 # [XX] Verify/prove this? 727 if self is other: 728 if trace: 729 print ' '+'| '*depth+'|' 730 print ' '+'| '*depth+'| (identical objects)' 731 print ' '+'| '*depth+'|' 732 print ' '+'| '*depth+'+-->'+`self` 733 return 734 735 # Set other's forward pointer to point to self; this makes us 736 # into the cannonical copy of other. 737 other._forward = self 738 739 for (fname, otherval) in other._features.items(): 740 if trace: 741 trace_otherval = otherval 742 trace_selfval_defined = self._features.has_key(fname) 743 trace_selfval = self._features.get(fname) 744 if self._features.has_key(fname): 745 selfval = self._features[fname] 746 # If selfval or otherval is a bound variable, then 747 # replace it by the variable's bound value. 748 if isinstance(selfval, FeatureVariable): 749 selfval = bindings.lookup(selfval) 750 if isinstance(otherval, FeatureVariable): 751 otherval = bindings.lookup(otherval) 752 753 if trace: 754 print ' '+'| '*(depth+1) 755 print ' '+'%s| Unify %s feature:'%('| '*(depth),fname) 756 757 # Case 1: unify 2 feature structures (recursive case) 758 if (isinstance(selfval, FeatureStructure) and 759 isinstance(otherval, FeatureStructure)): 760 selfval._destructively_unify(otherval, bindings, 761 trace, depth+1) 762 763 # Case 2: unify 2 variables 764 elif (isinstance(selfval, FeatureVariable) and 765 isinstance(otherval, FeatureVariable)): 766 self._features[fname] = selfval.alias(otherval) 767 768 # Case 3: unify a variable with a value 769 elif isinstance(selfval, FeatureVariable): 770 bindings.bind(selfval, otherval) 771 elif isinstance(otherval, FeatureVariable): 772 bindings.bind(otherval, selfval) 773 774 # Case 4A: unify two strings, case-insensitively. 775 elif ci_str_cmp and \ 776 isinstance(selfval, str) and isinstance(otherval, str)\ 777 and selfval.upper() == otherval.upper(): 778 pass 779 780 # Case 4: unify 2 non-equal values (failure case) 781 elif selfval != otherval: 782 if trace: print ' '+'| '*depth + 'X <-- FAIL' 783 raise FeatureStructure._UnificationFailureError() 784 785 # Case 5: unify 2 equal values 786 else: pass 787 788 if trace and not isinstance(selfval, FeatureStructure): 789 # apply_forwards to get reentrancy links right: 790 if isinstance(trace_selfval, FeatureStructure): 791 trace_selfval._apply_forwards({}) 792 if isinstance(trace_otherval, FeatureStructure): 793 trace_otherval._apply_forwards({}) 794 print ' '+'%s| /%r' % ('| '*(depth), trace_selfval) 795 print ' '+'%s| |\\%r' % ('| '*(depth), trace_otherval) 796 print ' '+'%s| +-->%r' % ('| '*(depth), 797 self._features[fname]) 798 799 # Case 5: copy from other 800 else: 801 self._features[fname] = otherval 802 803 if trace: 804 # apply_forwards to get reentrancy links right: 805 self._apply_forwards({}) 806 print ' '+'| '*depth+'|' 807 print ' '+'| '*depth+'+-->'+`self` 808 if len(bindings.bound_variables()) > 0: 809 print ' '+'| '*depth+' '+`bindings`
810
811 - def _apply_forwards_to_bindings(self, bindings):
812 """ 813 Replace any feature structure that has a forward pointer with 814 the target of its forward pointer (to preserve reentrancy). 815 """ 816 for var in bindings.bound_variables(): 817 value = bindings.lookup(var) 818 if (isinstance(value, FeatureStructure) and 819 hasattr(value, '_forward')): 820 while hasattr(value, '_forward'): 821 value = value._forward 822 bindings.bind(var, value)
823
824 - def _apply_forwards(self, visited):
825 """ 826 Replace any feature structure that has a forward pointer with 827 the target of its forward pointer (to preserve reentrancy). 828 """ 829 # Visit each node only once: 830 if visited.has_key(id(self)): return 831 visited[id(self)] = 1 832 833 for fname, fval in self._features.items(): 834 if isinstance(fval, FeatureStructure): 835 while hasattr(fval, '_forward'): 836 fval = fval._forward 837 self._features[fname] = fval 838 fval._apply_forwards(visited)
839
840 - def _rebind_aliased_variables(self, bindings, visited):
841 # Visit each node only once: 842 if visited.has_key(id(self)): return 843 visited[id(self)] = 1 844 845 for (fname, fval) in self._features.items(): 846 if isinstance(fval, AliasedFeatureVariable): 847 bindings.lookup(fval, True) 848 elif isinstance(fval, FeatureStructure): 849 fval._rebind_aliased_variables(bindings, visited)
850
851 - def subsumes(self, other):
852 """ 853 Check if this feature structure subsumes another feature structure. 854 """ 855 return other.equal_values(self.unify(other))
856 857 ################################################################# 858 ## String Representations 859 ################################################################# 860
861 - def __repr__(self):
862 """ 863 Display a single-line representation of this feature structure, 864 suitable for embedding in other representations. 865 """ 866 return self._repr(self._find_reentrances({}), {})
867
868 - def __str__(self):
869 """ 870 Display a multi-line representation of this feature structure 871 as an FVM (feature value matrix). 872 """ 873 return '\n'.join(self._str(self._find_reentrances({}), {}))
874
875 - def _repr(self, reentrances, reentrance_ids):
876 """ 877 @return: A string representation of this feature structure. 878 @param reentrances: A dictionary that maps from the C{id} of 879 each feature value in self, indicating whether that value 880 is reentrant or not. 881 @param reentrance_ids: A dictionary mapping from the C{id}s 882 of feature values to unique identifiers. This is modified 883 by C{repr}: the first time a reentrant feature value is 884 displayed, an identifier is added to reentrance_ids for 885 it. 886 """ 887 segments = [] 888 889 # If this is the first time we've seen a reentrant structure, 890 # then assign it a unique identifier. 891 if reentrances[id(self)]: 892 assert not reentrance_ids.has_key(id(self)) 893 reentrance_ids[id(self)] = `len(reentrance_ids)+1` 894 895 items = self._features.items() 896 items.sort() # sorting note: keys are unique strings, so we'll 897 # never fall through to comparing values. 898 for (fname, fval) in items: 899 if not isinstance(fval, FeatureStructure): 900 segments.append('%s=%r' % (fname, fval)) 901 elif reentrance_ids.has_key(id(fval)): 902 segments.append('%s->(%s)' % (fname, 903 reentrance_ids[id(fval)])) 904 else: 905 fval_repr = fval._repr(reentrances, reentrance_ids) 906 segments.append('%s=%s' % (fname, fval_repr)) 907 908 # If it's reentrant, then add on an identifier tag. 909 if reentrances[id(self)]: 910 return '(%s)[%s]' % (reentrance_ids[id(self)], 911 ', '.join(segments)) 912 else: 913 return '[%s]' % (', '.join(segments))
914
915 - def _str(self, reentrances, reentrance_ids):
916 """ 917 @return: A list of lines composing a string representation of 918 this feature structure. 919 @param reentrances: A dictionary that maps from the C{id} of 920 each feature value in self, indicating whether that value 921 is reentrant or not. 922 @param reentrance_ids: A dictionary mapping from the C{id}s 923 of feature values to unique identifiers. This is modified 924 by C{repr}: the first time a reentrant feature value is 925 displayed, an identifier is added to reentrance_ids for 926 it. 927 """ 928 # If this is the first time we've seen a reentrant structure, 929 # then tack on an id string. 930 if reentrances[id(self)]: 931 assert not reentrance_ids.has_key(id(self)) 932 reentrance_ids[id(self)] = `len(reentrance_ids)+1` 933 934 # Special case: 935 if len(self._features) == 0: 936 if reentrances[id(self)]: 937 return ['(%s) []' % reentrance_ids[id(self)]] 938 else: 939 return ['[]'] 940 941 # What's the longest feature name? Use this to align names. 942 maxfnamelen = max(len(k) for k in self.feature_names()) 943 944 lines = [] 945 items = self._features.items() 946 items.sort() # sorting note: keys are unique strings, so we'll 947 # never fall through to comparing values. 948 for (fname, fval) in items: 949 if not isinstance(fval, FeatureStructure): 950 # It's not a nested feature structure -- just print it. 951 lines.append('%s = %r' % (fname.ljust(maxfnamelen), fval)) 952 953 elif reentrance_ids.has_key(id(fval)): 954 # It's a feature structure we've seen before -- print 955 # the reentrance id. 956 lines.append('%s -> (%s)' % (fname.ljust(maxfnamelen), 957 reentrance_ids[id(fval)])) 958 959 else: 960 # It's a new feature structure. Separate it from 961 # other values by a blank line. 962 if lines and lines[-1] != '': lines.append('') 963 964 # Recursively print the feature's value (fval). 965 fval_lines = fval._str(reentrances, reentrance_ids) 966 967 # Indent each line to make room for fname. 968 fval_lines = [(' '*(maxfnamelen+3))+l for l in fval_lines] 969 970 # Pick which line we'll display fname on. 971 nameline = (len(fval_lines)-1)/2 972 973 fval_lines[nameline] = ( 974 fname.ljust(maxfnamelen)+' ='+ 975 fval_lines[nameline][maxfnamelen+2:]) 976 977 # Add the feature structure to the output. 978 lines += fval_lines 979 980 # Separate FeatureStructures by a blank line. 981 lines.append('') 982 983 # Get rid of any excess blank lines. 984 if lines[-1] == '': lines = lines[:-1] 985 986 # Add brackets around everything. 987 maxlen = max(len(line) for line in lines) 988 lines = ['[ %s%s ]' % (line, ' '*(maxlen-len(line))) for line in lines] 989 990 # If it's reentrant, then add on an identifier tag. 991 if reentrances[id(self)]: 992 idstr = '(%s) ' % reentrance_ids[id(self)] 993 lines = [(' '*len(idstr))+l for l in lines] 994 idline = (len(lines)-1)/2 995 lines[idline] = idstr + lines[idline][len(idstr):] 996 997 return lines
998 999 # Walk through the feature tree. The first time we see a feature 1000 # value, map it to False (not reentrant). If we see a feature 1001 # value more than once, then map it to C{True} (reentrant).
1002 - def _find_reentrances(self, reentrances):
1003 """ 1004 Find all of the feature values contained by self that are 1005 reentrant (i.e., that can be reached by multiple paths through 1006 feature structure's features). Return a dictionary 1007 C{reentrances} that maps from the C{id} of each feature value 1008 to a boolean value, indicating whether it is reentrant or not. 1009 """ 1010 if reentrances.has_key(id(self)): 1011 # We've seen it more than once. 1012 reentrances[id(self)] = True 1013 else: 1014 # This is the first time we've seen it. 1015 reentrances[id(self)] = False 1016 1017 # Recurse to contained feature structures. 1018 for fval in self._features.values(): 1019 if isinstance(fval, FeatureStructure): 1020 fval._find_reentrances(reentrances) 1021 1022 return reentrances
1023 1024 ################################################################# 1025 ## Parsing 1026 ################################################################# 1027 1028 # [classmethod]
1029 - def parse(cls, s):
1030 """ 1031 Convert a string representation of a feature structure (as 1032 displayed by repr) into a C{FeatureStructure}. This parse 1033 imposes the following restrictions on the string 1034 representation: 1035 - Feature names cannot contain any of the following: 1036 whitespace, parenthases, quote marks, equals signs, 1037 dashes, and square brackets. 1038 - Only the following basic feature value are supported: 1039 strings, integers, variables, C{None}, and unquoted 1040 alphanumeric strings. 1041 - For reentrant values, the first mention must specify 1042 a reentrance identifier and a value; and any subsequent 1043 mentions must use arrows (C{'->'}) to reference the 1044 reentrance identifier. 1045 """ 1046 try: 1047 value, position = cls._parse(s, 0, {}) 1048 except ValueError, e: 1049 estr = ('Error parsing field structure\n\n ' + 1050 s + '\n ' + ' '*e.args[1] + '^ ' + 1051 'Expected %s\n' % e.args[0]) 1052 raise ValueError, estr 1053 if position != len(s): raise ValueError() 1054 return value
1055 1056 # Regular expressions for parsing. 1057 _PARSE_RE = {'name': re.compile(r'\s*([^\s\(\)"\'\-=\[\]]+)\s*'), 1058 'ident': re.compile(r'\s*\((\d+)\)\s*'), 1059 'reentrance': re.compile(r'\s*->\s*'), 1060 'assign': re.compile(r'\s*=\s*'), 1061 'bracket': re.compile(r'\s*]\s*'), 1062 'comma': re.compile(r'\s*,\s*'), 1063 'none': re.compile(r'None(?=\s|\]|,)'), 1064 'int': re.compile(r'-?\d+(?=\s|\]|,)'), 1065 'var': re.compile(r'\?[a-zA-Z_][a-zA-Z0-9_]*'+'|'+ 1066 r'\?<[a-zA-Z_][a-zA-Z0-9_]*'+ 1067 r'(=[a-zA-Z_][a-zA-Z0-9_]*)*>'), 1068 'symbol': re.compile(r'\w+'), 1069 'stringmarker': re.compile("['\"\\\\]")} 1070 1071 # [classmethod]
1072 - def _parse(cls, s, position=0, reentrances=None):
1073 """ 1074 Helper function that parses a feature structure. 1075 @param s: The string to parse. 1076 @param position: The position in the string to start parsing. 1077 @param reentrances: A dictionary from reentrance ids to values. 1078 @return: A tuple (val, pos) of the feature structure created 1079 by parsing and the position where the parsed feature 1080 structure ends. 1081 """ 1082 # A set of useful regular expressions (precompiled) 1083 _PARSE_RE = cls._PARSE_RE 1084 1085 # Check that the string starts with an open bracket. 1086 if s[position] != '[': raise ValueError('open bracket', position) 1087 position += 1 1088 1089 # If it's immediately followed by a close bracket, then just 1090 # return an empty feature structure. 1091 match = _PARSE_RE['bracket'].match(s, position) 1092 if match is not None: return cls(), match.end() 1093 1094 # Build a list of the features defined by the structure. 1095 # Each feature has one of the three following forms: 1096 # name = value 1097 # name (id) = value 1098 # name -> (target) 1099 features = {} 1100 while position < len(s): 1101 # Use these variables to hold info about the feature: 1102 name = id = target = val = None 1103 1104 # Find the next feature's name. 1105 match = _PARSE_RE['name'].match(s, position) 1106 if match is None: raise ValueError('feature name', position) 1107 name = match.group(1) 1108 position = match.end() 1109 1110 # Check for a reentrance link ("-> (target)") 1111 match = _PARSE_RE['reentrance'].match(s, position) 1112 if match is not None: 1113 position = match.end() 1114 match = _PARSE_RE['ident'].match(s, position) 1115 if match is None: raise ValueError('identifier', position) 1116 target = match.group(1) 1117 position = match.end() 1118 try: features[name] = reentrances[target] 1119 except: raise ValueError('bound identifier', position) 1120 1121 # If it's not a reentrance link, it must be an assignment. 1122 else: 1123 match = _PARSE_RE['assign'].match(s, position) 1124 if match is None: raise ValueError('equals sign', position) 1125 position = match.end() 1126 1127 # Find the feature's id (if specified) 1128 match = _PARSE_RE['ident'].match(s, position) 1129 if match is not None: 1130 id = match.group(1) 1131 if reentrances.has_key(id): 1132 raise ValueError('new identifier', position+1) 1133 position = match.end() 1134 1135 val, position = cls._parseval(s, position, reentrances) 1136 features[name] = val 1137 if id is not None: 1138 reentrances[id] = val 1139 1140 # Check for a close bracket 1141 match = _PARSE_RE['bracket'].match(s, position) 1142 if match is not None: 1143 return cls(**features), match.end() 1144 1145 # Otherwise, there should be a comma 1146 match = _PARSE_RE['comma'].match(s, position) 1147 if match is None: raise ValueError('comma', position) 1148 position = match.end() 1149 1150 # We never saw a close bracket. 1151 raise ValueError('close bracket', position)
1152 1153 # [classmethod]
1154 - def _parseval(cls, s, position, reentrances):
1155 """ 1156 Helper function that parses a feature value. Currently 1157 supports: None, integers, variables, strings, nested feature 1158 structures. 1159 @param s: The string to parse. 1160 @param position: The position in the string to start parsing. 1161 @param reentrances: A dictionary from reentrance ids to values. 1162 @return: A tuple (val, pos) of the value created by parsing 1163 and the position where the parsed value ends. 1164 """ 1165 # A set of useful regular expressions (precompiled) 1166 _PARSE_RE = cls._PARSE_RE 1167 1168 # End of string (error) 1169 if position == len(s): raise ValueError('value', position) 1170 1171 # String value 1172 if s[position] in "'\"": 1173 start = position 1174 quotemark = s[position:position+1] 1175 position += 1 1176 while 1: 1177 match = _PARSE_RE['stringmarker'].search(s, position) 1178 if not match: raise ValueError('close quote', position) 1179 position = match.end() 1180 if match.group() == '\\': position += 1 1181 elif match.group() == quotemark: 1182 return eval(s[start:position]), position 1183 1184 # Nested feature structure 1185 if s[position] == '[': 1186 return cls._parse(s, position, reentrances) 1187 1188 # Variable 1189 match = _PARSE_RE['var'].match(s, position) 1190 if match is not None: 1191 return FeatureVariable.parse(match.group()), match.end() 1192 1193 # None 1194 match = _PARSE_RE['none'].match(s, position) 1195 if match is not None: 1196 return None, match.end() 1197 1198 # Integer value 1199 match = _PARSE_RE['int'].match(s, position) 1200 if match is not None: 1201 return int(match.group()), match.end() 1202 1203 # Alphanumeric symbol (must be checked after integer) 1204 match = _PARSE_RE['symbol'].match(s, position) 1205 if match is not None: 1206 return match.group(), match.end() 1207 1208 # We don't know how to parse this value. 1209 raise ValueError('value', position)
1210 1211 _parseval=classmethod(_parseval) 1212 _parse=classmethod(_parse) 1213 parse=classmethod(parse)
1214 1215 #////////////////////////////////////////////////////////////////////// 1216 # TESTING... 1217 #////////////////////////////////////////////////////////////////////// 1218 1219 import unittest 1220 1221 # Note: since FeatureStructure.__repr__() sorts by keys before 1222 # displaying, there is a single unique string repr for each 1223 # FeatureStructure.
1224 -class FeatureStructureTestCase(unittest.TestCase):
1225 'Unit testing for FeatureStructure' 1226
1227 - def testUnification(self):
1228 'Basic unification tests' 1229 1230 # Copying from self to other. 1231 fs1 = FeatureStructure(number='singular') 1232 fs2 = fs1.unify(FeatureStructure()) 1233 self.failUnlessEqual(repr(fs2), "[number='singular']") 1234 1235 # Copying from other to self 1236 fs1 = FeatureStructure() 1237 fs2 = fs1.unify(FeatureStructure(number='singular')) 1238 self.failUnlessEqual(repr(fs2), "[number='singular']") 1239 1240 # Cross copying 1241 fs1 = FeatureStructure(number='singular') 1242 fs2 = fs1.unify(FeatureStructure(person=3)) 1243 self.failUnlessEqual(repr(fs2), "[number='singular', person=3]") 1244 1245 # Merging a nested structure 1246 fs1 = FeatureStructure.parse('[A=[B=b]]') 1247 fs2 = FeatureStructure.parse('[A=[C=c]]') 1248 fs3 = fs1.unify(fs2) 1249 self.failUnlessEqual(repr(fs3), "[A=[B='b', C='c']]")
1250
1251 - def testReentrantUnification(self):
1252 'Reentrant unification tests' 1253 # A basic case of reentrant unification 1254 fs1 = FeatureStructure.parse('[A=(1)[B=b], E=[F->(1)]]') 1255 fs2 = FeatureStructure.parse("[A=[C='c'], E=[F=[D='d']]]") 1256 fs3 = fs1.unify(fs2) 1257 fs3repr = "[A=(1)[B='b', C='c', D='d'], E=[F->(1)]]" 1258 self.failUnlessEqual(repr(fs3), fs3repr) 1259 fs3 = fs2.unify(fs1) # Try unifying both ways. 1260 self.failUnlessEqual(repr(fs3), fs3repr) 1261 1262 # More than 2 paths to a value 1263 fs1 = FeatureStructure.parse("[a=[],b=[],c=[],d=[]]") 1264 fs2 = FeatureStructure.parse('[a=(1)[], b->(1), c->(1), d->(1)]') 1265 fs3 = fs1.unify(fs2) 1266 self.failUnlessEqual(repr(fs3), '[a=(1)[], b->(1), c->(1), d->(1)]') 1267 1268 # fs1[a] gets unified with itself: 1269 fs1 = FeatureStructure.parse('[x=(1)[], y->(1)]') 1270 fs2 = FeatureStructure.parse('[x=(1)[], y->(1)]') 1271 fs3 = fs1.unify(fs2)
1272
1273 - def testVariableForwarding(self):
1274 'Bound variables should get forwarded appropriately' 1275 fs1 = FeatureStructure.parse('[A=(1)[X=x], B->(1), C=?cvar, D=?dvar]') 1276 1277 fs2y = FeatureStructure(Y='y') 1278 fs2z = FeatureStructure(Z='z') 1279 fs2 = FeatureStructure.parse('[A=(1)[Y=y], B=(2)[Z=z], C->(1), D->(2)]') 1280 1281 fs3 = fs1.unify(fs2) 1282 fs3repr = ("[A=(1)[X='x', Y='y', Z='z'], B->(1), C->(1), D->(1)]") 1283 self.failUnlessEqual(repr(fs3), fs3repr)
1284
1285 - def testCyclicStructures(self):
1286 'Cyclic structure tests' 1287 # Create a cyclic structure via unification. 1288 fs1 = FeatureStructure.parse('[F=(1)[], G->(1)]') 1289 fs2 = FeatureStructure.parse('[F=[H=(2)[]], G->(2)]') 1290 fs3 = fs1.unify(fs2) 1291 1292 # Check that we got the value right. 1293 self.failUnlessEqual(repr(fs3), '[F=(1)[H->(1)], G->(1)]') 1294 1295 # Check that we got the cyclicity right. 1296 self.failUnless(fs3['F'] is fs3['G']) 1297 self.failUnless(fs3['F'] is fs3['G', 'H']) 1298 self.failUnless(fs3['F'] is fs3['G', 'H', 'H']) 1299 self.failUnless(fs3['F'] is fs3[('G',)+(('H',)*10)]) 1300 1301 # Create a cyclic structure with variables. 1302 x = FeatureVariable('x') 1303 fs1 = FeatureStructure(F=FeatureStructure(H=x)) 1304 fs2 = FeatureStructure(F=x) 1305 fs3 = fs1.unify(fs2) 1306 1307 # Check that we got the value right. 1308 self.failUnlessEqual(repr(fs3), '[F=(1)[H->(1)]]') 1309 1310 # Check that we got the cyclicity right. 1311 self.failUnless(fs3['F'] is fs3['F','H']) 1312 self.failUnless(fs3['F'] is fs3['F','H','H']) 1313 self.failUnless(fs3['F'] is fs3[('F',)+(('H',)*10)]) 1314 1315 # Cyclic structure as LHS 1316 fs4 = FeatureStructure.parse('[F=[H=[H=[H=(1)[]]]], K->(1)]') 1317 fs5 = fs3.unify(fs4) 1318 self.failUnlessEqual(repr(fs5), '[F=(1)[H->(1)], K->(1)]') 1319 1320 # Cyclic structure as RHS 1321 fs6 = fs4.unify(fs3) 1322 self.failUnlessEqual(repr(fs6), '[F=(1)[H->(1)], K->(1)]') 1323 1324 # LHS and RHS both cyclic 1325 fs7 = fs3.unify(fs3.deepcopy())
1326
1328 'Variable bindings should preserve reentrance.' 1329 bindings = FeatureBindings() 1330 fs1 = FeatureStructure.parse("[a=?x]") 1331 fs2 = fs1.unify(FeatureStructure.parse("[a=[]]"), bindings) 1332 fs3 = fs2.unify(FeatureStructure.parse("[b=?x]"), bindings) 1333 self.failUnlessEqual(repr(fs3), '[a=(1)[], b->(1)]')
1334
1335 - def testVariableMerging(self):
1336 'Aliased variable tests' 1337 fs1 = FeatureStructure.parse("[a=?x, b=?x]") 1338 fs2 = fs1.unify(FeatureStructure.parse("[b=?y, c=?y]")) 1339 self.failUnlessEqual(repr(fs2), '[a=?x, b=?<x=y>, c=?y]') 1340 fs3 = fs2.unify(FeatureStructure.parse("[a=1]")) 1341 self.failUnlessEqual(repr(fs3), '[a=1, b=1, c=1]') 1342 1343 fs1 = FeatureStructure.parse("[a=1]") 1344 fs2 = FeatureStructure.parse("[a=?x, b=?x]") 1345 fs3 = fs2.unify(fs1) 1346 self.failUnlessEqual(repr(fs3), '[a=1, b=1]')
1347
1348 -def testsuite():
1349 t1 = unittest.makeSuite(FeatureStructureTestCase) 1350 return unittest.TestSuite( (t1,) )
1351
1352 -def test(verbosity):
1353 runner = unittest.TextTestRunner(verbosity=verbosity) 1354 runner.run(testsuite())
1355 1356 #////////////////////////////////////////////////////////////////////// 1357 # Demo.. 1358 #////////////////////////////////////////////////////////////////////// 1359
1360 -def display_unification(fs1, fs2, indent=' '):
1361 # Print the two input feature structures, side by side. 1362 fs1_lines = str(fs1).split('\n') 1363 fs2_lines = str(fs2).split('\n') 1364 if len(fs1_lines) > len(fs2_lines): 1365 blankline = '['+' '*(len(fs2_lines[0])-2)+']' 1366 fs2_lines += [blankline]*len(fs1_lines) 1367 else: 1368 blankline = '['+' '*(len(fs1_lines[0])-2)+']' 1369 fs1_lines += [blankline]*len(fs2_lines) 1370 for (fs1_line, fs2_line) in zip(fs1_lines, fs2_lines): 1371 print indent + fs1_line + ' ' + fs2_line 1372 print indent+'-'*len(fs1_lines[0])+' '+'-'*len(fs2_lines[0]) 1373 1374 linelen = len(fs1_lines[0])*2+3 1375 print indent+'| |'.center(linelen) 1376 print indent+'+-----UNIFY-----+'.center(linelen) 1377 print indent+'|'.center(linelen) 1378 print indent+'V'.center(linelen) 1379 1380 bindings = FeatureBindings() 1381 1382 result = fs1.unify(fs2, bindings) 1383 if result is None: 1384 print indent+'(FAILED)'.center(linelen) 1385 else: 1386 print '\n'.join(indent+l.center(linelen) 1387 for l in str(result).split('\n')) 1388 if bindings and len(bindings.bound_variables()) > 0: 1389 print repr(bindings).center(linelen) 1390 return result
1391
1392 -def demo(trace=False):
1393 import random, sys 1394 1395 HELP = ''' 1396 1-%d: Select the corresponding feature structure 1397 q: Quit 1398 t: Turn tracing on or off 1399 l: List all feature structures 1400 ?: Help 1401 ''' 1402 1403 print ''' 1404 This demo will repeatedly present you with a list of feature 1405 structures, and ask you to choose two for unification. Whenever a 1406 new feature structure is generated, it is added to the list of 1407 choices that you can pick from. However, since this can be a 1408 large number of feature structures, the demo will only print out a 1409 random subset for you to choose between at a given time. If you 1410 want to see the complete lists, type "l". For a list of valid 1411 commands, type "?". 1412 ''' 1413 print 'Press "Enter" to continue...' 1414 sys.stdin.readline() 1415 1416 fstruct_strings = [ 1417 '[agr=[number=sing, gender=masc]]', 1418 '[agr=[gender=masc, person=3rd]]', 1419 '[agr=[gender=fem, person=3rd]]', 1420 '[subj=[agr=(1)[]], agr->(1)]', 1421 '[obj=?x]', '[subj=?x]', 1422 '[/=None]', '[/=NP]', 1423 '[cat=NP]', '[cat=VP]', '[cat=PP]', 1424 '[subj=[agr=[gender=?y]], obj=[agr=[gender=?y]]]', 1425 '[gender=masc, agr=?C]', 1426 '[gender=?S, agr=[gender=?S,person=3rd]]' 1427 ] 1428 1429 all_fstructs = [(i, FeatureStructure.parse(fstruct_strings[i])) 1430 for i in range(len(fstruct_strings))] 1431 1432 def list_fstructs(fstructs): 1433 for i, fstruct in fstructs: 1434 print 1435 lines = str(fstruct).split('\n') 1436 print '%3d: %s' % (i+1, lines[0]) 1437 for line in lines[1:]: print ' '+line 1438 print
1439 1440 1441 while 1: 1442 # Pick 5 feature structures at random from the master list. 1443 MAX_CHOICES = 5 1444 if len(all_fstructs) > MAX_CHOICES: 1445 fstructs = random.sample(all_fstructs, MAX_CHOICES) 1446 fstructs.sort() 1447 else: 1448 fstructs = all_fstructs 1449 1450 print '_'*75 1451 1452 print 'Choose two feature structures to unify:' 1453 list_fstructs(fstructs) 1454 1455 selected = [None,None] 1456 for (nth,i) in (('First',0), ('Second',1)): 1457 while selected[i] is None: 1458 print ('%s feature structure (1-%d,q,t,l,?): ' 1459 % (nth, len(all_fstructs))), 1460 try: 1461 input = sys.stdin.readline().strip() 1462 if input in ('q', 'Q', 'x', 'X'): return 1463 if input in ('t', 'T'): 1464 trace = not trace 1465 print ' Trace = %s' % trace 1466 continue 1467 if input in ('h', 'H', '?'): 1468 print HELP % len(fstructs); continue 1469 if input in ('l', 'L'): 1470 list_fstructs(all_fstructs); continue 1471 num = int(input)-1 1472 selected[i] = all_fstructs[num][1] 1473 print 1474 except: 1475 print 'Bad sentence number' 1476 continue 1477 1478 if trace: 1479 result = selected[0].unify(selected[1], trace=1) 1480 else: 1481 result = display_unification(selected[0], selected[1]) 1482 if result is not None: 1483 for i, fstruct in all_fstructs: 1484 if `result` == `fstruct`: break 1485 else: 1486 all_fstructs.append((len(all_fstructs), result)) 1487 1488 print '\nType "Enter" to continue unifying; or "q" to quit.' 1489 input = sys.stdin.readline().strip() 1490 if input in ('q', 'Q', 'x', 'X'): return 1491 1492 1493 if __name__ == '__main__': 1494 test(verbosity=0) 1495 demo() 1496