#
#  start state, scan position is at the beginning of the pattern.
#
#      '['  opens a set expression and '\' begins a back-slash escape.  Both rows
#      name set-finish in the ^ (push) column.
#      Any other leading character goes to errorDeath with doRuleError.
#
#      NOTE(review): the 'p', 'P', 'N' and 'x' rows of set-escape push a return
#      state of their own (set-after-set / set-after-lit), so a pattern that
#      begins with one of those escapes continues in a set-after-* state once
#      the escape has been scanned - confirm that this is intended.
#
start:
    '['                  n set-open       ^set-finish
    '\'                  n set-escape     ^set-finish
    default                errorDeath                               doRuleError
    
#
# [set expression] parsing,
#    All states involved in parsing set expressions have names beginning with "set-"
#

#  set-open     Entered from the '[' rows of the other states.
#               '^'  runs doSetNegate and continues in set-open2.
#               ':'  goes to set-posix; doSetPosixProp decides there whether
#                    this really is a [:property:].
#               Anything else is left for set-open2 to examine.
set-open:
   '^'                   n  set-open2                               doSetNegate
   ':'                      set-posix                               doSetPosixProp
   default                  set-open2

#  set-open2    Reached from set-open, i.e. after '[' or '[^'.
#               A ']' in this position is handed to doSetLiteral and parsing
#               continues in set-after-lit; it does not close the set.
#               Anything else is passed on to set-start.
set-open2:
   ']'                   n  set-after-lit                           doSetLiteral
   default                  set-start

#  set-posix:
#                  scanned a '[:'  If it really is a [:property:], doSetPosixProp will have
#                  moved the scan to the closing ']'.  If it wasn't a property
#                  expression, the scan will still be at the opening ':', which should
#                  be interpreted as a normal set expression.
#
#                  ']'  the property was consumed; finish the set (doSetEnd) and pop.
#                  ':'  not a property; carry on in set-start with the ':' still
#                       to be processed there.
set-posix:
    ']'                  n   pop                                    doSetEnd
    ':'                      set-start
    default                  errorDeath                             doRuleError  # should not be possible.

#
#   set-start   after the [ and special case leading characters (^ and/or ]) but before
#               everything else.   A '-' is literal at this point.
#
#               A leading '-' or '&' is checked separately in set-start-dash /
#               set-start-amp so that "[--" and "[&&" can be rejected.
#
#               NOTE(review): unlike the set-after-* states there is no eof row
#               here, so end of input is handled by the default row - confirm
#               that doSetLiteral copes with that.
#
set-start:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '\'                  n  set-escape
    '-'                  n  set-start-dash
    '&'                  n  set-start-amp
    default              n  set-after-lit                           doSetLiteral

#    set-start-dash    Turn "[--" into a syntax error.
#                           "[-x" is good, - and x are literals.
#
#                      The default row only records the dash (doSetAddDash); the
#                      character that follows it is then handled by set-after-lit.
#
set-start-dash:
    '-'                     errorDeath                              doRuleError
    default                 set-after-lit                           doSetAddDash

#    set-start-amp     Turn "[&&" into a syntax error.
#                           "[&x" is good, & and x are literals.
#
#                      The default row only records the ampersand (doSetAddAmp); the
#                      character that follows it is then handled by set-after-lit.
#
set-start-amp:
    '&'                     errorDeath                              doRuleError
    default                 set-after-lit                           doSetAddAmp

#
#   set-after-lit    The last thing scanned was a literal character within a set.
#                    Can be followed by anything.  Single '-' or '&' are
#                    literals in this context, not operators.
#
#                    '-' and '&' are resolved in set-lit-dash / set-lit-amp, which
#                    look at the following character.
#                    Reaching end of input here means the set was never closed
#                    (doSetNoCloseError).
set-after-lit:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-lit-dash
    '&'                  n  set-lit-amp
    '\'                  n  set-escape
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral

#
#   set-after-set    Return state pushed by the '[' rows (nested set) and by the
#                    'p' / 'P' rows of set-escape.
#                    A following '-' or '&' is resolved in set-set-dash /
#                    set-set-amp, where it may turn out to be an operator.
#                    Reaching end of input here means the set was never closed
#                    (doSetNoCloseError).
set-after-set:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-set-dash
    '&'                  n  set-set-amp
    '\'                  n  set-escape
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral

#
#   set-after-range  Entered after doSetRange (from set-lit-dash and
#                    set-lit-dash-escape) and after the \s \S \w \W \d \D rows
#                    of set-escape.
#                    A following '-' or '&' is resolved in set-range-dash /
#                    set-range-amp.
#                    Reaching end of input here means the set was never closed
#                    (doSetNoCloseError).
set-after-range:
    ']'                  n  pop                                     doSetEnd
    '['                  n  set-open      ^set-after-set            doSetBeginUnion
    '-'                  n  set-range-dash
    '&'                  n  set-range-amp
    '\'                  n  set-escape
    eof                     errorDeath                              doSetNoCloseError
    default              n  set-after-lit                           doSetLiteral
    

# set-after-op
#     After a --  or &&
#     It is an error to close a set at this point.
#
#     An operand must follow: a nested [set], a back-slash escape, or a literal.
#     A ']' here is reported through doSetOpError.
#
set-after-op:
    '['                  n  set-open         ^set-after-set         doSetBeginUnion
    ']'                     errorDeath                              doSetOpError
    '\'                  n  set-escape
    default              n  set-after-lit                           doSetLiteral

#
#   set-set-amp
#      Have scanned [[set]&
#      Could be a '&' intersection operator, if a set follows.
#      Could be the start of a '&&' operator.
#      Otherwise is a literal.
#
#      '['      single-'&' intersection with the set that follows
#               (doSetBeginIntersection1).
#      '&'      the '&&' operator (doSetIntersection2); an operand must follow.
#      other    the '&' was a literal (doSetAddAmp); the current character is
#               left for set-after-lit.
set-set-amp:
    '['                  n  set-open      ^set-after-set           doSetBeginIntersection1
    '&'                  n  set-after-op                           doSetIntersection2
    default                 set-after-lit                          doSetAddAmp


# set-lit-amp   Have scanned "[literals&"
#               Could be a start of "&&" operator or a literal
#               In [abc&[def]],   the '&' is a literal
#
#               There is deliberately no '[' row: a '[' falls to the default row,
#               which records the '&' as a literal (doSetAddAmp).
#
set-lit-amp:
    '&'                  n  set-after-op                            doSetIntersection2
    default                 set-after-lit                           doSetAddAmp


#
#  set-set-dash
#      Have scanned [set]-
#      Could be a '-' difference operator, if a [set] follows.
#      Could be the start of a '--' operator.
#      Otherwise is a literal.
#
#      '['      single-'-' difference with the set that follows
#               (doSetBeginDifference1).
#      '-'      the '--' operator (doSetDifference2); an operand must follow.
#      other    the '-' was a literal (doSetAddDash); the current character is
#               left for set-after-lit.
set-set-dash:
    '['                  n  set-open      ^set-after-set           doSetBeginDifference1
    '-'                  n  set-after-op                           doSetDifference2
    default                 set-after-lit                          doSetAddDash


#
#  set-range-dash
#      scanned  a-b-  or \w-
#         any set or range like item where the trailing single '-' should
#         be literal, not a set difference operation.
#         A trailing "--" is still a difference operator.
#
#      The default row records the '-' as a literal (doSetAddDash) and leaves
#      the current character for set-after-lit.
set-range-dash:
    '-'                  n  set-after-op                           doSetDifference2
    default                 set-after-lit                          doSetAddDash


#
#  set-range-amp
#      The '&' counterpart of set-range-dash; entered from the '&' row of
#      set-after-range.
#      A second '&' makes the "&&" operator (doSetIntersection2); otherwise the
#      '&' is recorded as a literal (doSetAddAmp) and the current character is
#      left for set-after-lit.
set-range-amp:
    '&'                  n  set-after-op                           doSetIntersection2
    default                 set-after-lit                          doSetAddAmp


#  set-lit-dash
#     Have scanned "[literals-" Could be a range or a -- operator or a literal
#     In [abc-[def]], the '-' is a literal (confirmed with a Java test)
#        [abc-\p{xx}  the '-' is an error
#        [abc-]       the '-' is a literal
#        [ab-xy]      the '-' is a range
#
#     The '[' and ']' rows only record the dash (doSetAddDash); the bracket
#     itself is then processed by set-after-lit.
#     A back-slash is examined further in set-lit-dash-escape.
#
set-lit-dash:
    '-'                  n  set-after-op                            doSetDifference2
    '['                     set-after-lit                           doSetAddDash
    ']'                     set-after-lit                           doSetAddDash
    '\'                  n  set-lit-dash-escape
    default              n  set-after-range                         doSetRange

# set-lit-dash-escape
#
#    scanned "[literal-\"
#    Could be a range, if the \ introduces an escaped literal char or a named char.
#    Otherwise it is an error: an escape that stands for a whole set of
#    characters (\s \S \w \W \d \D \p \P) cannot be the end point of a range.
#
#    The 'p' and 'P' rows make "[abc-\p{xx}" the error that the set-lit-dash
#    notes describe; without them the default row would take the letter 'p'
#    itself as the end of the range.
#
set-lit-dash-escape:
   's'                      errorDeath                             doSetOpError
   'S'                      errorDeath                             doSetOpError
   'w'                      errorDeath                             doSetOpError
   'W'                      errorDeath                             doSetOpError
   'd'                      errorDeath                             doSetOpError
   'D'                      errorDeath                             doSetOpError
   'p'                      errorDeath                             doSetOpError
   'P'                      errorDeath                             doSetOpError
   'N'                      set-name-start    ^set-after-range          doStartNamedChar
   'x'                      set-hex-start    ^set-after-range          doStartHex
   default               n  set-after-range                        doSetRange
# TODO fix 'N', 'x'
#      NOTE(review): unlike the matching rows in set-escape, these two rows do
#      not carry the 'n' flag, so set-name-start / set-hex-start appear to be
#      entered with the scan still on the 'N' / 'x' and their '{' row cannot
#      match.  It is also unclear whether doName / doHex complete a range when
#      they return to set-after-range.  Left unchanged until the actions are
#      confirmed.
   
#
#  set-escape
#       Shared handling of the character that follows a back-slash within a
#       set expression.
#
#       p P            property; scanned by set-prop-start, which returns to
#                      set-after-set
#       N x            named / hex character; scanned by set-name-start /
#                      set-hex-start, which return to set-after-lit
#       s S w W d D    handled by the doSetBackslash_* actions; parsing
#                      continues in set-after-range
#       anything else  taken as an escaped literal (doSetLiteralEscaped)
#
set-escape:
    'p'                  n  set-prop-start   ^set-after-set         doStartSetProp
    'P'                  n  set-prop-start   ^set-after-set         doStartSetProp
    'N'                  n  set-name-start   ^set-after-lit         doStartNamedChar
    'x'                  n  set-hex-start    ^set-after-lit         doStartHex
    's'                  n  set-after-range                         doSetBackslash_s
    'S'                  n  set-after-range                         doSetBackslash_S
    'w'                  n  set-after-range                         doSetBackslash_w
    'W'                  n  set-after-range                         doSetBackslash_W
    'd'                  n  set-after-range                         doSetBackslash_d
    'D'                  n  set-after-range                         doSetBackslash_D
    default              n  set-after-lit                           doSetLiteralEscaped
# TODO add \r, \n, etc

#  set-prop-start    Entered from the 'p' / 'P' rows of set-escape.
#                    The property name must be enclosed in braces, so anything
#                    other than '{' is a syntax error.  The error row carries
#                    doRuleError, like the other syntax-error rows in this table.
set-prop-start:
    '{'                  n  set-prop-cont
    default                 errorDeath                              doRuleError

#  set-prop-cont     Inside the braces of a property escape, scanning the name.
#                    '}'        ends the property (doPropName) and pops.
#                    '=' '≠'    a relation; the value is scanned by set-value.
#                    eof        the closing '}' is missing.  Without this row the
#                               default row, which loops back to this state,
#                               would also be the one selected at end of input.
#                    other      part of the name; keep scanning.
set-prop-cont:
    '}'                  n  pop                                     doPropName
    '='                  n  set-value                               doPropRelation
    '≠'                  n  set-value                               doPropRelation
    eof                     errorDeath                              doRuleError
    default              n  set-prop-cont

#  set-value         Scanning the value part of a property escape, after the
#                    relation character.
#                    '}'        ends the property (doPropValue) and pops.
#                    eof        the closing '}' is missing.  Without this row the
#                               default row, which loops back to this state,
#                               would also be the one selected at end of input.
#                    other      part of the value; keep scanning.
set-value:
    '}'                  n  pop                                     doPropValue
    eof                     errorDeath                              doRuleError
    default              n  set-value

#  set-name-start    Entered from the 'N' rows (named character escape).
#                    The name must be enclosed in braces, so anything other than
#                    '{' is a syntax error.  The error row carries doRuleError,
#                    like the other syntax-error rows in this table.
set-name-start:
    '{'                  n  set-name-cont
    default                 errorDeath                              doRuleError

#  set-name-cont     Inside the braces of a named character escape.
#                    '}'        ends the name (doName) and pops.
#                    space, '-', digits and ASCII letters are accepted as part
#                    of the name.
#                    Any other character is a syntax error.  The error row
#                    carries doRuleError, like the other syntax-error rows in
#                    this table.
set-name-cont:
    '}'                  n  pop                                     doName
    [\ \-0-9A-Za-z]      n  set-name-cont
    default              n  errorDeath                              doRuleError

#  set-hex-start     Entered from the 'x' rows (hex character escape).
#                    The digits must be enclosed in braces, so anything other
#                    than '{' is a syntax error.  The error row carries
#                    doRuleError, like the other syntax-error rows in this table.
set-hex-start:
    '{'                  n  set-hex-cont
    default                 errorDeath                              doRuleError

#  set-hex-cont      Inside the braces of a hex character escape.
#                    '}'        ends the number (doHex) and pops.
#                    Hex digits (0-9, A-F, a-f) are accepted.
#                    Any other character is a syntax error.  The error row
#                    carries doRuleError, like the other syntax-error rows in
#                    this table.
set-hex-cont:
    '}'                  n  pop                                     doHex
    [0-9A-Fa-f]          n  set-hex-cont
    default              n  errorDeath                              doRuleError
    
#
# set-finish
#     Have just encountered the final ']' that completes a [set], and
#     arrived here via a pop.  From here, we exit the set parsing world, and go
#     back to generic regular expression parsing.
#
#     This is the state named in the push column of both rows of the start
#     state.  Its only row runs doSetFinish and goes to exit without looking
#     at the current character.
#
set-finish:
    default                 exit                              doSetFinish
