#!/usr/bin/python

# Parsely - A cross-language tool for parsing and file manipulation.
#
# Copyright (C) 1999-2000 Nick Mathewson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

"""Classes for specifying file formats in Parsely.

   The most common way to access members of this module is via
   the parsely.LoadFormat function, which will give an instance of
   the FileFormat class."""

import parsely.tree
import parsely.remanip
from parsely._util import _wantList, _addFormatMember, \
     _flattenList, _namelist, fileContents

import marshal
import string, re
from types import StringType, NoneType

#######
## For grammar generation.
#######

class FileFormat:
    """A FileFormat specifies the rules by which to break a file into
       its components.  It is the structure which holds all the states,
       patterns, tokens, spaces, rules, and actions.

       Once you have a FileFormat, the only methods you should need to
       call are parse and getTypeSystem.  If you like, you can
       call newNode and getType as aliases to the methods of the same
       name in the TypeSystem object.  All other methods are only used
       in the construction process. 

       You can build it manually (not recommended, but that's what the
       test suite does), via parsely.grammar.generateFileFormat
       or via parsely.LoadFormat.
       """

    ## Fields
    # -- Fields set by FormatElement.register methods.
    # Actions:
    #    Map from action name to (Map from action language to Action object)
    # DefaultSpace:
    #    String value of default trailing space.
    # DistinctLexemes: 
    #    Map from full name of lexeme (space or token) object to Lexeme object.
    # lexemeList:
    #    List of all lexemes (spaces or tokens) in the order of their
    #    declaration.  We need to keep them in order for scanning purposes.
    # InclusiveStates:
    #    Names of all inclusive states.
    # Options:
    #    Map from option names to their values.
    # Patterns:
    #    Map from pattern names to Pattern objects.
    # Rules:
    #    Map from rule names to Rule objects.
    # ruleList
    #    List of all rules, in order of declarations.
    # Space:
    #    List of all Space objects.
    # StartSymbol:
    #    Name of the start rule.
    # States:
    #    Map from state names to State object.
    # Tokens:
    #    Map from short token name to Token object.
    # tokenList
    #    All Token objects, in order of declaration.
    # -- Fields set by process() method.
    # TypeSys:
    #    The parsely.tree.TypeSystem object for this file format.
    # finished:
    #    = 2 iff all process() operations have completed successfully.
    #    = 1 if process() is half-finished.
    #    Yes, this is a kludge.
    # parser: 
    #    The EarleyParser for this file format. (Python mode only.)
    # scanner:
    #    The PCREScanner for this file format. (Python mode only.)
    
    ##Encapsulation:
    # The following classes/functions access the fields of FileFormat
    # directly:
    # - FormatElement members
    #   * Option (Options)
    #   * Start (StartSymbol)
    #   * State (InclusiveStates, StartState, States)
    #   * Pattern (Patterns)
    #   * Lexeme (lexemeList, States)
    #   * Token (Tokens, DistinctLexemes, tokenList)
    #   * DefaultSpace (DefaultSpace)
    #   * Space (DistinctLexemes, Space)
    #   * Rule (Rules, ruleList)
    #   * Action (Actions)
    # - Scanners and Parsers
    #   * EarleyParser (ruleList, StartSymbol), 
    #   * PCREScanner (States, lexemeList, Options)

    def __init__(self, *args):
        """Constructs a new FileFormat object from FormatElement arguments.

           Each argument is a FormatElement or a list of FormatElements;
           the arguments are flattened with _flattenList and every element
           is registered, in order, by calling its register method.

           An empty argument list is fine: further FormatElements can be
           added afterwards through FormatElement.register.

           Once everything is registered, the 'process' method must be
           called before the file format is used.

           Raises GrammarError if an argument is not a FormatElement.
           """
        # Lookup tables, filled in by the register methods.
        self.States = {}
        self.Patterns = {}
        self.Tokens = {}
        self.DistinctLexemes = {}
        self.Rules = {}
        self.Actions = {}

        # Single-valued settings; None means "not declared yet".
        self.StartState = None
        self.StartSymbol = None
        self.DefaultSpace = None

        self.Space = []
        self.InclusiveStates = []

        # Order matters for these, so we remember it.
        self.ruleList = []
        self.tokenList = []
        self.lexemeList = []

        # Stays 0 until process() has run.
        self.finished = 0

        # Every known option starts out unset: flags as 0, values as None.
        options = {}
        for flag in _FlagOptions.keys():
            options[flag] = 0
        for key in _ValOptions.keys():
            options[key] = None
        self.Options = options

        for element in _flattenList(args):
            if isinstance(element, FormatElement):
                element.register(self)
            else:
                raise GrammarError(str(element) + " is not a valid form element")

    def process(self):
        """Runs the final sanity checks on this format, then builds its
           type system, scanner and parser.

           On success self.finished is left at 2; if building the scanner
           or parser fails, it is reset to 0 and the error propagates.

           Raises GrammarError if no start symbol was declared; the
           resolve and lookup calls made here may raise as well."""

        # Every symbol used on the right-hand side of a rule must exist.
        for rule in self.ruleList:
            rule.resolve(self)

        # A start symbol is mandatory...
        if not self.StartSymbol:
            raise GrammarError("No start symbol given")

        # ...and it has to name a real token or rule (this raises otherwise).
        self._getElementByName(self.StartSymbol)

        # Expand the shorthand state lists of each token and space.
        # XXXX (original author's note): not convinced this is the right rule.
        for lexeme in self.lexemeList:
            declared = lexeme.getStates()
            if not declared:
                # No states given: recognized in every inclusive state.
                expanded = self.InclusiveStates
            elif '*' in declared:
                # Wildcard: recognized in every declared state.
                expanded = self.States.keys()
            else:
                continue
            if 'INITIAL' not in expanded:
                expanded = [ "INITIAL" ] + expanded
            lexeme.setStates(expanded)

        # Pick a default space if none was declared explicitly.
        if self.DefaultSpace is None and len(self.Space):
            # Fall back on the first space declared...
            self.DefaultSpace = self.Space[0].default
            # ...but prefer the first one recognized in the INITIAL state.
            for space in self.Space:
                if 'INITIAL' in space.getStates():
                    self.DefaultSpace = space.default
                    break

        # Build the type system: tokens first, then rules.
        typesys = parsely.tree.TypeSystem(self.DefaultSpace)
        self.TypeSys = typesys
        for token in self.tokenList:
            token.registerType(typesys)
        for rule in self.ruleList:
            rule.registerType(typesys)
        typesys._process()

        # Every action name that is referenced must have been declared.
        for lexeme in self.lexemeList:
            lexeme.resolveAction(self)
        for rule in self.ruleList:
            rule.resolveAction(self)

        # finished: 1 while the scanner/parser are being built, 2 on success.
        self.finished = 1
        try:
            self.__makeScannerAndParser()
            self.finished = 2
        finally:
            if self.finished != 2:
                self.finished = 0

    def __makeScannerAndParser(self):
        """Called by process to generate the actual scanner and parser.

           Compiles the Python implementation of every declared action,
           then the actions attached to each lexeme and rule, and stores
           the results in self.scanner and self.parser.

           Requires self.finished == 1, i.e. that it is called from
           process().

           Raises GrammarError if the 'scanner' option is set to anything
           other than 'pcre', or the 'parser' option to anything other
           than 'earley'."""
        # XXXX-C Refactor this _hard_, once we have C support.
        assert self.finished == 1

        scanTrans = getTranslator('python', 'scanner')
        parsTrans = getTranslator('python', 'parser')

        # Compile the Python variant of each named action that has a kind;
        # scanner-side kinds go through the scanner translator, everything
        # else through the parser translator.
        for actions in self.Actions.values():
            if actions.get('python',None) is not None:
                act = actions['python']
                if act.kind is not None:
                    if act.kind in ('initScan', 'finishScan', 'scanFn'):
                        translator = scanTrans
                    else:
                        translator = parsTrans
                    act.compile(translator)

        # Check scanner type; compile actions
        if (not self.Options["scanner"]) or self.Options["scanner"] == 'pcre':
            import parsely.pcre_scanner
            translator = scanTrans
            for lex in self.lexemeList:
                lex.compile(translator)

            self.scanner = parsely.pcre_scanner.PCREScanner(self,0)
        else:
            # Report the offending *scanner* option (the original code
            # mistakenly reported the parser option here).
            raise GrammarError("Unknown scanner type", self.Options["scanner"])

        # Check parser type.
        if (not self.Options["parser"]) or self.Options["parser"] == 'earley':
            translator = parsTrans
            for rule in self.ruleList:
                rule.compile(translator)

            import parsely.earley_parser
            self.parser = parsely.earley_parser.EarleyParser(self)

        else:
            raise GrammarError("Unknown parser type", self.Options["parser"])

        # Still "half finished"; process() promotes this to 2 on return.
        self.finished = 1

    def _getActionsNamed(self, actionName):
	"""If there is an action named <actionName>, returns a map from
	   language name to action object for that name.  Otherwise raises
	   a GrammarError.

	   Requires: the map must not be modified."""
        if self.Actions.has_key(actionName):
            return self.Actions[actionName]
        else:
            raise GrammarError("No such action", actionName)
    
    def _getElementByName(self,name): 
	"""If 'name' is the name of a token, returns a tuple
	   consisting of a list of appropriate tokens and the string
	   "T".  If 'name' is the name of a rule, return a tuple of
	   the Rule object and the string "R".  Else, raises an error.
	   """
	if self.Tokens.has_key(name):
	    return self.Tokens[name], "T"
	elif self.Rules.has_key(name):
	    return self.Rules[name], "R"
	elif self.Patterns.has_key(name):
	    raise GrammarError(name, "is a pattern, not a token or rule")
	elif self.States.has_key(name):
	    raise GrammarError(name, "is a state, not a token or rule")
	else:
	    raise GrammarError("Unrecognized symbol:", name)

    def resolvePattern(self,name):
	"""If 'name' is the name of a pattern in this file format, returns
	   the ParsedRE object corresponding to 'name'.  Otherwise returns
	   None.

	   This method is used by the resolve method of ParsedRE.

	   Note that every pattern must be registered with this FileFormat
	   before resolvePattern will work properly."""
	if self.Patterns.has_key(name):
	    return self.Patterns[name].val.pat
	else:
	    return None

    def getTypeSystem(self):
	"""Returns the TypeSystem object for this file format."""
        return self.TypeSys

    def getType(self,name):
	"""If 'name' is the name of a token, returns a corresponding
	   Token object.  If name names a rule, returns the
	   corresponding rule specifier (SEQ/MULTI).  Else raises an error."""
        return self.TypeSys.getType(name)
	    
    def newNode(self, type=None, val=None, trailingSpace=None):
	"""Returns a new tree node, with type <type>, value <val>,
	   and trailing space <trailingSpace>.  <Type> may be a string
	   or rule specifier.  <Val> may be a string or a tuple."""
        return self.TypeSys.newNode(type, val,trailingSpace)

    def getScannerAndParser(self):
        """Returns a tuple of the scanner and the parser.  You
           must call the process() method before invoking this one."""
        return (self.scanner, self.parser)
    
    def parse(self,strng):
	"""Parses the string <strng> and returns the resulting tree."""
        tokens = self.scanner.tokenize(strng)
        return self.parser.parse(tokens)

    def parseFile(self, fname):
        """Parses the file named <fname> and returns the parser's result.

           The file is read with fileContents and tokenized by the
           scanner; <fname> is also passed to the parser as its
           'fileName' keyword argument."""
        text = fileContents(fname)
        scanned = self.scanner.tokenize(text)
        # Drop our reference to the raw text before the parse runs.
        del text
        return self.parser.parse(scanned, fileName=fname)

class FormatElement:
    """A FormatElement is any object passed to a FileFormat
       constructor in order to specify part of the file format.

       This is an abstract base class: subclasses must override
       register."""

    def register(self,format):
        """This method does the dirty work of updating the FileFormat <format>
           to reflect the information in <self>.

           Raises NotImplementedError unless overridden by a subclass."""
        # A real exception class: string exceptions are rejected with a
        # TypeError by Python 2.6 and later.
        raise NotImplementedError("This method must be overridden!")


# These options are either present or not present.
# Only the keys are consulted in the code visible here: Option.__init__
# validates flag names against them, and FileFormat.__init__ starts every
# flag off at 0.
_FlagOptions = {"debug":0, "nocase":0, "dotall":0}

# These options take a value.
# Again only the keys are consulted: Option.__init__ validates keyword
# names against them, and FileFormat.__init__ starts every value at None.
_ValOptions = {"scanner":None, "parser":None, "module":None }

class Option(FormatElement):
    """An Option is a catch-all format element to communicate flags and
       values to the FileFormat.

       Valid flags are: 'debug', 'nocase', and 'dotall'.

       Valid value tags are: 'scanner', 'parser', and 'module'.
       """

    ##Fields:
    # flagOptions: a tuple of all provided flag options
    # valOptions: a map from each provided value option to its value

    def __init__(self, *l, **d):
        """Constructs a new 'Options' object.

           Positional arguments are flag names; keyword arguments are
           value options.  Raises GrammarError for a flag or option name
           that is not recognized."""
        for i in l:
            if not _FlagOptions.has_key(i):
                # str() so that a non-string flag is still reported as a
                # GrammarError rather than a TypeError from '+'.
                raise GrammarError("Unknown flag " + str(i))
        for i in d.keys():
            if not _ValOptions.has_key(i):
                raise GrammarError("Unknown option " + str(i))
        self.flagOptions = l
        self.valOptions = d

    def register(self,format):
        """Records the options in <format>.Options: each flag is set
           to 1 and each value option to the value supplied."""
        for i in self.flagOptions:
            format.Options[i] = 1
        for k,v in self.valOptions.items():
            format.Options[k] = v

class Start(FormatElement):
    """A Start declaration names the start symbol of the grammar.
       A file format may carry only one."""

    ##Fields:
    # sym: the start symbol.

    def __init__(self, symbol):
        """Declares <symbol> to be the start symbol."""
        self.sym = symbol

    def register(self, format):
        """Stores the start symbol in <format>.StartSymbol.  Raises
           GrammarError if <format> already has a start symbol."""
        if not format.StartSymbol:
            format.StartSymbol = self.sym
            return
        raise GrammarError("Duplicate start declarations")
	
class State(FormatElement):
    """A State is a given scanner state.  An 'inclusive' state also
       accepts tokens which are not restricted to any particular state;
       an 'exclusive' state accepts only tokens specifically declared
       to occur in it.

       If a state's "noSpace" flag is set, no spaces will be accepted
       within that state.  BUGBUGBUG: noSpace doesn't actually work.

       Only one state may be a start state."""

    ##Fields:
    # name: What is the state's name?
    # exclusive: Is the state exclusive? (boolean)
    # noSpace: Is the noSpace flag set? (boolean)
    # onStart: Is this the start state? (boolean)

    def __init__(self, stateName, exclusive=0, noSpace=0, start=0):
        """Declares a scanner state called <stateName>; the remaining
           arguments are the boolean flags described on the class."""
        self.name = stateName
        self.exclusive = exclusive
        self.noSpace = noSpace
        self.onStart = start

    def register(self, format):
        """Adds this state to <format>.  Raises GrammarError if it is
           a start state and <format> already has one."""
        name = self.name
        if not self.exclusive:
            format.InclusiveStates.append(name)
        if self.onStart:
            if format.StartState:
                raise GrammarError("Multiple start scanning states.")
            format.StartState = name
        _addFormatMember(format.States, name, self, unique=1)

class Pattern(FormatElement):
    """A 'pattern' is a named literal or regular expression that may
       be referred to from other regular expressions.

       For example, if the pattern 'Word' names the regular expression
       object RE(r'\w+'), then RE(r'({:Word:}_)*{:Word:}') is a list of
       underscore-separated words."""

    ##Fields
    # name: The name of this pattern
    # val: The PatternSpecifier object for this pattern.

    def __init__(self, name, val):
        """Creates a new Pattern object called <name>.  <Val> must be
           a PatternSpecifier (a literal or a RE object); anything else
           raises GrammarError."""
        if not isinstance(val, PatternSpecifier):
            raise GrammarError(str(val), "is not a valid pattern specifier")
        self.name = name
        self.val = val

    def toRegex(self):
        """Returns a regular expression for this pattern, as produced
           by the toRegex method of its specifier."""
        specifier = self.val
        return specifier.toRegex()

    def register(self, format):
        """Resolves the specifier against <format>, then adds this
           pattern to <format>.Patterns under its (unique) name."""
        self.val.resolve(format)
        _addFormatMember(format.Patterns, self.name, self, unique=1)

class PatternSpecifier:
    """A PatternSpecifier says how a piece of the input stream is to
       be scanned.  Pattern, Space and Token declarations all take one."""

    def resolve(self, resolver):
        """Resolves this pattern against the PatternResolver <resolver>.
           The base implementation has nothing to resolve."""
        return None

class RE(PatternSpecifier):
    """A RE object specifies a regular-expression based pattern for a
       Pattern, Space, or Token declaration.  The pattern should be
       in abridged perl-style syntax.

       Per the original documentation: with the I flag the pattern is
       case-insensitive, and with the X flag whitespace and comments
       are stripped."""

    ##Fields:
    # val:
    #    The string value of this regular expression (after CleanXRE
    #    has been applied, when X is set).
    # pat:
    #    The ParsedRE object for this regular expression.

    def __init__(self, val, I=None, X=0):
        """Creates a new RE object from the pattern string <val>.
           <I> is passed on to ParsedRE; a true <X> runs <val> through
           parsely.remanip.CleanXRE first."""
        if X:
            val = parsely.remanip.CleanXRE(val)
        self.val = val
        self.pat = parsely.remanip.ParsedRE(val, I=I)

    def resolve(self, resolver):
        """Resolves the underlying ParsedRE against <resolver>."""
        parsed = self.pat
        parsed.resolve(resolver)

class LIT(PatternSpecifier):
    """A LIT object specifies a literal string for a Pattern, Space, or
       Token declaration.  Per the original documentation, the literal
       is matched case-insensitively when the I flag is set."""

    ##Fields
    # val: the string value of this literal.
    # pat: the ParsedRE object for this literal.

    def __init__(self, val, I=None):
        """Creates a literal pattern for the string <val>; <I> is
           passed on to ParsedRE together with L=1."""
        parsed = parsely.remanip.ParsedRE(val, I=I, L=1)
        self.val = val
        self.pat = parsed

class _ActionUnit:
    """_ActionUnit is a mixin class for FileFormat components with associated
       actions."""

    ##Fields:
    # actionName: Name of the action associated with this object.
    # allActions: Map from language name to Action object. 
    #             (Set by resolveAction.)
    # the_action_code: A code object for this action, or just the translated
    #      code for this action, depending on whether we're running in
    #      Python mode.  (Set by compile.)
    # the_action: A function to wrap the_action_code
    
    def __init__(self, action):
        self.actionName = action
        self.allActions = {}
	self.the_action = self.the_action_code = None

    def resolveAction(self, format):
	"""Ensure that an action of the proper name actually exists in
	   <format>, and sets up this object to refer to it."""
        if self.actionName:
            self.allActions = format._getActionsNamed(self.actionName)
            
    def __getstate__(self):
	d = self.__dict__.copy()
	if d['the_action']:
	    d['the_action'] = None
	if d['the_action_code']:
	    d['the_action_code'] = marshal.dumps(self.the_action_code)
	return d
	
    def __setstate__(self,d):
	if d['the_action_code']:
	    d['the_action_code'] = marshal.loads(d['the_action_code'])
	for k,v in d.items():
	    setattr(self,k,v)
	if d['the_action_code']:
	    def act(_context,code=d['the_action_code']):
		exec code
	    self.the_action = act
	
    def getAction(self, language):
	"""Returns the action associated with the language <language>."""
        if self.actionName is None:
            return None
        a = self.allActions.get(language, None)
        if a is None:
            print "Warning: No implementation for %s in %s" % \
                  (self.actionName, language)
        return a

    def compile(self, translator):
	"""Compiles/generates the code for this object."""
        self.the_action_code = translator.compile(self)
	if self.the_action_code:
	    def act(self,code=self.the_action_code):
		exec code
	    self.allActions['python'].compiled_fn = act
	    self.the_action = act

class Lexeme(FormatElement, _ActionUnit):
    """A Lexeme is anything the scanner scans - i.e., a Token or a Space.

       This abstract class holds the logic that Token and Space share;
       it must not be instantiated directly."""

    ##Fields:
    # pattern: the PatternSpecifier object for this lexeme.
    # actionName: the name of the action for this lexeme, or None.
    # inStates: a tuple holding the names of the states in which this lexeme is
    #    accepted.  This is not canonical until FileFormat.process has
    #    been called.
    # enterState: the state to enter upon scanning this lexeme, or None.
    # default: the default value of this lexeme.

    def __init__(self,patspec,inStates,toState,action):
        """Sets up the fields shared by tokens and spaces.  Raises
           GrammarError unless <patspec> is a PatternSpecifier."""
        assert self.__class__ is not Lexeme
        _ActionUnit.__init__(self, action)

        if not isinstance(patspec, PatternSpecifier):
            raise GrammarError(str(patspec),"is not a valid pattern specifier")

        self.pattern = patspec
        self.actionName = action
        self.default = None
        self.enterState = None
        self.inStates = ()

        self.addStates(inStates)

        # If several target states are given, the last one wins.
        for target in _wantList(toState):
            self.enterState = target

    def registerLexeme(self,format):
        """Common code for the register methods of Space and Token:
           resolves the pattern, appends this lexeme to
           <format>.lexemeList, fills in a missing default value, and
           checks that every state referred to exists in <format>."""
        self.pattern.resolve(format)
        format.lexemeList.append(self)

        if self.default is None:
            # No explicit default: let the pattern suggest one.
            self.default = self.pattern.pat.makeDefault()
            assert type(self.default) in (NoneType, StringType)

        known = format.States
        for state in self.inStates:
            # '*' is the "every state" wildcard, not a state name.
            if not (known.has_key(state) or state == '*'):
                raise GrammarError("No such state as", state)

        target = self.enterState
        if target and not known.has_key(target):
            raise GrammarError("No such state as", target)

    def getStates(self):
        """Returns a tuple of the names of all the states in which this
           lexeme is recognized."""
        return self.inStates

    def addStates(self, states):
        """Appends the state names in the tuple/singleton <states> to
           this lexeme's 'inStates'."""
        combined = list(self.inStates) + list(_wantList(states))
        self.setStates(combined)

    def setStates(self, states):
        """Replaces this lexeme's 'inStates' with <states>."""
        wanted = _wantList(states)
        self.inStates = tuple(wanted)

    def isToken(self):
        """Returns true iff this lexeme is a Token; false here."""
        return 0

class Token(Lexeme):
    """A Token declares that a certain pattern should be scanned as
       a token."""

    ##Fields
    # distinctName: A name for this token that is guaranteed to be
    #    unique.  Set by the register method.
    # shortName: The name for this type of token.
    # type: The parsely.tree.LeafNodeType object for this token.  Set by
    #    the registerType method.

    # On naming:
    #   If you have two tokens with the same name:
    #         A=Token("Foo", LIT("A")), and
    #         B=Token("Foo", LIT("B")), then
    #   A and B will have the same shortName ("Foo"), but different
    #   distinctNames ("Foo" and "Foo__1").

    def __init__(self, name, patspec, inStates=(), toState=None, default=None,
                 action=None):
        """Constructs a Token declaration.

           <name> is the symbol corresponding to this terminal.
           <patspec> is a RE or LIT object.

           The token is only recognized in the states named in
           <inStates>; once it has been scanned, the scanner enters the
           state named by <toState>.  <default> is the token's default
           value.

           <action> names the action the scanner takes when this token
           is scanned."""
        Lexeme.__init__(self, patspec, inStates, toState, action)
        self.default = default
        self.shortName = name

    def register(self, format):
        """Registers this token with <format>, working out its
           distinct name from the tokens that already share its short
           name."""
        self.registerLexeme(format)

        short = self.shortName
        try:
            siblings = format.Tokens[short]
        except KeyError:
            # First token of this name: no suffix needed.
            self.distinctName = short
        else:
            self.distinctName = short + "__" + str(len(siblings))

        _addFormatMember(format.Tokens, short, self, unique=0)
        _addFormatMember(format.DistinctLexemes, self.distinctName, self,
                         unique=1, list=format.tokenList)

    def registerType(self, typesys):
        """Creates the LeafNodeType for this token and adds it to
           <typesys>."""
        leaf = parsely.tree.LeafNodeType(self.shortName,
                                         self.default,
                                         typesys.getDefaultSpace())
        self.type = leaf
        typesys._addType(leaf)

    def __str__(self):
        """Returns the short name of this token."""
        return self.shortName

    def isToken(self):
        """Returns true: this lexeme is a Token."""
        return 1

class DefaultSpace(FormatElement):
    """Declares the default space value of a file format."""

    ##Fields:
    # val: The default space for the current file format.

    def __init__(self,val):
        """<val> is the default space; it must be a string."""
        assert type(val) is StringType
        self.val = val

    def register(self,format):
        """Stores the value in <format>.DefaultSpace.  Raises
           GrammarError if <format> already has a default space."""
        if format.DefaultSpace is None:
            format.DefaultSpace = self.val
        else:
            raise GrammarError("Duplicate default space declaration")

class Space(Lexeme):
    """Declares that a given pattern (RE or LIT) is to be treated as
       space by the scanner wherever it occurs."""

    ##Fields:
    # distinctName: a unique identifier for this Space lexeme.
    #    Set by the register method.

    def __init__(self, patspec, inStates=(), toState=None, action=None):
        """See Token for the meaning of the arguments."""
        Lexeme.__init__(self, patspec, inStates, toState, action)

    def register(self, format):
        """Registers this space with <format>, naming it after the
           number of spaces registered before it."""
        self.registerLexeme(format)
        index = len(format.Space)
        self.distinctName = "___space" + str(index)
        _addFormatMember(format.DistinctLexemes, self.distinctName, self,
                         unique=1, list=format.Space)


class RuleSpecifier:
    """A RuleSpecifier describes the RHS of a grammar production.

       This is an abstract class, and should not be instantiated.
       Subclasses supply their own __init__ and override the methods
       below."""

    def __init__(self):
        """Never meant to be called; fails an assertion."""
        # Same effect as the original identity comparison of two
        # different string literals, without relying on how the
        # interpreter shares string objects.
        assert 0, "RuleSpecifier is abstract: this is never called"

    def setName(self,name):
        """Indicates that the name of this rule's type should be <name>.

           Raises NotImplementedError unless overridden."""
        # A real exception class: string exceptions are rejected with a
        # TypeError by Python 2.6 and later.
        raise NotImplementedError(
            "Tried to use method on abstract class RuleSpecifier")

    def resolve(self,resolver):
        """Uses the format <resolver> to chase references to the symbols
           in this RHS.

           Raises NotImplementedError unless overridden."""
        raise NotImplementedError(
            "Tried to use method on abstract class RuleSpecifier")

    def getDerivations(self,lhs):
        """Returns a list of the rhs's corresponding to this object.
           Each is a tuple of a type and the names of some symbols.

           Raises NotImplementedError unless overridden."""
        raise NotImplementedError(
            "Tried to use method on abstract class RuleSpecifier")

class Rule(FormatElement):
    """A Rule declaration ties a RuleSpecifier <rspec> to the
       nonterminal symbol <target> that it derives."""

    ##Fields:
    # name: the target (lhs) of this rule.
    # val: the production (rhs) of this rule.

    def __init__(self, target, rspec):
        """Declares a rule of the format  <target>  ::=  <rspec>.
           <target> must be a string and <rspec> a RuleSpecifier
           (ALT,SEQ,OPT,MULTI); otherwise GrammarError is raised.
           The specifier is named after the target."""
        if type(target) is not StringType:
            raise GrammarError("The type of a target must be a string")
        if not isinstance(rspec, RuleSpecifier):
            raise GrammarError(str(rspec), 'is not a valid rule specifier')

        self.name = target
        self.val = rspec
        rspec.setName(target)

    def register(self, format):
        """Adds this rule to <format>.Rules (names must be unique) and
           to <format>.ruleList."""
        _addFormatMember(format.Rules, self.name, self, unique=1,
                         list=format.ruleList)

    # The remaining methods simply forward to the rhs of the rule.

    def registerType(self, typesys):
        """Registers the type(s) of the rhs with <typesys>."""
        rhs = self.val
        rhs.registerType(typesys)

    def resolve(self, format):
        """Chases all references in the rhs of this rule."""
        rhs = self.val
        rhs.resolve(format)

    def resolveAction(self, format):
        """Resolves the actions in the rhs of this rule, if any."""
        rhs = self.val
        rhs.resolveAction(format)

    def compile(self, trans):
        """Compiles the actions in the rhs of this rule, if any."""
        rhs = self.val
        rhs.compile(trans)


class ALT(RuleSpecifier):
    """An ALT derivation says that a nonterminal may expand to any one
       of several alternative derivations.  Alternatives are either
       labelled (passed as keyword arguments) or unlabelled, and each
       must be a symbol or a SEQ."""

    ##Fields:
    # alternatives:
    #   A map from alternative names to SEQ specifiers.  If no name is
    #   given for an alternative, it is named by its position ("0", "1"...).

    def __init__(self, *m, **tm):
        """Creates a new ALT specifier.  The arguments in <m> are unlabeled
           alternatives; the ones in <tm> are labelled.  For example,
           Rule('X', ALT('b', foo='c', bar=SEQ('d', 'e')))   corresponds to:
           X = b | [foo] c | [bar] d e;

           Raises GrammarError if an alternative is neither a string
           nor a SEQ.
           """
        alternatives = tm
        self.alternatives = alternatives

        # Unlabelled alternatives are filed under their position.
        position = 0
        for unlabelled in m:
            alternatives[str(position)] = unlabelled
            position = position + 1

        # Bare symbols are shorthand for one-member sequences.
        for label, alt in alternatives.items():
            if type(alt) is StringType:
                alternatives[label] = SEQ(alt)
            elif not isinstance(alt, SEQ):
                raise GrammarError("Alternative must be a symbol or a SEQ")

    def setName(self, name):
        """Names each alternative '<name>:<label>'."""
        for label, alt in self.alternatives.items():
            alt.setName(name + ":" + label)

    def resolve(self,r):
        """Resolves every alternative against <r>."""
        for alt in self.alternatives.values():
            alt.resolve(r)

    def resolveAction(self, format):
        """Resolves the action of every alternative against <format>."""
        for alt in self.alternatives.values():
            alt.resolveAction(format)

    def registerType(self, typesys):
        """Registers the type of every alternative with <typesys>."""
        for alt in self.alternatives.values():
            alt.registerType(typesys)

    def getDerivations(self,lhs):
        """Returns the derivations of all the alternatives, joined
           into a single list."""
        derivations = []
        for alt in self.alternatives.values():
            derivations.extend(alt.getDerivations(lhs))
        return derivations

    def compile(self, trans):
        """Compiles the action of every alternative with <trans>."""
        for alt in self.alternatives.values():
            alt.compile(trans)
    
     
class SEQ(RuleSpecifier, _ActionUnit):
    """A sequence of (optionally tagged) symbols.  Produces a
       StructNode."""

    ##Fields:
    # members: A list of the names of the types in this sequence.
    # tags: A list of the tags for each member in this sequence.  If a
    #    member is untagged, its tag is None.
    # - Set by setName
    # shortName: The brief name of this production ("Declaration")
    # name: The full name of this production ("Declaration:AliasDecl")
    # variant: The name of the variant type of this production ("AliasDecl")
    # - Set by registerType
    # type: the StructNodeType for this production

    def __init__(self, *members, **kwds):
        """Constructs a new SEQ.  Each member is either a symbol (Word)
           or a symbol tagged with an identifier (Word:w).  The only
           keyword accepted is 'action', the name of the action to take
           when this production matches.

           Raises GrammarError if the same tag is used twice."""
        for keyword in kwds.keys():
            assert keyword in ('action', )
        _ActionUnit.__init__(self, kwds.get('action',None))

        symbols = []
        tags = []
        seen = {}
        for member in members:
            if ':' in member:
                # "Symbol:tag" -- split at the first colon.
                cut = string.find(member, ':')
                tag = member[cut+1:]
                member = member[:cut]
            else:
                tag = None

            symbols.append(member)
            tags.append(tag)
            if tag is not None and seen.has_key(tag):
                raise GrammarError("Duplicate declaration for tag", tag)
            seen[tag] = 1

        self.members = symbols
        self.tags = tags

    def getType(self):
        """Returns the parsely.tree.StructNodeType corresponding to this
           object.  Only available once registerType has run."""
        return self.type

    def registerType(self, typesys):
        """Creates the StructNodeType for this production and adds it
           to <typesys>."""
        struct = parsely.tree.StructNodeType(self.shortName,
                                             self.variant,
                                             self.members,
                                             self.tags)
        self.type = struct
        typesys._addType(struct)

    def setName(self,name):
        """Records <name> as the full name of this production; the part
           before the first ':' becomes the short name and the rest the
           variant (None when there is no ':')."""
        self.name = name
        if ':' in name:
            cut = string.find(name, ':')
            self.shortName = name[:cut]
            self.variant = name[cut+1:]
        else:
            self.shortName = name
            self.variant = None

    def resolve(self,ff):
        """Checks that every member names a token or rule of <ff>."""
        for symbol in self.members:
            ff._getElementByName(symbol)

    def getDerivations(self,lhs):
        """Returns the single derivation of this sequence: this object
           paired with a tuple of its member names."""
        rhs = tuple(self.members)
        return [ (self, rhs) ]
    
class OPT(RuleSpecifier, _ActionUnit):
    """Specifies a symbol that may be present or absent."""

    ##Fields:
    # base: the name of the optional symbol
    # - set by setName
    # name: the name of this production
    # shortName: the name of this production
    # - set by registerType
    # type: the type of this production (None)

    def __init__(self,base,action=None):
        """<base> is the optional symbol; <action> optionally names
           an action for this production."""
        _ActionUnit.__init__(self, action)
        self.base = base

    def setName(self,name):
        """Uses <name> as both the full and the short name."""
        self.shortName = name
        self.name = name

    def resolve(self,r):
        """Checks that the base symbol names a token or rule of <r>."""
        r._getElementByName(self.base)

    def getDerivations(self,lhs):
        """Returns two derivations: the empty one, and the one
           consisting of just the base symbol."""
        absent = (self, ())
        present = (self, (self.base,))
        return [ absent, present ]

    def getType(self):
        """Returns the type of this production, as set by
           registerType."""
        return self.type

    def registerType(self, typesys):
        """An optional symbol has no type of its own: the type is
           recorded as None and nothing is added to <typesys>."""
        self.type = None

class MULTI(RuleSpecifier, _ActionUnit):
    """Specifies a list of symbols.  The list may have a lower bound,
       which defaults to 0 elements.  The elements of the list may
       be 'separated' or 'terminated' by an element of another type.

       A comma-separated list is of the form: A,A,A,A
       A comma-terminated list is of the form: A,A,A,A,

       Note that by default, the separators are counted as elements of
       the list.  If the list is 'exclusive', then the separators are
       not counted as elements."""

    ##Fields:
    # base: the symbol for elements of this list.
    # min: The lower bound on list size.
    # sep: The symbol for the separator, or None.
    # term: the symbol for the terminator, or None.
    # - set by setName
    # name: the name of this production
    # shortName: the name of this production
    # - set by registerType
    # type: the type of this production (None)
    # exclusive: flag -- is this an exclusive list?

    def __init__(self, base, min=0, sep=None, term=None, action=None,
                 exclusive=0):
        """Constructs a new MULTI rule specifier.

           <base> is the base type.  <min> is the minimal number of
           <base> elements; it may also be "*" (meaning 0) or "+"
           (meaning 1).  <sep> and <term> name the separator or the
           terminator symbol.  <action> is handed on to
           _ActionUnit.__init__.  <exclusive> marks an exclusive list.

           Raises GrammarError when both a separator and a terminator
           are given, when a separated list has a minimum of 0, or when
           the list is exclusive without a separator or terminator.
           """
        _ActionUnit.__init__(self, action)
        if min == '*':
            min = 0
        elif min == '+':
            min = 1

        self.base, self.min = base, min
        self.sep, self.term = sep, term
        self.exclusive = exclusive

        # The attributes are stored before validation, so a rejected
        # specifier still carries the values it was given.
        if not (sep is None or term is None):
            raise GrammarError("Tried to have a separator _and_ a terminator")
        if sep and min == 0:
            raise GrammarError("Separated list must have min > 0")
        if exclusive and sep is None and term is None:
            raise GrammarError("Exclusive modifier is meaningless without a separator or terminator")

    def getType(self):
        """Returns the type stored by registerType."""
        return self.type

    def resolve(self,r):
        """Looks up the base symbol and, when present, the separator and
           the terminator through r._getElementByName."""
        r._getElementByName(self.base)
        for extra in (self.sep, self.term):
            if extra:
                r._getElementByName(extra)

    def setName(self,name):
        """Uses <name> as both the name and the short name."""
        self.name = self.shortName = name

    def getDerivations(self,lhs):
        """Returns the two derivations of this list, the left-recursive
           one first and the base case (the minimal list) second."""
        base, sep, term, count = self.base, self.sep, self.term, self.min
        if sep:
            # LHS -> Base Sep Base Sep...Base
            # LHS -> LHS Sep Base
            seed = (base,) + (sep, base) * (count - 1)
            grow = (lhs, sep, base)
        elif term:
            # LHS -> Base Term...Base Term
            # LHS -> LHS Base Term
            seed = (base, term) * count
            grow = (lhs, base, term)
        else:
            # LHS -> Base Base Base...Base
            # LHS -> LHS Base
            seed = (base,) * count
            grow = (lhs, base)
        return [ (self, grow), (self, seed) ]

    def registerType(self, typesys):
        """Builds the parsely.tree.ListNodeType for this list, stores it
           in self.type, and adds it to <typesys> through
           typesys._addType."""
        listType = parsely.tree.ListNodeType(self.name,
                                             self.base, self.term, self.sep,
                                             self.exclusive)
        self.type = listType
        typesys._addType(listType)

class Action(FormatElement):
    """An Action declaration associates a block of code in a given language
       with an action name.

       The currently supported languages are:
            - Python.
       """

    ##Fields:
    # name: The name of this action.
    # lang: The language this action is written in, in lowercase.
    # code: The text of the code for this action.
    # compiled_fn: A python function for this action.  Set by
    #     _ActionUnit.compile
    # kind: The special function, if any, of this action.  Currently allowed:
    #       'initScan',  'finishScan',  'scanFn' (python)
    #       'initParse', 'finishParse', 'parseFn' (python)

    def __init__(self, name, lang, code, kind=None, line=-1):
        """Constructs a new Action declaration named <name> written in
           the language <lang> with code <code>.

           <lang> is lowercased before it is checked and stored.  <kind>
           optionally names the special function of this action, and
           <line> is stored as lineNumber.  Raises GrammarError for an
           unsupported language or an unsupported kind."""
        self.name = name
        lang = lang.lower()
        if lang not in ("python",):
            raise GrammarError("Language '%s' not supported" % lang)
        self.lang = lang
        self.code = code
        self.lineNumber = line
        self.compiled_fn = None
        self.kind = kind
        if (kind is not None and lang == 'python'
            and kind not in ('initScan',  'finishScan',  'scanFn',
                             'initParse', 'finishParse', 'parseFn')):
            raise GrammarError("Action kind '%s' not supported" %kind)

    def compile(self, translator):
        """Compiles/generates the code for this Action.

           Hands this action to translator.compile.  When that yields a
           true value, compiled_fn becomes a function that execs it;
           otherwise compiled_fn is left alone."""
        action_code = translator.compile(self)
        if not action_code:
            return

        # The code is bound as a default argument instead of being
        # closed over: act must have no free variables because it uses
        # exec.
        def act(self,code=action_code):
            exec(code)
        self.compiled_fn = act

    def getCode(self):
        """Returns the code for this action."""
        return self.code

    def register(self, format):
        """Files this action in format.Actions, a map from action name
           to a map from language to Action.  Raises GrammarError when
           the name already has an action in this language."""
        registry = format.Actions
        if not registry.has_key(self.name):
            registry[self.name] = { self.lang: self }
            return

        byLanguage = registry[self.name]
        if byLanguage.has_key(self.lang):
            raise GrammarError("Multiple declarations for action %s in %s" %
                               (self.name, self.lang))

        byLanguage[self.lang] = self

class ActionTranslator:
    """An ActionTranslator translates code for actions written
       in a particular language.  It has a few generic facilities to make
       the job a bit easier, but is definitely 'not for public consumption'.

       For a simple example, see parsely.pyTrans.PythonActionTranslator.
       """

    ##Fields:
    # lang: The language this translator handles.
    # context: The context of this translator.  It must be one of
    #    'scanner', 'parser', or 'LALRParser'.  The difference between
    #    the latter two is that LALRParser is assumed to process one
    #    token from the scanner at a time, whereas parser requires the
    #    entire input stream.
    # knownTypes:
    #    A map from strings to methods of this, where if this has a
    #    method of the form t_NAME, knownTypes has a mapping from 'NAME'
    #    to t_NAME.

    #XXXX This 'context' stuff is stupid.  This needs to be more OO.

    def __init__(self, languageName, context):
        """Initializes this translator to handle actions from <languageName>.
           Context must be one of 'scanner', 'parser', or 'LALRParser'.

           This method collects the hash knownTypes.
           """

        # Abstract class -- must be derived.
        assert self.__class__ is not ActionTranslator
        self.lang = languageName
        self.context = context

        # Every t_NAME method except t_default handles the directive NAME.
        self.knownTypes = {}
        for name in _namelist(self):
            if name[:2] == 't_' and name != 't_default':
                self.knownTypes[name[2:]] = getattr(self,name)

    def translate(self, code):
        """Rewrites the code according to getRegexen() and the t_ methods.

           getRegexen() must give a map from directive name to a pair
           (compiled regex, contexts), where <contexts> holds the codes
           of the contexts in which the directive is allowed: 'S' for
           'scanner', 'p' for 'parser' and 'P' for 'LALRParser'.

           A directive with a t_ method that is allowed in the current
           context has its matches replaced through that method.  Matches
           of any other directive are handed to t_default.  Returns the
           rewritten code; raises KeyError for an unknown context and
           GrammarError as described below."""
        ctxt = {'scanner' : "S", 'parser' : 'p',
                "LALRParser" : 'P' }[self.context]

        for n,v in self.getRegexen().items():
            reg, reqContext = v
            if n in self.knownTypes and ctxt in reqContext:
                code = reg.sub(self.knownTypes[n],code)
                # NOTE(review): this fires only when the whole rewritten
                # text equals 'UNSUPPORTED' -- presumably the signal a
                # t_ method gives for an unsupported operation; confirm.
                if code == 'UNSUPPORTED':
                    raise GrammarError(
                        "The operation %s is not supported in %s mode"
                        % (n, self.context))
            else:
                # self and n are bound as defaults so that the callback
                # does not depend on nested scopes.
                code = reg.sub(
                    (lambda m, self=self, n=n: self.t_default(m,n)), code)
        return code

    def t_default(self,match,name):
        """Fallback for a matched directive without a usable t_ method:
           always raises GrammarError."""
        raise GrammarError("No known translation for %s directive" % name)

    def compile(self,actionUnit):
        """Abstract function: handles all compilation on the given action
           unit.  Raises NotImplementedError unless overridden."""
        raise NotImplementedError(
            "Tried to call 'compile' on abstract class ActionTranslator")

    def getRegexen(self):
        """This method must return a data structure to indicate what elements
           in the code we'll be replacing.  See PythonActionTranslator
           for an example.  Raises NotImplementedError unless overridden."""
        raise NotImplementedError("Method must be overridden!")

def getTranslator(language, context):
    '''Returns a translator for the given language in the given context.

       <language> must name a supported action language (currently only
       'python'); <context> is handed to the translator's constructor.
       Raises AssertionError for any other language.'''
    if language == 'python':
        import parsely.pyTrans
        return parsely.pyTrans.PythonActionTranslator(context)

    # Raised explicitly rather than through an assert statement, so that
    # an unsupported language is still rejected when asserts are
    # stripped (python -O) instead of silently yielding None.
    raise AssertionError("No action translator for language %r"
                         % (language,))

####
## Exceptions
####
class GrammarError(parsely.ParselyException):
    """Raised when something is wrong with a file format declaration."""
    def __init__(self,*details):
        # The collected arguments are handed to the base class as one
        # tuple; they are not unpacked.
        parsely.ParselyException.__init__(self, details)
	
