#!/usr/bin/python

# Parsely - A cross-language tool for parsing and file manipulation.
#
# Copyright (C) 1999-2000 Nick Mathewson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

"""This earley_parser module wraps John Aycock's SPARK package.  It constructs
   parsely nodes, and provides error and action mechanisms.

   For more information on the parser, see
   http://www.csr.uvic.ca/~aycock/python/"""

from hak_spark import GenericParser
import parsely, parsely.tree
from parsely._util import _extendList, _escNonPrinting

from types import StringType
import string

class EarleyParser(GenericParser):
    """An Earley parser to wrap John Aycock's SPARK.

       The efficiency of this algorithm is limited by the Earley parser,
       which runs in O(n^3) worst-case time, but which can handle arbitrary
       context-free grammars.  If the grammar is unambiguous, it performs in
       O(n^2) time, and if the grammar is LR(k), it performs in O(n) time.
       Nevertheless, the constant factor over a standard LALR parser is
       fairly big."""

    ## Implementation notes:

    # Thanks to John Aycock for incorporating an addRule method into
    # his GenericParser class!

    # Unfortunately, this class is still
    # more closely tied to SPARK's internal structures than I'd like.
    # All the nastiness is contained in the modifications in hak_spark.py,
    # and relates to 1) parsing files with different start symbols.
    #                2) Boosting parser performance by about 30% by
    #                   bypassing the _Token.__cmp__ slow-path and
    #                   the _unwrapToken slow-path.
    #
    # When I get back from Puerto Rico in January, I must remember to
    # ask him about incorporating something like this.

    ## Fields:
    # defaultSpace:
    #   The file format's default space.  Used to construct FileNode objects.
    # scanner:
    #   a reference to the scanner associated with this fileformat.
    #   Used for handling include directives.
    # startRule, rules, rule2func, rule2name:
    #   Used by the SPARK code.
    # Context:
    #   The current 'ParserContext' object; used by action code.
    # format:
    #   A link back to the format object.  Only used in initialization;
    #   deleted immediately after to avoid circularity.
    # ruleList:
    #   A link to the format's ruleList.
    # ContextClass:
    #   The class for all ParserContext objects.

    def __init__(self,format):
        """Constructs a new parser for the grammar required by the
           FileFormat object <format>."""
        self.format = format
        try:
            # Everything that needs self.format lives inside the try block,
            # so the back-link is removed even if construction fails.
            self.ruleList = format.ruleList
            self.ContextClass = self.makeContextClass()
            self.Context = self.ContextClass(None,None,None)
            GenericParser.__init__(self,format.StartSymbol,1)
            self.defaultSpace = format.DefaultSpace
            self.scanner = format.scanner
        finally:
            del self.format

    def makeContextClass(self):
        """Builds and returns the ParserContext subclass used by this parser.

           Every rule action (typeObj.the_action) is attached to the class
           under typeObj.actionName.  Every entry of self.format.Actions
           that has a 'python' version whose kind is 'initParse',
           'finishParse' or 'parseFn' is attached under its action name.
           'initParse' actions are run when a context is constructed;
           'finishParse' actions are run by the context's _finish method.

           Reads self.ruleList and self.format, so it may only be called
           while self.format is still set (that is, from __init__)."""
        actLists = {'initParse': [], 'finishParse':[], 'parseFn': []}

        class CContext(ParserContext):
            def __init__(self,args,match,type):
                ParserContext.__init__(self,args,match,type)
                for fnId in self.__class__._initActions:
                    getattr(self,fnId)()

            def _finish(self):
                for fnId in self.__class__._finishActions:
                    getattr(self,fnId)()

        for rule in self.ruleList:
            for typeO, rhs in rule.val.getDerivations(rule.name):
                if typeO.the_action:
                    setattr(CContext, typeO.actionName, typeO.the_action)

        for actName,actionMap in self.format.Actions.items():
            if actionMap.has_key('python'):
                action = actionMap['python']
                if actLists.has_key(action.kind):
                    setattr(CContext, actName, action.compiled_fn)
                    actLists[action.kind].append(actName)

        CContext._initActions = actLists['initParse']
        CContext._finishActions = actLists['finishParse']

        return CContext

    def error(self,leaf):
        """Gives a syntax error at the node <leaf>."""
        raise_parse_error(leaf,"Syntax error")

    def parse(self, tokens, withContext=0, startSymbol=None, isInclude=0,
              includeMap=None, fileName=None):
        """Given a list of _Token objects <tokens>, parses them and
           returns an appropriate FileNode.

           If the first element of <tokens> is a string, it is taken to be
           the file's initial space and is removed before parsing.

           If <withContext>, returns a tuple containing the FileNode and
           the Context object.

           <startSymbol> is handed to _parseWithStart.  If <isInclude> is
           true, the Context is not reset first.  <includeMap>, if given,
           maps names to (tokenization, includeType, includeToken) triples;
           each tokenization is parsed first, and the value of its
           includeToken is replaced by an IncludeNode.  <fileName> is
           passed on to the FileNode.

           BUGBUG: This is not threadsafe. (Because of Context.)"""

        ispace = ''
        # An empty token list has no initial-space entry to strip.
        if tokens and isinstance(tokens[0], StringType):
            ispace = tokens[0]
            tokens = tokens[1:]

        context = self.Context
        if not isInclude:
            context.reset()

        # parse() is re-entered for includes.  Remember whether an outer
        # parse already installed the back-link, so that this call does not
        # tear it down underneath that outer parse.
        outerParser = getattr(context, '_parser', None)
        context._parser = self
        try:
            if includeMap:
                # The keys are not needed here.  (Using <fileName> as the
                # loop variable used to clobber the parameter.)
                for includeInfo in includeMap.values():
                    tokenization, includeType, includeToken = includeInfo
                    parsing = self.parse(tokenization,0,includeType,1)
                    includeToken.val = \
                         parsely.tree.IncludeNode(includeToken.val,parsing)

            tokens,types = splitTokenList(tokens)
            result = parsely.tree.FileNode(
                self._parseWithStart((tokens,types),startSymbol),
                ispace,
                self.defaultSpace,
                fileName)
        finally:
            # Remove circular reference (outermost parse only):
            if outerParser is None:
                del context._parser
            else:
                context._parser = outerParser

        # NOTE: this also runs at the end of every nested include parse.
        context._finish()

        if withContext:
            return (result, context)
        else:
            return result

    def collectRules(self,V=0):
        """Builds the mappings required by GenericParser.
           Not to be called by external code.

           If <V> is true, each rule is printed as it is added.

           Modifies: self"""
        # Ideally, we could use one of the two versions below.
        for ruleDecl in self.ruleList:
            lhs = ruleDecl.name
            for typeObj, rhs in ruleDecl.val.getDerivations(lhs):
                act = typeObj.the_action
                rule = string.join((lhs, '::=') + rhs)
                fn = self.__getAction(typeObj, act)
                if V:
                    print(rule)
                self.addRule(rule,fn)

    def __getstate__(self):
        """Returns a copy of the instance dictionary in which the rule
           tables (rule2func, rule2name, rules) are emptied; __setstate__
           rebuilds them."""
        d = self.__dict__.copy()
        d['rule2func'] = {}
        d['rule2name'] = {}
        d['rules'] = {}

        return d

    def __setstate__(self, d):
        """Restores the attributes in <d>, then rebuilds the rule tables
           and the start rule."""
        for k,v in d.items():
            setattr(self,k,v)
        # XXXX This will break wretchedly!
        # XXXX We need to set the start state.
        self.collectRules()
        self.startRule = self.augment(self._start)

    def __getAction(self,typeObj,act):
        """Return a function object to be the action, given the type
           object (SEQ/OPT/MULTI) <typeObj>.

           The returned function takes the list of matched elements and
           builds the node with the matching _build* helper.  The node
           type, the current Context and <act> are bound as default
           arguments when the function is created."""

        type = typeObj.getType()
        if isinstance(type, parsely.tree.StructNodeType):
            return lambda args, type=type, context=self.Context, act=act: \
                            _buildStructNode(context,args,type,act)
        elif isinstance(type, parsely.tree.ListNodeType):
            if type.exclusive:
                return lambda args, type=type, context=self.Context, act=act: \
                                _buildExListNode(context,args,type,act)
            else:
                return lambda args, type=type, context=self.Context, act=act: \
                                _buildListNode(context,args,type,act)
        elif type is None:
            return lambda args, type=type, context=self.Context, act=act: \
                            _buildOptNode(context,args,type,act)
        else:
            assert not "reached"

    def include(self, fName, node, startSymbol):
        """Includes a file with the name <fName>.  The node that
           requested this operation is <node>.  The file should be
           parsed with the start symbol <startSymbol>.  Returns an
           IncludeNode. """
        assert parsely.INCLUDES_ENABLED

        f = open(fName)
        try:
            contents = f.read()
        finally:
            # Close the file even if reading fails.
            f.close()
        tokens,scanContext = self.scanner.tokenize(contents,withContext=1)
        tree = self.parse(tokens,0,startSymbol,1,
                          scanContext._includeNodes)
        return parsely.tree.IncludeNode(node, tree)

def _buildStructNode(context,args,type,act):
    """Helper function.  Wraps the elements <args> in a new StructNode
       of type <type>.

       With no action <act>, that node is returned directly.  Otherwise
       the context is primed with (args, node, type), <act> is called on
       it, and whatever the context then holds as its match is returned.
       An "ExplicitContinue" raised by the action is ignored."""
    node = parsely.tree.StructNode(type,args)
    if not act:
        return node
    context.SetState(args,node,type)
    try:
        act(context)
    except "ExplicitContinue":
        pass
    # The action may have replaced the node through context.match.
    return context.match

def _buildOptNode(context,args,type,act):
    """Helper function.  Builds the value of an optional element of
       type <type>: the first of <args> if there is one, otherwise None.

       With no action <act>, that value is returned directly.  Otherwise
       the context is primed with (args, value, type), <act> is called on
       it, and whatever the context then holds as its match is returned.
       An "ExplicitContinue" raised by the action is ignored."""
    if len(args) == 0:
        node = None
    else:
        node = args[0]
    if not act:
        return node
    context.SetState(args,node,type)
    try:
        act(context)
    except "ExplicitContinue":
        pass
    # The action may have replaced the value through context.match.
    return context.match

def _buildListNode(context,args,type,act):
    """Helper function.  Given a list of elements <args> used to
       build a list node of type <type>, runs the action <act>
       and returns the node.

       If <args> has more than one element and the first is already a
       ListNode of this same type, the remaining elements are appended
       to it in place and it is reused; otherwise a new ListNode is
       built from <args>.

       If <act> is set, <context> is primed with (args, node, type),
       <act> is called on it, and the context's match is returned
       instead.  An "ExplicitContinue" raised by the action is ignored."""
    if len(args) <= 1:
        n = parsely.tree.ListNode(type,args)
    elif (isinstance(args[0],parsely.tree.ListNode) and
          args[0]._type is type):
        _extendList(args[0]._members, args[1:])
        n = args[0]
    else:
        n = parsely.tree.ListNode(type,args)
    # BUGBUG This should only happen _once_, for the whole list?
    if act:
        # Use the context passed in: this is a module-level function, so
        # there is no 'self' here.
        context.SetState(args,n,type)
        try:
            act(context)
        except "ExplicitContinue":
            pass
        n = context.match
    return n

def _buildExListNode(context,args,type,act):
    """Helper function.  Given a list of elements <args> used to
       build an exclusive list node of type <type>, runs the action <act>
       and returns the node.

       If <args> has more than one element and the first is already a
       ListNode of this same type, the remaining elements are appended
       to it in place and it is reused; otherwise a new ExListNode is
       built from <args>.

       If <act> is set, <context> is primed with (args, node, type),
       <act> is called on it, and the context's match is returned
       instead.  An "ExplicitContinue" raised by the action is ignored."""
    if len(args) <= 1:
        n = parsely.tree.ExListNode(type,args)
    elif (isinstance(args[0],parsely.tree.ListNode) and
          args[0]._type is type):
        _extendList(args[0]._members, args[1:])
        n = args[0]
    else:
        n = parsely.tree.ExListNode(type,args)
    # BUGBUG This should only happen _once_, for the whole list?
    if act:
        # Use the context passed in: this is a module-level function, so
        # there is no 'self' here.
        context.SetState(args,n,type)
        try:
            act(context)
        except "ExplicitContinue":
            pass
        n = context.match
    return n

def splitTokenList(toklist):
    """Splits a list of _Token objects into two parallel lists.

       Returns (leaves, names): the value wrapped by each token, and the
       type name of each of those values, both in the order of
       <toklist>."""
    leaves = []
    names = []
    for wrapped in toklist:
        leaf = wrapped.val
        leaves.append(leaf)
        names.append(leaf._type.typeName)
    return leaves, names

class _Token:
    """A thin wrapper around a LeafNode.

       GenericParser needs __cmp__ to compare by type, whereas LeafNode
       compares by value; this wrapper supplies the type comparison."""
    def __init__(self, type, val, space, line):
        # The wrapped leaf is kept in self.val.
        leaf = parsely.tree.LeafNode(type,val,space,line)
        self.val = leaf

    def getType(self):
        """Returns the type name of the wrapped leaf."""
        leaf = self.val
        return leaf._type.typeName

    def getVal(self):
        """Returns the wrapped leaf."""
        return self.val

    def __cmp__(self,s):
        """Compares the wrapped leaf's type name with <s>."""
        typeName = self.val._type.typeName
        return cmp(typeName,s)

    def moreSpace(self,val):
        """Appends <val> to the wrapped leaf's trailing space."""
        leaf = self.val
        leaf.trailingSpace = leaf.trailingSpace + val

    def accumulate(self,val):
        """Folds the leaf's trailing space and then <val> into the leaf's
           value, leaving the trailing space empty."""
        leaf = self.val
        combined = leaf.val + leaf.trailingSpace + val
        leaf.val = combined
        leaf.trailingSpace = ""
        
def _unwrapToken(t):
    """Returns the value wrapped by <t> when <t> is a _Token; any other
       value is returned unchanged."""
    if not isinstance(t,_Token):
        return t
    return t.val

class ParseError(parsely.ParselyException):
    """The exception type raised when the parser reports an error."""
    def __init__(self,*s):
        # All positional arguments are handed to the base class as a
        # single tuple.
        parsely.ParselyException.__init__(self,s)

def raise_parse_error(node,msg):
    """Raises a ParseError reporting <msg> at the node <node>.

       The message names the node's type and quotes the node's text,
       cut to 20 characters plus '...' when longer.  The line is added
       when the node has a non-negative lineNumber attribute."""
    if hasattr(node, 'lineNumber') and node.lineNumber >= 0:
        where = " on line " + str(node.lineNumber)
    else:
        where = ""
    text = str(node)
    if len(text) > 20:
        text = text[:20] + '...'
    details = (msg,
               node.getType().getName(),
               _escNonPrinting(text),
               where)
    raise ParseError("%s at or near %s '%s'%s" % details)

class ParserContext:
    """A ParserContext object is passed to parser actions in order to
       carry context information"""

    ##Fields
    # args:
    #    The rhs of the current production.  None until SetState is first
    #    called, if the context was constructed with no arguments.
    # lineNumber:
    #    The line number of the first argument.  It has a few ugly out-of-
    #    bound values:  -2 => We haven't checked yet.
    #                   -1 => We checked, but we couldn't find out.
    # match:
    #    The node we're creating.
    # type:
    #    The NodeType object of the current node.
    # includes:
    #    Maps the name of every file included so far to 1; cleared by
    #    reset().
    # _parser:
    #    The parser currently using this context.  Set and removed by
    #    the parser itself; only present while a parse is running.
    ##Friends
    # The _build* functions inspect match.

    def __init__(self,args,match,type):
        """Constructs a new ParserContext"""
        self.args = args
        self.type = type
        self.match = match
        self.lineNumber = -2
        self.reset()

    def reset(self):
        """Prepare to parse a new set of files at toplevel."""
        self.includes = {}

    def SetState(self,args,match, type):
        """Called by the parser before each action to set the state of the
           ParserContext.

           Modifies: self"""
        self.args = args
        self.type = type
        self.match = match
        self.lineNumber = -2

    def include(self, fname, startSymbol=None):
        """Includes the file <fname> with the given start symbol, and
           replaces the current match with the result.  If <startSymbol>
           is not given, the name of the current node type is used.

           Raises a ParseError if <fname> has already been included.

           See doc/INCLUDE for BUGS.
           """
        assert parsely.INCLUDES_ENABLED

        if self.includes.has_key(fname):
            raise_parse_error(self.match, "Multiple includes for file "+fname)
        self.includes[fname] = 1
        if not startSymbol:
            startSymbol = self.type.getName()
        self.match = self._parser.include(fname, self.match, startSymbol)

    def error(self, msg):
        """Raises a ParseError based on the message <msg>."""
        raise_parse_error(self.match,msg)

    def getLineNumber(self):
        """Returns the line number of the current context, or -1 if it
           can't determine the line number (for example, if the production
           is empty)."""
        if self.lineNumber >= -1:
            # Already looked up since the last SetState.
            return self.lineNumber
        # args is None on a context that has not seen SetState yet; treat
        # that like an empty production instead of failing.
        for m in self.args or ():
            if m:
                self.lineNumber = m.getLineNumber()
                if self.lineNumber > 0:
                    break
        if self.lineNumber == -2:
            self.lineNumber = -1
        return self.lineNumber


