#!/usr/bin/python

# Parsely - A cross-language tool for parsing and file manipulation.
#
# Copyright (C) 1999-2000 Nick Mathewson
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

"""Regex manipulation library.

   This library does analysis and transformations on Perl-style regular
   expressions.  It can check them for case-insensitivity, interpolate
   other patterns, convert them to a canonical case-insensitive format,
   make default values, and (eventually) convert them into FLEX format.

   It also allows pattern references, of the format {:Identifier:}.  These
   can eventually be resolved to refer to particular subpatterns.

   There are, however, a few kinds of regex it can't handle:

   \\A \\Z          Beginning and end of string
   (?P<name>...)  Named patterns
   (?P=name)      Named backreferences
   \\1             Backreferences

   BUG: and a few others!"""

import string,re,types
from parsely import ParselyException

class REError(ParselyException):
    """Raised for malformed or unsupported regular expressions."""

    def __init__(self, *args):
        # The collected arguments are handed on as a single tuple, exactly
        # as received; they are not unpacked again.
        ParselyException.__init__(self, args)


# Map from each letter that may appear after a backslash to a sequence of
# all the characters which the corresponding regex will match.
#
# For the negated types (W, S, D) the first element is also what
# DefaultGenVisitor.visitCType picks as the default character, which is why
# a few codes are listed up front and therefore appear twice.
#
# The tables are ASCII/8-bit only: 'w' uses string.ascii_letters so that it
# stays the exact complement of 'W' whatever the current locale is.
reDef = {
    't': '\t',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    'a': '\a',
    # Python has no '\e' escape; spell ESC out explicitly.
    'e': '\033',
    'w': string.ascii_letters + string.digits + '_',
    'W': list(map(chr, [32, 96] + list(range(0, 48)) + list(range(58, 65)) +
                       list(range(91, 95)) + list(range(123, 256)))),
    's': ' \t\n\f\r',
    # Everything except the five characters of 's' (including backspace
    # and backslash, which the table used to leave out).
    'S': list(map(chr, [65, 11] + list(range(0, 9)) + list(range(14, 32)) +
                       list(range(33, 256)))),
    'd': '0123456789',
    'D': list(map(chr, [32] + list(range(0, 48)) + list(range(58, 256)))),
}

class ParsedRE:
    """Parsed regular expression.

    Fields:
      val: the pattern object of this regular expression: a Group for a
           parsed pattern, a LitString for a literal one, or None when no
           pattern was given.
      I:   is this regular expression case-insensitive?
           If 0, the pattern is not, and shouldn't be casei.  (/aaaa/)
           If 1, the pattern is not inherently casei, but should be
                 treated as casei.  (/aaaa/i)
           If 2, the pattern is inherently casei.  (/..../)
    """

    # re object to match pattern references: {:Identifier:}
    _patRefRe = re.compile(r'^\{\:([A-Za-z_][A-Za-z0-9_]*)\:\}')
    # re object to match repeat counts: {n}, {n,}, {,m} and {n,m}
    _repeatRe = re.compile(r'^\{([0-9]*)(,([0-9]*))?\}')

    _octDigits = '01234567'
    _hexDigits = '0123456789ABCDEFabcdef'

    def __init__(self, pat, I=None, L=None):
        """Creates a new ParsedRE.

        pat, if present, is a regular expression.
        If I is 0, the pattern is not desired to be case insensitive.
        If I is 1, the pattern should be treated as case insensitive.
        If L is true, the pattern should be treated as a literal.

        Whatever I is passed, self.I becomes 2 when the parsed pattern
        turns out to be intrinsically case-insensitive.

        Raises REError if pat is malformed or uses unsupported syntax."""
        if L:
            self.val = LitString(pat)
        elif pat:
            self.val = self._reParse(pat)
        else:
            self.val = None
        # See if the pattern is intrinsically case-i.  If so, set I to 2.
        if self.val and self.val.accept(_theIsCaseIVisitor):
            I = 2
        self.I = I

    def _parseOctChar(self, pat, i):
        """For internal use.  Given that an octal escape (a backslash
           followed by one to three octal digits) begins at pat[i],
           returns a new i and the corresponding Char value."""
        end = i + 1
        while (end < len(pat) and end < i + 4
               and pat[end] in self._octDigits):
            end = end + 1
        if end == i + 1:
            raise REError("Bad octal escape starting with '%s'" % pat[i:i+4])
        s = pat[i:end]
        return (end - 1, Char(s, chr(int(s[1:], 8))))

    def _parseHexChar(self, pat, i):
        """For internal use.  Given that a hex escape (backslash, 'x',
           then one or two hex digits) begins at pat[i], returns a new i
           and the corresponding Char value."""
        end = i + 2
        while (end < len(pat) and end < i + 4
               and pat[end] in self._hexDigits):
            end = end + 1
        if end == i + 2:
            raise REError("Bad hex escape starting with '%s'" % pat[i:i+4])
        s = pat[i:end]
        return (end - 1, Char(s, chr(int(s[2:], 16))))

    def _parseCharClass(self, pat, i):
        """For internal use.  Given that a character class ([A-Z]) begins
           at pat[i], returns the index of its closing bracket and the
           corresponding CClass value.

           A '-' that comes first or last in the class, or that directly
           follows a complete range, is taken literally."""
        n = len(pat)
        i = i + 1
        neg = i < n and pat[i] == '^'
        if neg:
            i = i + 1
        iOrig = i

        # First pass: collect the members, marking each range dash with
        # the placeholder string 'RANGE'.
        r = []
        while i < n and pat[i] != ']':
            ch = pat[i]
            if ch == '\\' and n >= i + 2:
                i, pc = self._parseEscapedChar(pat, i)
                r.append(pc)
            elif ch == '-':
                if len(r) == 0 or pat[i+1:i+2] == ']':
                    r.append(Char('\\-', '-'))
                else:
                    r.append('RANGE')
            else:
                r.append(Char(ch, ch))
            i = i + 1
        if i >= n:
            raise REError("Runaway character class")
        if not r:
            raise REError("Empty character class")

        # Second pass: fold every <Char> RANGE <Char> triple into a CRange.
        mem = []
        j = 0
        while j < len(r):
            if j + 1 < len(r) and r[j+1] == 'RANGE':
                if j + 2 >= len(r):
                    raise REError("Unterminated range in character class")
                if not (isinstance(r[j], Char) and isinstance(r[j+2], Char)):
                    raise REError("Malformed range in character class")
                mem.append(CRange(r[j], r[j+2]))
                j = j + 2
            elif r[j] == 'RANGE':
                # A dash right after a finished range, as in [a-b-c].
                mem.append(Char('\\-', '-'))
            else:
                mem.append(r[j])
            j = j + 1
        return i, CClass(pat[iOrig:i], neg, mem)

    def _parseEscapedChar(self, pat, i):
        """Handles backslashed characters.  'i' is the index of the
           backslash.  Returns a tuple of the new index (one before
           the next character) and the pattern object.

           Raises REError for backreferences and for unrecognized
           alphabetic escapes."""
        assert pat[i] == '\\'
        if i == len(pat) - 1:
            # A lone trailing backslash stands for itself.
            return (i, Char('\\', '\\'))
        ch = pat[i+1]
        written = pat[i:i+2]
        if ch == '0':
            return self._parseOctChar(pat, i)
        elif ch in '123456789':
            raise REError("Backreference '%s' not supported" % written)
        elif ch == 'x':
            return self._parseHexChar(pat, i)
        elif ch in 'tnrfae':
            return (i + 1, Char(written, reDef[ch]))
        elif ch in 'wWsSdD':
            return (i + 1, CType(ch))
        elif ch in 'bB':
            return (i + 1, Special(written))
        elif ch in string.ascii_letters:
            raise REError("Unrecognized alphanumeric escape '\\%s'" % ch)
        else:
            return (i + 1, Char(written, ch))

    def _scanGroup(self, pat, i):
        """For internal use.  Given that a group begins with the '(' at
           pat[i], returns the index of its matching ')' and the parsed
           group.  Raises REError if the group is never closed."""
        n = len(pat)
        j = i + 1
        special = None
        if pat[j:j+1] == '?':  # extension syntax.
            if j + 1 >= n:
                raise REError('Unterminated group')
            special = pat[j+1]
            j = j + 2
            i = i + 2
        depth = 0
        while depth >= 0:
            if j >= n:
                raise REError('Unterminated group')
            c = pat[j]
            if c == '\\':
                j = j + 1            # skip the escaped character
            elif c == '(':
                depth = depth + 1
            elif c == '[':
                # Parentheses inside a character class do not nest.
                j, ignored = self._parseCharClass(pat, j)
            elif c == ')':
                depth = depth - 1
            j = j + 1
        return j - 1, self._parseGroup(pat[i+1:j-1], special)

    def _reParse(self, pat):
        """The main regular expression parser.  Takes a pattern string
           and returns a (Group) pattern object.  Raises REError on
           malformed or unsupported input."""
        # Items in the current group
        r = []
        n = len(pat)
        # Index into pattern
        i = 0
        # Have we found a | character?
        isAlt = 0
        while i < n:
            ch = pat[i]
            # The following character, or '' at the end of the pattern.
            nxt = pat[i+1:i+2]
            # Is there a preceding item a quantifier could apply to?
            repeatable = len(r) > 0 and r[-1] != "OR"
            if ch == '\\':
                i, pc = self._parseEscapedChar(pat, i)
                r.append(pc)
            elif ch == '(':
                i, grp = self._scanGroup(pat, i)
                r.append(grp)
            elif ch == '[':
                i, cc = self._parseCharClass(pat, i)
                r.append(cc)
            elif ch in '*+?':
                if not repeatable:
                    r.append(Char(ch, ch))
                elif ch == '?' and isinstance(r[-1], Multi):
                    r[-1].greedy = 0
                else:
                    r[-1] = Multi(r[-1], ch)
            elif (ch == '{' and nxt != '' and nxt in "0123456789,"
                  and repeatable):
                m = ParsedRE._repeatRe.match(pat[i:])
                if not m:
                    raise REError("Malformed repeat count")
                low = int(m.group(1) or 0)
                if m.group(2) is None:
                    high = low               # {n}
                elif m.group(3):
                    high = int(m.group(3))   # {n,m} or {,m}
                else:
                    high = None              # {n,} or {,}
                r[-1] = Multi(r[-1], low=low, high=high)
                i = i + len(m.group(0)) - 1
            elif ch == '{' and nxt == ':':
                m = ParsedRE._patRefRe.match(pat[i:])
                if m:
                    r.append(PatRef(m.group(1)))
                    i = i + len(m.group(0)) - 1
                else:
                    raise REError("Malformed pattern reference")
            elif ch == '|':
                r.append("OR")
                isAlt = 1
            elif ch in '^$.':
                r.append(Special(ch))
            else:
                r.append(Char(ch, ch))
            i = i + 1

        if not isAlt:
            return Group(r)

        # Split the items on the "OR" markers, one Group per alternative.
        alts = []
        v = []
        for m in r:
            if m == "OR":
                alts.append(Group(v))
                v = []
            else:
                v.append(m)
        alts.append(Group(v))
        return Group((Alt(alts),))

    def _parseGroup(self, pat, special):
        """Given a group of format (?<special><pat>), returns the
           corresponding pattern object.  <special> is None for a plain
           (group).  Raises REError for unsupported extensions."""
        if special == '#':
            # A comment contributes nothing to the pattern.
            return LitString("")

        if special is None:
            pass
        elif special == ':':
            special = None
        elif special == 'x':
            pat = CleanXRE(pat)
            special = None
        elif special in 'iLms=!':
            pass
        else:
            raise REError("Extension syntax for (?%s...) not supported" %
                          special)

        pat = self._reParse(pat)
        pat.special = special
        return pat

    def accept(self, v):
        """Accepts a visitor <v>.  See the Visitor pattern in the GOF book."""
        return v.visitParsedRE(self)

    def toCaseI(self):
        """Returns a ParsedRE equivalent to this one, but case-insensitive"""
        return self.accept(_theToCaseIVisitor)

    def isPcreOnly(self):
        """Returns the name of a pcre-only feature used by this pattern,
           or a false value if there is none."""
        return self.accept(_theIsPcreOnlyVisitor)

    def write(self, flex=0, noGrouping=0, forceI=0):
        """Writes a regular expression for this re.

           If <flex>, writes a flex-compatible regular expression.
           If <noGrouping>, replaces all (groups) with (?:groups).
           If <forceI>, requires all casei patterns to be translated to a
           casei form."""
        wv = WriteVisitor(flex, noGrouping)
        if self.I == 1 and forceI:
            p = self.toCaseI()
        else:
            p = self
        out = p.accept(wv)
        if noGrouping and out[:2] == '?:':
            return out[2:]
        return out

    def resolve(self, resolver):
        """Resolves all pattern references in this pattern with the
           provided resolver."""
        rv = ResolvingVisitor(resolver)
        return self.accept(rv)

    def makeDefault(self):
        """Returns a string for a default value for this pattern."""
        return self.accept(_theDefaultGenVisitor)

    def __str__(self):
        return str(self.val)

class LitString:
    """Pattern object corresponding to a literal string.

    Fields:
      s: the literal string of this pattern.
    """

    def __init__(self, s):
        self.s = s

    def accept(self, v):
        """Dispatches to v.visitLitString and returns its result."""
        return v.visitLitString(self)

    def __str__(self):
        return "Lit(%s)" % writeCh(self.s)

class Alt:
    """Pattern object corresponding to a use of the | union operator.

    Fields:
      alternatives: list of the elements of this pattern.
    """

    def __init__(self, alternatives):
        self.alternatives = alternatives

    def accept(self, v):
        """Dispatches to v.visitAlt and returns its result."""
        return v.visitAlt(self)

    def __str__(self):
        return "Alt(%s)" % ','.join([str(a) for a in self.alternatives])

class Char:
    """Pattern object corresponding to a single character.

    Fields:
      val: the character which this pattern matches.
      written: the string with which this pattern was originally written.
    """

    def __init__(self, written, val=None):
        self.written = written
        # Without an explicit value the character matches what was written.
        if val is None:
            val = written
        self.val = val

    def characters(self):
        """Returns a sequence of all the characters this pattern could
           match."""
        return self.val

    def accept(self, v):
        """Dispatches to v.visitChar and returns its result."""
        return v.visitChar(self)

    def __str__(self):
        return "C(%s)" % writeCh(self.val)

class CClass:
    r"""Pattern object corresponding to a character class, such as [^A-Z]
    or [\s].

    It may be negated.
    Its members must be Char, CRange, or CType.

    Fields:
      members: a list of the members of this class, in order.
      neg:     is this class negated?  (boolean)
      written: how was this class originally written?  (The text between
               the brackets, without any leading '^'.)
    """

    def __init__(self, written, neg, members):
        self.written = written
        self.neg = neg
        self.members = members

    def accept(self, v):
        """Dispatches to v.visitCClass and returns its result."""
        return v.visitCClass(self)

    # Preferred characters, in order, when making defaults.  The tail runs
    # down through space (32) and NUL (0) so that every 8-bit character is
    # a candidate and a class such as [ ] still has a default.
    prefChar = list(map(chr, list(range(65, 90)) + list(range(33, 65)) +
                             list(range(90, 127)) +
                             list(range(32, -1, -1)) +
                             list(range(127, 256))))

    def toSet(self):
        """Returns a map from every character in this class to 1.
           Negation is already applied in the result."""
        on = {}
        for m in self.members:
            for c in m.characters():
                on[c] = 1
        if self.neg:
            # Complement over the 8-bit character range.
            for code in range(0, 256):
                c = chr(code)
                if c in on:
                    del on[c]
                else:
                    on[c] = 1
        return on

    def _classChar(self, code):
        """Returns the Char for character code <code>, written so that it
           is safe inside a character class."""
        ch = chr(code)
        if ch == '-':
            # writeCh leaves '-' alone, but inside a class a bare dash
            # would be read as a range operator.
            return Char('\\-', ch)
        return Char(writeCh(ch), ch)

    def setToRanges(self, set):
        """Given a map from every character in this class to 1, returns a
           CClass whose members are Chars and CRanges that cover that map
           exactly.  The result keeps this class's negation flag."""
        codes = list(map(ord, set.keys()))
        codes.sort()
        writ = []
        val = []
        idx = 0
        while idx < len(codes):
            # Extend the run while the character codes stay consecutive.
            start = idx
            while idx + 1 < len(codes) and codes[idx+1] == codes[idx] + 1:
                idx = idx + 1
            low = self._classChar(codes[start])
            writ.append(low.written)
            if idx > start:
                high = self._classChar(codes[idx])
                val.append(CRange(low, high))
                writ.append('-')
                writ.append(high.written)
            else:
                val.append(low)
            idx = idx + 1
        return CClass(''.join(writ), self.neg, val)

    def __str__(self):
        c = ''.join([str(m) for m in self.members])
        return "CClass(%s,%s)" % (self.neg, c)

class CType:
    r"""Pattern object corresponding to a character type, such as \s or \W.

    Fields:
      val: the character for this type.  (i.e. 's' or 'W')
    """

    def __init__(self, v):
        self.val = v

    def accept(self, v):
        """Dispatches to v.visitCType and returns its result."""
        return v.visitCType(self)

    def characters(self):
        """Returns all characters which this pattern would match."""
        return reDef[self.val]

    def isCaseI(self):
        """Always true."""
        return 1

    # Contents of the flex character class equivalent to each type.
    flexEquiv = {
        'w': 'A-Za-z0-9_',    'W': '^A-Za-z0-9_',
        's': ' \r\t\n\f',     'S': '^ \t\r\n\f',
        'd': '0-9',           'D': '^0-9',
    }

    def __str__(self):
        return "CType(%s)" % self.val

class Special:
    r"""Pattern object for special character: . $ ^ \b \B.

    Fields:
      val: the written representation of this character.
    """

    def __init__(self, v):
        self.val = v

    def accept(self, v):
        """Dispatches to v.visitSpecial and returns its result."""
        return v.visitSpecial(self)

    def __str__(self):
        return "Special(%s)" % self.val

class CRange:
    """Range within a character class.

    Fields:
      low: the lower bound of this range.
      high: the upper bound of this range.

    Both bounds are objects with a single-character 'val' (Chars, when
    built by the parser).
    """

    def __init__(self, l, h):
        self.low = l
        self.high = h

    def characters(self):
        """Returns a list of all characters which this pattern would
           match; empty if the bounds are reversed."""
        first = ord(self.low.val)
        last = ord(self.high.val)
        return [chr(code) for code in range(first, last + 1)]

    def __str__(self):
        return "Chars(%s..%s)" % (str(self.low), str(self.high))

class PatRef:
    """Pattern reference.

    Fields:
      name: the name of the pattern.
      pat:  the ParsedRE object for the pattern itself; None until the
            reference has been resolved.
    """

    def __init__(self, ref):
        self.name = ref
        self.pat = None

    def accept(self, v):
        """Dispatches to v.visitPatRef and returns its result."""
        return v.visitPatRef(self)

    def __str__(self):
        return "Ref(%s)" % self.name

class Group:
    """A pattern object for a group, such as (AB).  It may have a 'special'
    character, corresponding to (?X...) syntax.

    Fields:
      members: a sequence of the patterns in this group.
      special: the 'special' character, or None.
    """

    def __init__(self, contents, special=None):
        self.special = special
        self.members = contents

    def accept(self, v):
        """Dispatches to v.visitGroup and returns its result."""
        return v.visitGroup(self)

    def __str__(self):
        return "Group(%s)" % ','.join([str(m) for m in self.members])

class Multi:
    """A pattern object for a multiplicity.  It may have a low bound and
    a high bound.  It may be non-greedy.

    Fields:
      base:   the base pattern of this pattern.
      greedy: try to match as many matches as possible? (boolean)
      high:   the upper bound for this pattern, or None
      low:    the lower bound for this pattern.

    For example, /(ABC){,3}?/ would have base=ABC, greedy=0, low=0, high=3.
    """

    def __init__(self, base, range=None, low=0, high=None, greedy=1):
        """<range>, when given as one of '*', '+' or '?', overrides <low>
           and <high>; any other value is ignored."""
        self.base = base
        if range == '*':
            low, high = 0, None
        elif range == '+':
            low, high = 1, None
        elif range == '?':
            low, high = 0, 1
        self.low = low
        self.high = high
        self.greedy = greedy

    def accept(self, v):
        """Dispatches to v.visitMulti and returns its result."""
        return v.visitMulti(self)

    def __str__(self):
        gstr = ""
        if not self.greedy:
            gstr = "?"
        return "Multi(%s,%s..%s%s)" % (str(self.base), self.low, self.high,
                                       gstr)


## Visitor stuff.

class REVisitor:
    """Visitor base class and default implementation.

    The default methods rebuild the containers (ParsedRE, LitString, Alt,
    Group, Multi) around the visited children and return the leaves (Char,
    CClass, CType, Special, PatRef) unchanged.

    See _Design Patterns_ for more information about the Visitor pattern."""

    def __init__(self):
        pass

    def visitParsedRE(self, p):
        """Returns a new ParsedRE wrapping the visited value of <p>."""
        newParsedRE = ParsedRE(None)
        newParsedRE.val = p.val.accept(self)
        newParsedRE.I = p.I
        # Return the rebuilt object, not the input: otherwise whatever a
        # subclass does to the children is thrown away.
        return newParsedRE

    def visitLitString(self, s):
        """Expands the literal into a Group of visited Chars."""
        chs = []
        for c in s.s:
            chs.append(Char(writeCh(c), c).accept(self))
        return Group(chs)

    def visitAlt(self, a):
        newAlts = []
        for child in a.alternatives:
            newAlts.append(child.accept(self))
        return Alt(newAlts)

    def visitChar(self, c):
        return c

    def visitCClass(self, cc):
        return cc

    def visitCType(self, ct):
        return ct

    def visitSpecial(self, s):
        return s

    def visitPatRef(self, pr):
        return pr

    def visitGroup(self, g):
        newMembers = []
        for child in g.members:
            newMembers.append(child.accept(self))
        return Group(newMembers, g.special)

    def visitMulti(self, m):
        newBase = m.base.accept(self)
        return Multi(newBase, low=m.low, high=m.high, greedy=m.greedy)

class ToCaseIVisitor(REVisitor):
    """Visitor which converts a pattern to a case-insensitive form.

    Only ASCII letters are folded."""

    def __init__(self):
        pass

    def visitParsedRE(self, pre):
        """Returns <pre> itself if it is already inherently casei (I == 2),
           otherwise a converted copy with I set to 2."""
        if pre.I == 2:
            return pre
        other = ParsedRE(None)
        other.val = pre.val.accept(self)
        other.I = 2
        return other

    def visitChar(self, ch):
        """Replaces a letter by the class of its two cases; other
           characters are returned as they are."""
        if ch.val not in string.ascii_letters:
            return ch
        u = ch.val.upper()
        d = ch.val.lower()
        return CClass('%s%s' % (u, d), None, [Char(u, u), Char(d, d)])

    def visitCClass(self, cc):
        # Work from the members rather than cc.toSet(): the latter would
        # apply the negation, which setToRanges() carries over by itself.
        on = {}
        for m in cc.members:
            for c in m.characters():
                on[c] = 1
                if c in string.ascii_letters:
                    on[c.lower()] = 1
                    on[c.upper()] = 1
        return cc.setToRanges(on)

    def visitGroup(self, g):
        # A (?i...) group is left as it is.
        if g.special == 'i':
            return g
        return REVisitor.visitGroup(self, g)

    def visitPatRef(self, pr):
        # Could be more sophisticated here.
        if pr.pat:
            if pr.pat.I == 2:
                return pr.pat.val
            return pr.pat.toCaseI().val
        # Unresolved reference: wrap it in a (?i...) group instead.
        return Group((pr,), special='i')

class WriteVisitor(REVisitor):
    """Visitor which converts a pattern to a string representation.

    Fields:
      flex: are we operating in flex mode?
      noGrouping: do we convert (groups) to (?:groups) ?  Always 0 in
          flex mode.
      toplevel: should we omit the outermost (parenthesis) ?
    """

    def __init__(self, flex=0, noGrouping=0, toplevel=1):
        # The 'toplevel' flag indicates that somebody else takes
        # the responsibility for grouping the current re properly.
        self.toplevel = toplevel

        if flex:
            self.flex = 1
            self.noGrouping = 0
        else:
            self.flex = 0
            self.noGrouping = noGrouping

    def visitParsedRE(self, re):
        return re.val.accept(self)

    def visitLitString(self, ls):
        return writeCh(ls.s, self.flex)

    def visitAlt(self, alt):
        r = []
        top = self.toplevel
        for a in alt.alternatives:
            # Each alternative is written without parentheses of its own...
            self.toplevel = 1
            written = a.accept(self)
            # ...except in flex mode, where each one is parenthesized.
            if self.flex and written and written[0] != '(':
                written = '(%s)' % written
            r.append(written)
        self.toplevel = top
        return '|'.join(r)

    def visitChar(self, c):
        if self.flex:
            return writeCh(c.val, 1)
        return c.written

    def visitCClass(self, cc):
        if self.flex:
            # Spell the class out as explicit ranges.  toSet() has already
            # applied any negation, so the result is written positively.
            expanded = cc.setToRanges(cc.toSet())
            return "[%s]" % expanded.written
        nChar = ""
        if cc.neg:
            nChar = "^"
        return "[%s%s]" % (nChar, cc.written)

    def visitCType(self, ct):
        if self.flex:
            return "[%s]" % CType.flexEquiv[ct.val]
        return "\\" + ct.val

    def visitSpecial(self, s):
        return s.val

    def visitPatRef(self, pr):
        if self.flex:
            # NOTE(review): pName is not set anywhere in this file;
            # presumably the resolver's patterns carry it - confirm.
            return "{%s}" % pr.pat.pName
        elif pr.pat:
            ngs = ""
            if self.noGrouping:
                ngs = "?:"
            p = pr.pat
            # We'd really rather do this with the (?i...) operator,
            # but it didn't get added to re until Python 1.5.2.  Since
            # so many legacy systems have 1.5.1, we'll do it the hard
            # way instead.
            if pr.pat.I:
                p = pr.pat.toCaseI()
            # The parentheses are supplied here, so the referenced pattern
            # is written as a top-level one.  Put the flag back afterwards:
            # a referenced literal or alternation would otherwise leave it
            # set and strip the parentheses of the groups that follow.
            top = self.toplevel
            self.toplevel = 1
            try:
                body = p.accept(self)
            finally:
                self.toplevel = top
            return "(%s%s)" % (ngs, body)
        else:
            return "{:%s:}" % pr.name

    def visitGroup(self, g):
        top = self.toplevel
        ngs = ""
        if self.noGrouping and not top:
            ngs = "?:"
        if self.flex:  # ms
            if g.special == 'i':
                # Expand the members by hand.  g.accept() would not do:
                # ToCaseIVisitor.visitGroup returns (?i...) groups untouched.
                g = REVisitor.visitGroup(ToCaseIVisitor(), g)
            elif g.special and g.special in '=!':
                raise REError("Zero-width assertion (?%s...) cannot be "
                              "written in flex mode" % g.special)
        elif g.special:  # iLms=!
            ngs = "?" + g.special
        r = [ngs]
        if top and len(g.members) == 1 and isinstance(g.members[0], Alt):
            # Special-case the parens around a toplevel alt.
            r.append(g.members[0].accept(self))
        else:
            self.toplevel = 0
            for m in g.members:
                r.append(m.accept(self))
        if top:
            return ''.join(r)
        return "(" + ''.join(r) + ")"

    def visitMulti(self, ml):
        if ml.low == 0 and ml.high is None:
            m = "*"
        elif ml.low == 1 and ml.high is None:
            m = "+"
        elif ml.low == 0 and ml.high == 1:
            m = "?"
        elif ml.high is None:
            m = "{%s,}" % ml.low
        else:
            m = "{%s,%s}" % (ml.low, ml.high)
        gStr = ""
        if not ml.greedy:
            gStr = "?"
        return ml.base.accept(self) + m + gStr

class ResolvingVisitor(REVisitor):
    """Visitor to resolve all pattern references.

    Fields:
      resolver: the pattern resolver used to resolve the references; its
          resolvePattern(name) is called once per reference visited.
    """

    def __init__(self, resolver):
        self.resolver = resolver

    def visitPatRef(self, pr):
        """Stores the resolved pattern in pr.pat (in place) and returns
           <pr>.  Raises REError if the resolver returns a false value."""
        pr.pat = self.resolver.resolvePattern(pr.name)
        if not pr.pat:
            raise REError("No declaration for pattern reference", pr.name)
        return pr

class DefaultGenVisitor(REVisitor):
    """Visitor to make a default value.

    Every method returns a string, or None when no default could be
    found for the visited pattern."""

    def __init__(self):
        pass

    def visitParsedRE(self, re):
        return re.val.accept(self)

    def visitLitString(self, ls):
        return ls.s

    def visitAlt(self, alt):
        # The first alternative that has a default wins.
        for a in alt.alternatives:
            s = a.accept(self)
            if s is not None:
                return s
        return None

    def visitChar(self, c):
        return c.val

    def visitCClass(self, cc):
        # Pick the most preferred character the class contains.
        s = cc.toSet()
        for c in CClass.prefChar:
            if c in s:
                return c
        return None

    def visitCType(self, ct):
        return reDef[ct.val][0]

    def visitSpecial(self, s):
        if s.val == '.':
            return "A"
        return ""

    def visitPatRef(self, pr):
        return pr.pat.accept(self)

    def visitGroup(self, g):
        r = []
        if g.special and g.special in '=!':  # iLms
            print("Warning: guessing default for pattern with zero-width lookahead")
            return ""
        for m in g.members:
            d = m.accept(self)
            if d is None:
                return None
            r.append(d)
        return ''.join(r)

    def visitMulti(self, ml):
        d = ml.base.accept(self)
        if d is None:
            return None
        # Repeat the base default the minimum number of times.
        return d * ml.low

class IsCaseIVisitor(REVisitor):
    """Visitor to test whether a pattern seems case insensitive.  It will
    correctly recognize all non-casei patterns as such, but may
    incorrectly label some case-insensitive patterns (like Ab|aB)
    as non-casei.

    Every method returns a true value for "case insensitive" and a false
    one otherwise.  Only ASCII letters are considered."""

    def __init__(self):
        pass

    def visitLitString(self, ls):
        for x in ls.s:
            if x in string.ascii_letters:
                return 0
        return 1

    def visitAlt(self, alt):
        for a in alt.alternatives:
            if not a.accept(self):
                return 0
        return 1

    def visitChar(self, ch):
        return ch.val not in string.ascii_letters

    def visitCClass(self, cc):
        # Case-insensitive only if every letter is in the class in both
        # of its cases or in neither.
        s = cc.toSet()
        for c in string.ascii_lowercase:
            if (c in s) != (c.upper() in s):
                return 0
        return 1

    def visitCType(self, ct):
        return 1

    def visitSpecial(self, s):
        return 1

    def visitPatRef(self, pr):
        # An unresolved reference cannot be vouched for.
        if pr.pat:
            return pr.pat.I == 2
        return 0

    def visitGroup(self, g):
        for m in g.members:
            if not m.accept(self):
                return 0
        return 1

    def visitMulti(self, m):
        return m.base.accept(self)

class IsPcreOnlyVisitor(REVisitor):
    """Visitor to test whether a regex uses pcre-only features.  On
    'yes', return the name of the feature; otherwise return 0."""

    def visitParsedRE(self, p):
        return p.val.accept(self)

    def visitLitString(self, s):
        return 0

    def visitAlt(self, a):
        for child in a.alternatives:
            x = child.accept(self)
            if x:
                return x
        return 0

    def visitChar(self, c):
        return 0

    def visitCClass(self, cc):
        return 0

    def visitCType(self, ct):
        return 0

    def visitSpecial(self, s):
        if s.val in ('\\b', '\\B'):
            return 'word-boundary'
        return 0

    def visitPatRef(self, pr):
        return pr.pat.accept(self)

    def visitGroup(self, g):
        if g.special and g.special in '=!':
            return 'zero-width assertion'
        for child in g.members:
            x = child.accept(self)
            if x:
                return x
        return 0

    def visitMulti(self, m):
        if not m.greedy:
            return 'non-greedy pattern matching'
        # The repeated pattern may itself use a pcre-only feature.
        return m.base.accept(self)

def CleanXRE(val):
    """Given a verbose re, strips all space and comments."""
    val = _escSpace.sub(_es, val)
    return _stripSpace.sub('', val)

_escSpace = re.compile(r'\\[ \n\t\r#]')
def _es(match):
    return "\\%.3o" % ord(match.group(0)[1])
_stripSpace = re.compile(r'(?:[ \n\t\r]+|#.*$)',re.M)

_chEquiv = {
    '\n' : '\\n',
    '\t' : '\\t',
    '\r' : '\\r',
    '\f' : '\\f',
    '\a' : '\\a',
    '\e' : '\\e' }

# Shared visitor instances, created once at import time.  ParsedRE uses
# them in toCaseI(), makeDefault(), __init__ (the intrinsic
# case-insensitivity check) and isPcreOnly() respectively.
_theToCaseIVisitor = ToCaseIVisitor()
_theDefaultGenVisitor = DefaultGenVisitor()
_theIsCaseIVisitor = IsCaseIVisitor()
_theIsPcreOnlyVisitor = IsPcreOnlyVisitor()

def writeCh(strn, flex=0):
    """Escapes every character in a string for inclusion in a regex.

       Printable ASCII characters are copied, with a backslash put in
       front of the regex metacharacters (the '.' wildcard included).
       Other characters use their _chEquiv escape if there is one, and a
       three-digit octal escape otherwise.

       <flex> is accepted for the callers' sake but currently unused."""
    # XXXX-C Spaces in flex mode == bad!
    s = []
    for ch in strn:
        if 32 <= ord(ch) < 127:
            if ch in '\\"\'?{}[]()*+,^$|.':
                s.append('\\' + ch)
            else:
                s.append(ch)
        elif ch in _chEquiv:
            s.append(_chEquiv[ch])
        else:
            s.append("\\%.3o" % ord(ch))
    return ''.join(s)

class resolverFromMap:
    """Given a map from names to patterns, constructs a resolver.

    Fields:
      m: a map from pattern names to patterns.  It is a copy taken at
         construction time, so later changes to the caller's map are
         not seen.
    """

    def __init__(self, m):
        self.m = m.copy()

    def resolvePattern(self, name):
        """Returns the pattern registered under <name>, or None if there
           is none."""
        return self.m.get(name)
