/************************************************************************/
/*                                                                      */
/*      RegExp -- Create and manipulate regular expressions             */
/*                                                                      */
/*      This module compresses the ASCII text form of regular           */
/*      expressions into a much more compact and manageable form.       */
/*      The compact form can be used to drive a match engine,           */
/*      or can be expanded where table-driven searching is used.        */
/*                                                                      */
/*      The files have been renamed to regexp00.[ch] to avoid           */
/*      any confusion with the far more important standard              */
/*      regular expression facilities in standard Unixes.               */
/*      However, the module name used inside the file has not           */
/*      been changed.  Apologies if this causes any confusion.          */
/*                                                                      */
/*      The code could also be improved by separating the               */
/*      bitmasks accompanying each class specifier into a separate      */
/*      array.  This would allow the RE encoding to be traversed        */
/*      backwards as well as forwards.  Requires (a little) more        */
/*      memory management than existing sequential scheme.              */
/*      I suspect that it would be still better to use                  */
/*      doubly-linked lists.  However, these sorts of issues            */
/*      can wait until the next version (supporting extended RE         */
/*      syntax, and hopefully also DFAs?) is attempted.                 */
/*                                                                      */
/*      This module now uses 32-bit codes; the original used            */
/*      16-bit codes.  This should make RE elements easier to           */
/*      manage.  Hopefully, this format will also permit                */
/*      storage of Unicode glyphs.                                      */
/*                                                                      */
/*      Some portions of this code were derived after reading           */
/*      GNU Sed v1.x by Eric S. Raymond (modified by Hern Chen).        */
/*                                                                      */
/*      Copyright (C) 1995-2000 Grouse Software.  All rights reserved.  */
/*      Written for Grouse by behoffski (Brenton Hoff).                 */
/*                                                                      */
/*      Free software: no warranty; use anywhere is ok; spread the      */
/*      sources; note any mods; share variations and derivatives        */
/*      (including sending to behoffski@grouse.com.au).                 */
/*                                                                      */
/************************************************************************/

#include "ascii.h"
#include <compdef.h>
#include <ctype.h>
#include <memory.h>
#include "platform.h"
#include "regexp00.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*Limit on number of codes allowed in a compiled format (sigh)*/
#define REGEXP_CODES_MAX                8192

/*Estimate of worst-case overhead added to RE-specified codes (e.g. -w, -x)*/
#define REGEXP_CODE_OVERHEAD            20

/*Rough character frequency table to hopefully improve EasiestFirst*/

module_scope UINT RegExp_ByteFrequency[256] = {
         2358,   252,   196,   345,   159,   164,   232,   143, /*  0-  7*/
          225,  1468,  7426,    93,   119,  6618,   123,   242, /*  8- 15*/
          110,    61,   106,    65,    84,    76,    90,    52, /* 16- 23*/
           73,    52,    97,   130,    66,    49,    89,    72, /* 24- 31*/
       65535u,   248,   994,   374,   278,   366,   485,   651, /* 32- 39*/
         1380,  1362,  4321,   370,  2892,  3537,  4792,  2544, /* 40- 47*/
         8016,  4254,  3861,  2130,  2600,  1925,  1866,  1569, /* 48- 55*/
         2066,  2143,  1295,  1019,   759,  1038,   837,   198, /* 56- 63*/
          247,  2432,  1170,  2261,  1831,  2773,  1926,   927, /* 64- 71*/
          846,  2201,   269,   411,  1373,  1404,  1468,  1712, /* 72- 79*/
         1950,   226,  2075,  2469,  2169,   801,   641,   701, /* 80- 87*/
          414,   412,   627,   525,   775,   449,   284,  1557, /* 88- 95*/
           70,  9743,  2214,  4936,  5133, 15939,  3254,  2665, /* 96-103*/
         4340,  9307,   387,  1206,  5902,  3765,  8797,  9854, /*104-111*/
         3935,   260,  9408,  8591, 11138,  4156,  1738,  1756, /*112-119*/
         1168,  2075,   518,   327,   346,   334,   255,    75, /*120-127*/
          134,    71,    99,   243,    88,    59,   113,    66, /*128-135*/
           88,   243,    92,   368,   109,    91,   121,    52, /*136-143*/
           86,    49,    60,    47,    56,    44,    63,    53, /*144-151*/
           62,    66,   219,    70,    59,    45,    65,    55, /*152-159*/
           60,    57,    56,    55,    51,    49,    50,    45, /*160-167*/
           59,    48,    70,    52,    56,    53,    59,    58, /*168-175*/
           53,    48,    55,    51,    56,    40,    66,    52, /*176-183*/
          127,    60,    70,    55,    62,    44,    61,    52, /*184-191*/
           76,    68,    56,    58,   326,    51,    69,   131, /*192-199*/
           91,    68,    70,    67,    63,   100,    58,    48, /*200-207*/
           60,    61,    82,    49,    49,    44,    51,    44, /*208-215*/
          109,    52,    68,    61,    66,    54,    62,    80, /*216-223*/
           75,    45,    68,    48,    63,    47,    71,    49, /*224-231*/
           88,   260,    65,    66,    95,    48,    78,    52, /*232-239*/
           90,    56,    78,    45,    88,    42,    89,    62, /*240-247*/
          100,    67,   104,   202,   109,    71,   110,   660  /*248-255*/
};

typedef struct {
        /*Reason for last failure (if not associated with an existing RE)*/
        RegExp_FailureCode FailureCode;

} REGEXP_MODULE_CONTEXT;

module_scope REGEXP_MODULE_CONTEXT gRegExp;


/************************************************************************/
/*                                                                      */
/*      ShowCodes -- Disassemble RE coding (for debugging)              */
/*                                                                      */
/************************************************************************/
public_scope void
RegExp_ShowCodes(CHAR *pLabel, RegExp_Specification *pRE)
{
        RE_ELEMENT *pElem = pRE->CodeList;
        RE_ELEMENT Elem;
        UINT CodeIx;
        UINT Value;

        /*Display information carried around about RE*/
        printf("%s(%u)", pLabel, pRE->NrStates);

        /*Loop through each code element*/
        for (;;) {
                /*Display element*/
                Elem = *pElem++;
                switch (REGEXP_TYPE_R(Elem)) {
                case REGEXP_TYPE_MATCH_ANY:
                        printf(" _ANY");
                        break;

                case REGEXP_TYPE_CLASS:
                        printf(" _CLASS[");
                        for (CodeIx = 0; CodeIx < REGEXP_BITMASK_NRCODES;
                                        CodeIx++) {
                                printf("%08lx", *pElem++);
                        }
                        printf("]");
                        break;

                case REGEXP_TYPE_CLASS_ALL_BUT:
                        printf(" _ALLBUT[");
                        for (CodeIx = 0; CodeIx < REGEXP_BITMASK_NRCODES;
                                        CodeIx++) {
                                printf("%08lx", *pElem++);
                        }
                        printf("]");
                        break;

                case REGEXP_TYPE_ANCHOR_START:
                        printf(" _ANCHOR");
                        break;

                case REGEXP_TYPE_ANCHOR_END:
                        printf(" _EOL");
                        break;

                case REGEXP_TYPE_END_OF_EXPR:
                        printf(" _END");
                        break;

                case REGEXP_TYPE_LITERAL:
                        /*Display it directly (?)*/
                        Value = REGEXP_LITERAL_R(Elem);
                        if (Value > 255) {
                                /*Unicode value*/
                                printf(" \\u%04x", Value);
                        } else if (isprint(Value)) {
                                printf(" %c", Value);
                        } else {
                                printf(" \\x%02x", Value);
                        }
                        break;

                case REGEXP_TYPE_WORD_BEGINNING:
                        printf(" _WordBeginning");
                        break;

                case REGEXP_TYPE_WORD_END:
                        printf(" _WordEnd");
                        break;

                case REGEXP_TYPE_WORD_BOUNDARY:
                        printf(" _WordBoundary");
                        break;

                case REGEXP_TYPE_WORD_NONBOUNDARY:
                        printf(" _WordNonboundary");
                        break;

                case REGEXP_TYPE_WORD_PREV_NONWORD:
                        printf(" _WordPrevNonword");
                        break;

                case REGEXP_TYPE_DEAD_END:
                        printf(" _DeadEnd");
                        break;

                default:
                        /*Unrecognised type*/
                        printf(" _Unknown(%u)", 
                               REGEXP_TYPE_R(Elem));
                        break;

                }

                /*Display element repeat modifier*/
                switch (REGEXP_REPEAT_R(Elem)) {
                case REGEXP_REPEAT_MATCH_SELF:
                        /*Default: merely show no modifier*/
                        break;

                case REGEXP_REPEAT_0_OR_1:
                        printf(":?");
                        break;

                case REGEXP_REPEAT_0_OR_MORE:
                        printf(":*");
                        break;

                case REGEXP_REPEAT_1_OR_MORE:
                        printf(":+");
                        break;

                default:
                        printf(":UnknownRepeat(%u)", 
                               REGEXP_REPEAT_R(Elem));
                        break;

                }

                /*Display case insensitivty modifier, if given*/
                if ((Elem & REGEXP_CASE_MASK) == REGEXP_CASE_INSENSITIVE) {
                        printf(":i");
                }

                /*Display word modifier, if it's interesting*/
                switch (REGEXP_WORD_R(Elem)) {
                case REGEXP_WORD_UNRESTRICTED:
                        break;

                case REGEXP_WORD_WORD_CHARS:
                        printf(":w");
                        break;

                case REGEXP_WORD_NONWORD_CHARS:
                        printf(":W");
                        break;

                default:
                        printf(":UnknownWord(%u)", 
                               REGEXP_WORD_R(Elem));
                        break;

                }

                /*Did we hit the end of the RE?*/
                if (REGEXP_TYPE_R(Elem) == REGEXP_TYPE_END_OF_EXPR) {
                        /*Yes, finished displaying codes*/
                        printf("\n");
                        return;
                }


        }

} /*ShowCodes*/


/************************************************************************/
/*                                                                      */
/*      ClassName -- Handle named classes (e.g. [:punct:])              */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RegExp_ClassName(BYTE **ppText, LWORD Flags, RE_ELEMENT *pCode)
{
        BYTE *pText;
        UINT ch;
        BOOL Recognised = FALSE;

        /*Set flags based on name*/
        pText = *ppText + 1;

        /*Does the word seem to be five characters long?*/
        if ((pText[5] == ':') && (pText[6] == ']')) {
                /*Yes, look for 5-letter names*/
                if (strncmp(pText, "alnum", 5) == 0) {
                        /*Add members to list (being wary of 16-bit chars)*/
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                /*Is this character a member of the class?*/
                                if (isalnum(ch)) {
                                        /*Yes, mark it in our list*/
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "alpha", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isalpha(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "cntrl", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (iscntrl(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "digit", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isdigit(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "graph", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isgraph(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "lower", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (islower(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "print", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isprint(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "punct", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (ispunct(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "space", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isspace(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                } else if (strncmp(pText, "upper", 5) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isupper(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                }

                /*Did we recognise the name?*/
                if (Recognised) {
                        /*Yes, step past name and delimiter*/
                        *ppText = pText + 7;

                }

        /*No, does code seem to be 6 characters long?*/
        } else if ((pText[6] == ':') && (pText[7] == ']')) {
                /*Yes, look for 6-character names*/
                if (strncmp(pText, "xdigit", 6) == 0) {
                        Recognised = TRUE;
                        ch = 0; 
                        do {
                                if (isxdigit(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);

                }

                /*Did we recognise the name?*/
                if (Recognised) {
                        /*Yes, step past name and delimiter*/
                        *ppText = pText + 8;

                }

        }

        /*Report whether the class name was recognised*/
        return Recognised;

} /*ClassName*/


/************************************************************************/
/*                                                                      */
/*      CountMembers -- Report how many members are in the set          */
/*                                                                      */
/************************************************************************/
module_scope UINT
RegExp_CountMembers(RE_ELEMENT *pCode)
{
        UINT Count;
        UINT ElementNr;
        UINT32 BitSet;

        /*Loop across all members of the set*/
        Count = 0;
        for (ElementNr = 0; ElementNr < REGEXP_BITMASK_NRCODES; ElementNr++) {
                /*Get the next element of the bitmask, and skip it if 0*/
                BitSet = (UINT32) *pCode++;
                if (BitSet == 0) {
                        continue;
                }

                /*Count how many bits are set in this element*/
                BitSet = ((BitSet & 0xaaaaaaaa) >>  1) + (BitSet & 0x55555555);
                BitSet = ((BitSet & 0xcccccccc) >>  2) + (BitSet & 0x33333333);
                BitSet = ((BitSet & 0xf0f0f0f0) >>  4) + (BitSet & 0x0f0f0f0f);
                BitSet = ((BitSet & 0xff00ff00) >>  8) + (BitSet & 0x00ff00ff);
                BitSet = ((BitSet & 0xffff0000) >> 16) + (BitSet & 0x0000ffff);

                /*Add the element's bit count to the total bit count*/
                Count += BitSet;
        }

        return Count;

} /*CountMembers*/


/************************************************************************/
/*                                                                      */
/*      ClassCompile -- Decode class specifier and emit RE codes        */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RegExp_ClassCompile(BYTE **ppText, LWORD Flags, RE_ELEMENT **ppCode)
{
        BYTE ch;
        UINT Start;
        BYTE *pText;
        BYTE *pStart;
        RE_ELEMENT *pCode;
        RE_ELEMENT CaseFlag = 0;

        /*Set up case-insensitivity flag for RE elements if specified*/
        if (Flags & REGEXP_CONFIG_IGNORE_CASE) {
                CaseFlag = REGEXP_CASE_INSENSITIVE;
        }

        /*Copy pointed-to pointers into local variables*/
        pText = *ppText;
        pCode = *ppCode;

        /*Collect characters specified in a bit set*/
        memset(pCode + 1, 0, REGEXP_BITMASK_NRCODES * sizeof(RE_ELEMENT));

        /*Start of class of characters: is first char "^"?*/
        if (*pText == ((BYTE) '^')) {
                /*Yes, match all but named characters*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_CLASS_ALL_BUT) | 
                        CaseFlag;
                pText++;

                /*Are we limiting "[^...]" matches to word chars?*/
                if (Flags & REGEXP_CONFIG_WORD_CONSTRAIN) {
                        /*Yes, mark all non-word chars as non-matching*/
                        ch = 0; 
                        do {
                                if (! isalnum(ch)) {
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);
                        REGEXP_BIT_CLEAR(pCode, '_');

                }

        } else {
                /*Normal class*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_CLASS) | 
                        CaseFlag;

        }

        /*Remember RE start so we may handle [] correctly*/
        pStart = pText;

        /*Loop until end-of-set specifier found*/
        for (;;) {
                /*Does this char close the RE? (Note: [] is special)*/
                if ((*pText == ((BYTE) ']')) && 
                                (pText != pStart)) {
                        /*Yes, break out of processing loop*/
                        break;
                }

                /*Deal with the next byte of the class*/
                ch = *pText++;

                /*Has the class terminated abnormally?*/
                if (ch == ((BYTE) NUL)) {
                        /*Yes, bad RE syntax*/
                        gRegExp.FailureCode = REGEXP_FAIL_UNMATCHED_CLASS_OPEN;
                        return FALSE;
                }

                /*Do we have a "[:" sequence introducing a class name?*/
                if ((Flags & REGEXP_CONFIG_NAMED_CLASSES) && 
                    (ch == '[') && (*pText == ':')) {
                        /*Yes, handle class name (if present)*/
                        if (RegExp_ClassName(&pText, Flags, pCode)) {
                                /*Dealt with name*/
                                continue;
                        }
                }

                /*Have we found a character range?*/
                if ((pText[0] == ((BYTE) '-')) && (pText[1] != ']')) {
                        /*Yes, find start and end of the range*/
                        Start = (UINT) ch;
                        pText++;
                        ch = (BYTE) *pText++;

                        /*Is the start after the end when this is forbidden?*/
                                /*Yes, remember failure and abandon class*/
                                /*(not implemented yet)*/

                        /*Expand range now (Note: Start, ch unsigned)*/
                        for (; Start <= (UINT) ch; Start++) {
                                REGEXP_BIT_SET(pCode, Start);
                        }

                        /*Process next component of class*/
                        continue;

                }

                /*Add char to the set*/
                REGEXP_BIT_SET(pCode, ch);

        }

        /*Was this a normal class?*/
        if (REGEXP_TYPE_R(pCode[-1]) == REGEXP_TYPE_CLASS) {
                /*Yes, did the class only contain one member?*/
                switch (RegExp_CountMembers(pCode)) {
                case 1:
                        /*Yes, convert from class entry to literal*/
                        pCode[-1] = REGEXP_TYPE(REGEXP_TYPE_LITERAL) |
                                CaseFlag |
                                REGEXP_LITERAL(ch);
                        break;

                case 2:
                        /*Is the class equivalent to a case-insens. literal?*/
                        if ((Flags & REGEXP_CONFIG_CLASS_CONV_CASE) &&
                            (tolower(ch) != toupper(ch)) &&
                            REGEXP_BIT_TEST(pCode, tolower(ch)) &&
                            REGEXP_BIT_TEST(pCode, toupper(ch))) {
                                /*Yes, use literal as it's easier to handle*/
                                pCode[-1] = REGEXP_TYPE(REGEXP_TYPE_LITERAL) |
                                        REGEXP_CASE_INSENSITIVE |
                                        REGEXP_LITERAL(ch);
                                break;

                        }

                        /*Ensure LF is not present*/
                        REGEXP_BIT_CLEAR(pCode, LF);

                        /*Skip over class elements in RE*/
                        pCode += REGEXP_BITMASK_NRCODES;

                        break;

                case 0:
                        /*Yes, match can't progress (unless ? or * option)*/
                        /*We could use REGEXP_TYPE_DEAD_END here*/
                        /*FALLTHROUGH*/

                default:

                        /*Ensure LF is not present*/
                        REGEXP_BIT_CLEAR(pCode, LF);

                        /*Skip over class elements in RE*/
                        pCode += REGEXP_BITMASK_NRCODES;

                        break;

                }

        } else {
                /*All-but class: Ensure LF doesn't match*/
                REGEXP_BIT_SET(pCode, LF);

                /*Skip over class elements in RE*/
                pCode += REGEXP_BITMASK_NRCODES;

        }

        /*Write updated pointers to caller*/
        *ppCode = pCode;
        *ppText = pText + 1;

        /*Converted class to compact form successfully*/
        return TRUE;

} /*ClassCompile*/


/************************************************************************/
/*                                                                      */
/*      Compact -- Render text expression in compact form               */
/*                                                                      */
/*      This function is the first phase of handling a text regular     */
/*      expression.  It does a fairly simple and fast parse of the      */
/*      text to produce a set of codes that describe each element       */
/*      of the expression in a single item.  In particular, this        */
/*      means that multi-character specifiers such as "[A-Za-z]"        */
/*      are packaged into a convenient code.                            */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RegExp_Compact(BYTE *pText, LWORD Flags, RegExp_Specification *pRE)
{
        BYTE ch;
        RE_ELEMENT *pCode = pRE->CodeList;
        RE_ELEMENT *pPrev = NULL;
        UINT NrStates = 0;
        RE_ELEMENT CaseFlag = 0;

        /*Is the RE anchored to the start of the space?*/
        if (*pText == '^') {
                /*Yes, insert anchor element and skip over char*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_START);
                pText++;

        /*No anchor: was one selected by an option anyway?*/
        } else if (Flags & REGEXP_CONFIG_FULL_LINE_ONLY) {
                /*Yes, insert specifier so RE matches correctly*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_START);

        /*Is match constrained to word boundaries?*/
        } else if (Flags & REGEXP_CONFIG_WORD_EDGES) {
                /*Yes, check for non-word char before match start*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_PREV_NONWORD);
                NrStates++;

        }

        /*Set up case-insensitivity flag for RE elements if specified*/
        if (Flags & REGEXP_CONFIG_IGNORE_CASE) {
                CaseFlag = REGEXP_CASE_INSENSITIVE;
        }

        /*Later: Entry point for groups/alternation might go here*/

        /*Work through each part of expression*/
ConversionLoop:

        /*Presume that specifier creates a match state*/
        NrStates++;

        ch = (BYTE) *pText++;
        switch ((CHAR) ch) {
        case '.':
                /*Match anything except EOL*/

                /*Are we limiting "." matches to word chars?*/
                if (Flags & REGEXP_CONFIG_WORD_CONSTRAIN) {
                        /*Yes, pretend we saw "\w" instead*/
                        ch = 'w';
                        goto AddWordClass;
                }

                /*Add match-any code to compiled version*/
                pPrev = pCode;
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_MATCH_ANY);
                break;

        case '$':
                /*Are we at the end of the pattern?*/
                if (*pText == '\0') {
                        /*Yes, Match end of line*/
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_END);
                        pPrev = NULL;
                } else {
                        /*No, treat $ as a literal (sigh)*/
                        goto Literal;
                }
                break;

        case '^':
                /*Start-of-line anchor*/
                if (pPrev == NULL) {
                        /*Already handled "real" ^ above: literal*/
                        goto Literal;
                }

                /*sudden editorial outburst:

                  The treatment of ^ in GNU grep is obscure.  The 
                  rules here are behoffski's crude guess as to what's 
                  happening.  It's unlikely to be identical, so 
                  unfortunately discrepancies may appear.  Compare 
                  "r^" = "'r' BEGLINE" and "[a-z]^" = "CSET '^'" 
                  to see what I mean.  
                */

                /*Is the preceding item a literal?*/
                if ((REGEXP_TYPE_R(*pPrev) == REGEXP_TYPE_LITERAL) && 
                    (REGEXP_REPEAT_R(*pPrev) == REGEXP_REPEAT_MATCH_SELF)) {
                        /*Yes, ^ is line beginning, but can't match*/
                        pPrev = pCode;
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_DEAD_END);
                        break;
                }

                /*Treat character as a literal*/
                goto Literal;

                /*break;*/

        case '\0':
                /*Were we asked to limit matches to full lines?*/
                if (Flags & REGEXP_CONFIG_FULL_LINE_ONLY) {
                        /*Yes, was an end-of-line anchor given?*/
                        if (REGEXP_TYPE_R(pCode[-1]) != 
                                          REGEXP_TYPE_ANCHOR_END) {
                                /*No, add one now*/
                                *pCode++ = 
                                        REGEXP_TYPE(REGEXP_TYPE_ANCHOR_END);
                                NrStates++;
                        }

                        /*Add RE terminator*/
                        *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);

                        /*Are we constrained to matching word edges?*/
                } else if (Flags & REGEXP_CONFIG_WORD_EDGES) {
                        /*Yes, add endmarker with nonword restrictions*/
                        *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR) | 
                                REGEXP_WORD(REGEXP_WORD_NONWORD_CHARS);

                } else {
                        /*No, merely mark end of RE*/
                        *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);
                }

                /*End of expression... write result to caller and exit*/
                pRE->NrStates = NrStates;
                return TRUE;
                break;

        case '\\':
                /*Decode character, handling escapes specially if selected*/
                ch = *pText++;
                switch (ch) {
                case '+':
                        /*Escaped plus: is this combination special?*/
                        if (! (Flags & REGEXP_CONFIG_ESC_PLUS)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Was there a preceding RE?*/
                        if (pPrev == NULL) {
                                /*No, treat as literal*/
                                goto Literal;
                        }

                        /*Is the previous component a meta RE?*/
                        if (REGEXP_TYPE_IS_META(REGEXP_TYPE_R(*pPrev))) {
                                /*Yes, modifier doesn't have any effect*/
                                NrStates--;
                                break;
                        }

                        /*Modify previous element's iteration*/
                        if (REGEXP_REPEAT_R(*pPrev) != 
                                        REGEXP_REPEAT_MATCH_SELF) {
                                NrStates--;
                        }
                        *pPrev |= REGEXP_REPEAT(REGEXP_REPEAT_1_OR_MORE);
                        break;

                case '?':
                        /*Escaped question mark: is this combination special?*/
                        if (! (Flags & REGEXP_CONFIG_ESC_QUESTION_MARK)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Was there a preceding RE?*/
                        if (pPrev == NULL) {
                                /*No, treat as literal*/
                                goto Literal;
                        }

                        /*Yes, make previous element optional*/
                        *pPrev |= REGEXP_REPEAT(REGEXP_REPEAT_0_OR_1);
                        break;

                case '<':
                        /*Escaped less-than: is this special?*/
                        if (! (Flags & REGEXP_CONFIG_ESC_LESS_THAN)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Add meta-match to check word beginning*/
                        if (pPrev != NULL) {
                                pPrev = pCode;
                        }
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_BEGINNING);
                        break;

                case '>':
                        /*Escaped greater-than: is this special?*/
                        if (! (Flags & REGEXP_CONFIG_ESC_LESS_THAN)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Add meta-match to check word end*/
                        if (pPrev != NULL) {
                                pPrev = pCode;
                        }
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_END);
                        break;

                case 'b':
                        /*Escaped 'b': is this special?*/
                        if (! (Flags & REGEXP_CONFIG_WORD_BOUNDARY)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Add meta-match to check word boundary*/
                        if (pPrev != NULL) {
                                pPrev = pCode;
                        }
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_BOUNDARY);
                        break;

                case 'B':
                        /*Escaped 'B': is this special?*/
                        if (! (Flags & REGEXP_CONFIG_WORD_NONBOUNDARY)) {
                                /*No, treat character as a literal*/
                                goto Literal;
                        }

                        /*Add meta-match to check word NONboundary*/
                        if (pPrev != NULL) {
                                pPrev = pCode;
                        }
                        *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_NONBOUNDARY);
                        break;

                case 'w':
                case 'W':
                        /*\\w equivalent to [[:alnum:]_]; \W to [^[:alnum:]_]*/
AddWordClass:
                        /*Collect characters specified in a bit set*/
                        memset(pCode + 1, 
                               0, 
                               REGEXP_BITMASK_NRCODES * sizeof(RE_ELEMENT));

                        pPrev = pCode;
                        if (ch == 'w') {
                                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_CLASS);
                        } else {
                                *pCode++ = REGEXP_TYPE(
                                                REGEXP_TYPE_CLASS_ALL_BUT);

                                /*Ensure LF doesn't match (is this correct?)*/
                                REGEXP_BIT_SET(pCode, LF);
                        }
                        ch = 0; 
                        do {
                                /*Is this character a member of the class?*/
                                if (isalnum(ch)) {
                                        /*Yes, mark it in our list*/
                                        REGEXP_BIT_SET(pCode, ch);
                                }
                        } while (ch++ != REGEXP_CHAR_MAX);
                        REGEXP_BIT_SET(pCode, '_');

                        /*Skip over class elements in RE*/
                        pCode += REGEXP_BITMASK_NRCODES;

                        break;

                case NUL:
                        /*Sorry, don't support trailing backslash*/
                        fprintf(stderr, "%s: Trailing backslash\n", 
                                Platform_ProgramName());
                        exit(2);
                        /*break;*/

                default:
                        /*Backslash merely makes next character a literal*/
                        goto Literal;
                        /*break;*/

                }
                break;

        case '[':
                /*Handle class specifier*/
                pPrev = pCode;
                if (! RegExp_ClassCompile(&pText, Flags, &pCode)) {
                        /*Sorry, error in class specification*/
                        return FALSE;
                }
                break;

        case '*':
                /*Repeat 0 or more times*/

                /*Was there a preceding RE?*/
                if (pPrev == NULL) {
                        /*No, treat as literal*/
                        goto Literal;
                }

                /*Modify prev code to repeat 0 or more times*/
                *pPrev |= REGEXP_REPEAT(REGEXP_REPEAT_0_OR_MORE);
                NrStates--;
                break;

        case '+':
                /*Repeat 1 or more times*/

                /*Is + a special character?*/
                if (! (Flags & REGEXP_CONFIG_PLUS)) {
                        /*No, treat character as a literal*/
                        goto Literal;
                }

                /*Was there a preceding RE?*/
                if (pPrev == NULL) {
                        /*No, treat as literal*/
                        goto Literal;
                }

                /*Is the previous component a meta RE?*/
                if (REGEXP_TYPE_IS_META(REGEXP_TYPE_R(*pPrev))) {
                        /*Yes, modifier doen't have any effect: Ignore it*/
                        NrStates--;
                        break;
                }

                /*Modify prev code to repeat 1 or more times*/
                if (REGEXP_REPEAT_R(*pPrev) != REGEXP_REPEAT_MATCH_SELF) {
                        NrStates--;
                }
                *pPrev |= REGEXP_REPEAT(REGEXP_REPEAT_1_OR_MORE);
                break;

        case '?':
                /*Repeat 0 or 1 times*/

                /*Is '?' a special character?*/
                if (! (Flags & REGEXP_CONFIG_QUESTION_MARK)) {
                        /*No, treat character as a literal*/
                        goto Literal;
                }

                /*Was there a preceding RE?*/
                if (pPrev == NULL) {
                        /*No, treat as literal*/
                        goto Literal;
                }

                /*Modify prev code to repeat zero or one times*/
                *pPrev |= REGEXP_REPEAT(REGEXP_REPEAT_0_OR_1);
                NrStates--;
                break;

        default:
Literal:
                /*Character isn't recognised as special: handle as a literal*/
                pPrev = pCode;
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_LITERAL) | 
                           CaseFlag | 
                           REGEXP_LITERAL(ch);
                break;

        }

        goto ConversionLoop;

} /*Compact*/


/************************************************************************/
/*                                                                      */
/*      Compile -- Check and convert text expression to compact form    */
/*                                                                      */
/*      The Flags parameter contains settings modifying the RE          */
/*      such as case insensitivity, match entire line and               */
/*      match word.                                                     */
/*                                                                      */
/*      The function returns FALSE if the text form was badly           */
/*      formatted.                                                      */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RegExp_Compile(CHAR *pText, LWORD Flags, 
               RegExp_Specification **ppSpec)
{
        UINT RESize;

        RegExp_Specification *pRE = NIL;

        /*Destroy return value to reduce chances of being misunderstood*/
        *ppSpec = NIL;

        /*Find worst-case code size (smallest class spec is 4 chars)*/
        RESize = ((strlen(pText) / 4) + 1) * (REGEXP_BITMASK_NRCODES + 1) +
                REGEXP_CODE_OVERHEAD;

        /*Allocate a specifier/full structure block*/
        pRE = (RegExp_Specification *) 
                        Platform_SmallMalloc(RESize * sizeof(RE_ELEMENT));
        if (pRE == NULL) {
                /*Unable to obtain memory for compilation*/
                gRegExp.FailureCode = REGEXP_FAIL_NOT_ENOUGH_MEMORY;
                return FALSE;
        }

        /*Work through string, converting each portion to compact form*/
        if (! RegExp_Compact((BYTE *) pText, Flags, pRE)) {
                /*Errors in RE syntax... discard buffer and report failure*/
                Platform_SmallFree(pRE);
                return FALSE;
        }

        /*No failures associated with this RE*/
        pRE->FailureCode = REGEXP_FAIL_NO_FAILURE;

        /*Write handle to caller and report success*/
        *ppSpec = pRE;
        return TRUE;

} /*Compile*/


/************************************************************************/
/*                                                                      */
/*      CompileString -- Convert string without looking for RE syntax   */
/*                                                                      */
/*      The Flags parameter contains settings modifying the RE          */
/*      such as case insensitivity, match entire line and               */
/*      match word.                                                     */
/*                                                                      */
/*      The function returns FALSE if the conversion failed.            */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RegExp_CompileString(CHAR *pText, LWORD Flags, 
                     RegExp_Specification **ppSpec)
{
        UINT RESize;
        RegExp_Specification *pRE = NIL;
        RE_ELEMENT *pCode;
        RE_ELEMENT Code;

        /*Destroy return value to reduce chances of being misunderstood*/
        *ppSpec = NIL;

        /*Estimate storage needed to store compiled RE*/
        RESize = strlen(pText) + REGEXP_CODE_OVERHEAD;

        /*Allocate a specifier/full structure block*/
        pRE = (RegExp_Specification *) 
                Platform_SmallMalloc(RESize * sizeof(RE_ELEMENT));
        if (pRE == NULL) {
                /*Unable to obtain memory for compilation*/
                gRegExp.FailureCode = REGEXP_FAIL_NOT_ENOUGH_MEMORY;
                return FALSE;
        }
        pCode = pRE->CodeList;

        /*Is match required to span entire line?*/
        if (Flags & REGEXP_CONFIG_FULL_LINE_ONLY) {
                /*Yes, insert start anchor so RE matches correctly*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_START);
                
        /*No, but is match constrained to word boundaries?*/
        } else if (Flags & REGEXP_CONFIG_WORD_EDGES) {
                /*Yes, check for non-word char before match start*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_WORD_PREV_NONWORD);

        }

        /*Prepare code describing each element of the string*/
        Code = REGEXP_TYPE(REGEXP_TYPE_LITERAL);
        if (Flags & REGEXP_CONFIG_IGNORE_CASE) {
                Code |= REGEXP_CASE_INSENSITIVE;
        }

        /*Convert each character to code*/
        for (; *pText != NUL; pText++) {
                /*Convert this character*/
                *pCode++ = Code | REGEXP_LITERAL((BYTE) *pText);

        }

        /*Is match required to span entire line?*/
        if (Flags & REGEXP_CONFIG_FULL_LINE_ONLY) {
                /*Yes, append end anchor so RE matches correctly*/
                *pCode++ = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_END);
                *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);
                
        /*No, but are we constrained to matching word edges?*/
        } else if (Flags & REGEXP_CONFIG_WORD_EDGES) {
                /*Yes, add endmarker with nonword restrictions*/
                *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR) | 
                        REGEXP_WORD(REGEXP_WORD_NONWORD_CHARS);

        } else {
                /*No, merely mark end of RE*/
                *pCode = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);
        }

        /*Add end-of-list descriptor*/
        pRE->NrStates = (UINT) (pCode - pRE->CodeList) + 1;
        pRE->FailureCode = REGEXP_FAIL_NO_FAILURE;

        /*Write handle to caller and report success*/
        *ppSpec = pRE;
        return TRUE;

} /*CompileString*/


/************************************************************************/
/*                                                                      */
/*      CountStates -- Work out how many states are in RE               */
/*                                                                      */
/*      The number of states in a regular expression is a very          */
/*      useful piece of information.  Most importantly, it allows       */
/*      the table-driven expansion of the RE to use the least memory    */
/*      possible.  However, some RE operations such as the split        */
/*      in EasiestFirst modify this value.  Rather than include         */
/*      counting states in each operation, this function surveys        */
/*      the RE and builds the correct count of states.                  */
/*                                                                      */
/************************************************************************/
module_scope void
RegExp_CountStates(RegExp_Specification *pRE)
{
        RE_ELEMENT *pCode = pRE->CodeList;
        RE_ELEMENT Elem;
        UINT NrStates = 0;

        /*Loop through spec and interpret each code*/
        for (;;) {
                Elem = *pCode++;
                switch (REGEXP_TYPE_R(Elem)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Code terminating RE found: write states and exit*/
                        pRE->NrStates = NrStates + 1;
                        return;
                        break;

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        /*Next REGEXP_BITMASK_NRCODES codes detail members*/
                        pCode += REGEXP_BITMASK_NRCODES;
                        /*FALLTHROUGH*/

                case REGEXP_TYPE_LITERAL:
                case REGEXP_TYPE_MATCH_ANY:
                        /*Most codes are counted as states*/
                        NrStates++;

                        /*Add a state if "+" iteration applied*/
                        switch(REGEXP_REPEAT_R(Elem)) {
                        case REGEXP_REPEAT_1_OR_MORE:
                                NrStates++;
                                break;

                        default:
                                break;

                        }

                        break;

                case REGEXP_TYPE_WORD_BEGINNING:
                case REGEXP_TYPE_WORD_END:
                case REGEXP_TYPE_WORD_BOUNDARY:
                case REGEXP_TYPE_WORD_NONBOUNDARY:
                case REGEXP_TYPE_WORD_PREV_NONWORD:
                case REGEXP_TYPE_DEAD_END:
                        /*Takes 1 state to evaluate text*/
                        NrStates++;
                        break;

                case REGEXP_TYPE_ANCHOR_START:
                case REGEXP_TYPE_ANCHOR_END:
                        /*These do not warrant a state of their own*/
                        break;

                default:
                        printf("RegExp_CountStates: Unknown: %u", 
                               REGEXP_TYPE_R(pCode[-1]));
                        break;

                }

        }

} /*CountStates*/


/************************************************************************/
/*                                                                      */
/*      ExtractRE -- Copy specified RE components into new spec         */
/*                                                                      */
/*      This function creates a new regular expression containing       */
/*      a string of RE elements copied from the specified RE.           */
/*      It is used by EasiestFirst to create a RE containing the        */
/*      cheapest (hopefully) portion of the RE to find first.           */
/*      The function returns FALSE if unable to perform the             */
/*      extraction as requested.                                        */
/*                                                                      */
/*      If the last element in the list has '+' iteration, the          */
/*      copied element has the iteration removed.                       */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RegExp_ExtractRE(RegExp_Specification *pSpec, 
                 RE_ELEMENT *pStartCode, UINT NrElements, 
                 RegExp_Specification **ppExtractedRE)
{
        RegExp_Specification *pRE2;
        RE_ELEMENT *pCode;
        RE_ELEMENT *pLastCode;
        RE_ELEMENT Code;
        UINT NrStates;
        UINT NrCodes;

        /*Destroy return value to reduce possibility of confusion*/
        *ppExtractedRE = (RegExp_Specification *) NIL;

        /*Is the number of elements unreasonable?*/
        if ((NrElements == 0) || (NrElements == UINT_MAX)) {
                /*Yes, reject request*/
                pSpec->FailureCode = REGEXP_FAIL_REQUEST_UNREASONABLE;
                return FALSE;
        }

        /*Find out how many RE words are used to describe target area*/
        NrStates = NrElements + 1;
        pCode = pStartCode;
        pLastCode = pCode;
        while (NrElements-- > 0) {
                /*Does this element have '+' iteration?*/
                pLastCode = pCode;
                Code = *pCode++;
                if (REGEXP_REPEAT_R(Code) == REGEXP_REPEAT_1_OR_MORE) {
                        /*Yes, remember that we need an extra state*/
                        NrStates++;
                }

                /*Skip over code plus any associated data*/
                switch (REGEXP_TYPE_R(Code)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Fail: Counter should have stopped us earlier*/
                        return FALSE;
                        /*break;*/

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        /*Next REGEXP_BITMASK_NRCODES codes detail members*/
                        pCode += REGEXP_BITMASK_NRCODES;
                        break;
                        
                }

        }


        /*Get buffer to store easier part of RE*/
        NrCodes = (pCode - pStartCode) + 1;

        pRE2 = (RegExp_Specification *) malloc(sizeof(RegExp_Specification) +
                (NrCodes) * sizeof(RE_ELEMENT));
        if (pRE2 == NULL) {
                /*Unable to obtain memory to perform extraction*/
                pSpec->FailureCode = REGEXP_FAIL_NOT_ENOUGH_MEMORY;
                return FALSE;
        }
        pRE2->FailureCode = REGEXP_FAIL_NO_FAILURE;

        /*Copy codes from RE and add an endmarker*/
        memcpy(&pRE2->CodeList[0], 
               pStartCode, 
               (NrCodes - 1) * sizeof(*pStartCode));
        pRE2->CodeList[NrCodes - 1] = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);

        /*Did the last code specify '+' iteration?*/
        if (REGEXP_REPEAT_R(*pLastCode) == REGEXP_REPEAT_1_OR_MORE) {
                /*Yes, edit copy to remove iteration*/
                pCode = &pRE2->CodeList[pLastCode - pStartCode];
                *pCode &= ~REGEXP_REPEAT_MASK;
                *pCode |= REGEXP_REPEAT_MATCH_SELF;
                NrStates--;
        }


        pRE2->NrStates = NrStates;

        /*Write extracted RE to caller and report success*/
        *ppExtractedRE = pRE2;
        return TRUE;

} /*ExtractRE*/


/************************************************************************/
/*                                                                      */
/*      Frequency -- Estimate how frequently we might match string      */
/*                                                                      */
/*      This function inspects the supplied string of pattern           */
/*      elements and estimates how often the pattern would appear       */
/*      in "typical" files.  While this estimate is amazingly crude     */
/*      it is in fact worth while, since patterns like "e.*q"           */
/*      benefit from searching for "q" first instead of "e".            */
/*                                                                      */
/*      This function assumes that the pattern elements presented       */
/*      are simple, such as literals and classes.                       */
/*                                                                      */
/*      This function used to be called Score, and returned a UINT16    */
/*      with the interpretation that higher was better (the higher      */
/*      the score, the less likely we were to match the string).        */
/*      However, the routine is now called Frequency, returns UINT32    */
/*      and the meanings are reversed: the lower the frequency,         */
/*      the less likely we believe we will match the string).  The      */
/*      reworked routine also anticipates the use of the Boyer-Moore    */
/*      algorithm and increases the input from the last element.        */
/*                                                                      */
/*      In some cases we might be better off shortening the string      */
/*      if the last element is really likely, but this important        */
/*      improvement is not included here, mainly because we're          */
/*      not confident that our scoring truly matches the input.         */
/*                                                                      */
/************************************************************************/
module_scope UINT32
RegExp_Frequency(RE_ELEMENT *pCode, UINT NrCodes)
{
        UINT32 TotalFreq = 0;
        UINT32 CodeFreq = 0;
        UINT c;
        UINT CodeNr;

        /*Loop through each code, estimating how often the code appears*/
        for (CodeNr = 0; CodeNr < NrCodes; CodeNr++) {
                switch (REGEXP_TYPE_R(*pCode++)) {
                case REGEXP_TYPE_CLASS:
                        /*Examine class members to estimate likelihood*/
                        CodeFreq = 0;
                        for (c = 0; c < 255; c++) {
                                /*Skip character if not member of class*/
                                if (! REGEXP_BIT_TEST(pCode, c)) {
                                        continue;
                                }
                                CodeFreq += RegExp_ByteFrequency[c];
                        }

                        /*Skip past bitset describing class members*/
                        pCode += REGEXP_BITMASK_NRCODES;
                        TotalFreq += CodeFreq;

                        break;

                case REGEXP_TYPE_LITERAL:
                        /*Code represents literal*/
                        c = REGEXP_LITERAL_R(pCode[-1]);
                        CodeFreq = 1;
                        if (c < DIM(RegExp_ByteFrequency)) {
                                CodeFreq = RegExp_ByteFrequency[c];
                        }
                        TotalFreq += CodeFreq;
                        break;

                }

        }

        /*Likelihood of last code affects Boyer-Moore*/
        /*So we increase its relative importance to the score*/
        TotalFreq = (TotalFreq + CodeFreq * 3) / (NrCodes + 3);

        return TotalFreq;

} /*Frequency*/


/************************************************************************/
/*                                                                      */
/*      EasiestFirst -- Improve search by deferring complex starts      */
/*                                                                      */
/*      An expression like "[^a]*.*[^b]*.*c.*Quasimodo's Dream.*g"      */
/*      is very expensive to search if you simply use the expression    */
/*      as given, as the first elements involve lots of backtracking.   */
/*      However, most regular expressions like this have a              */
/*      relatively easy middle bit (e.g. "Quasimodo's Dream") that      */
/*      is required for the match and can be searched first instead.    */
/*                                                                      */
/*      If the search can be optimised by the split, EasiestFirst       */
/*      copies the easy bit into a separate RE specification,           */
/*      leaving the existing RE untouched.  The extracted RE is         */
/*      written back to the caller, and the function returns TRUE.      */
/*      If no worthwhile optimisation can be found, the function        */
/*      returns FALSE.                                                  */
/*                                                                      */
/*      Leaving the original untouched is a change from the             */
/*      previous version: we no longer break the RE into two            */
/*      pieces: this is because:                                        */
/*           (a) In a few cases we match incorrectly, e.g. word         */
/*               boundary test at the split boundary;                   */
/*           (b) Writing into the file buffer (to add the LF            */
/*               marker to complete the match) degrades                 */
/*               performance where the file is mmap'd as the            */
/*               operating system generates page faults due to          */
/*               copy-on-write markers on the memory pages;             */
/*           (c) We can optimise the easier bit more aggressively       */
/*               if we aren't burdened with more complex RE             */
/*               components appended to the easy bit.                   */
/*                                                                      */
/*      A rather crude scoring system is used as a tie-breaker          */
/*      where multiple longest strings are the same length.             */
/*      This tie-breaker attempts to select the string containing       */
/*      characters less likely to occur in "typical" text files.        */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RegExp_EasiestFirst(RegExp_Specification *pRE, 
                    RegExp_Specification **ppEasierBit)
{
        RE_ELEMENT *pCode;
        RE_ELEMENT *pEasyCode = pRE->CodeList;
        UINT NrEasyCodes = 0;
        RE_ELEMENT *pEasiestCode = pRE->CodeList;
        RE_ELEMENT *pFirstIter = NULL;
        UINT Longest = 0;
        BOOL Tricky = TRUE;
        BOOL Scanning = TRUE;
        BOOL AnchorStart = FALSE;
        BOOL AnchorEnd = FALSE;
        RegExp_Specification *pRE2;

        /*Destroy return value to reduce chances of being misinterpreted*/
        *ppEasierBit = (RegExp_Specification *) NIL;

        /*Step through RE, looking for literals*/
        pCode = pRE->CodeList;
        do {
                /*Decode the next RE element*/
                switch (REGEXP_TYPE_R(*pCode++)) {
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        /*Next REGEXP_BITMASK_NRCODES codes give membership*/
                        pCode += REGEXP_BITMASK_NRCODES;
                        Tricky = TRUE;
                        break;

                case REGEXP_TYPE_ANCHOR_START:
                        /*Start anchor can perform poorly sometimes*/
                        AnchorStart = TRUE;
                        Tricky = TRUE;
                        break;

                case REGEXP_TYPE_ANCHOR_END:
                        /*Skip forward to the end of the expression*/
                        AnchorEnd = TRUE;
                        while (REGEXP_TYPE_R(*pCode++) != 
                                                 REGEXP_TYPE_END_OF_EXPR) {
                        }
                        Tricky = TRUE;
                        Scanning = FALSE;
                        break;

                case REGEXP_TYPE_WORD_BEGINNING:
                case REGEXP_TYPE_WORD_END:
                case REGEXP_TYPE_WORD_BOUNDARY:
                case REGEXP_TYPE_WORD_NONBOUNDARY:
                case REGEXP_TYPE_WORD_PREV_NONWORD:
                        /*Test will slow things down*/
                        Tricky = TRUE;
                        break;

                case REGEXP_TYPE_END_OF_EXPR:
                        /*Terminate any easy bit and prepare to exit*/
                        Tricky = TRUE;
                        Scanning = FALSE;
                        break;

                case REGEXP_TYPE_MATCH_ANY:
                        /*Not easy to optimise*/
                        Tricky = TRUE;

                        /*Check whether code is modified by repeat element*/
                        switch (REGEXP_REPEAT_R(pCode[-1])) {
                        case REGEXP_REPEAT_MATCH_SELF:
                        case REGEXP_REPEAT_1_OR_MORE:
                        case REGEXP_REPEAT_0_OR_MORE:
                                /*Remember element if first iteration*/
                                if (pFirstIter == NULL) {
                                        pFirstIter = pCode - 1;
                                }
                                break;

                        /*default:*/
                                /*break;*/
                        }
                        break;

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_LITERAL:
                        /*Literal or (hopefully easy) class*/

                        /*Check whether code is modified by repeat element*/
                        switch (REGEXP_REPEAT_R(pCode[-1])) {
                        case REGEXP_REPEAT_MATCH_SELF:
                                /*Unmodified code*/
                                Tricky = FALSE;
                                NrEasyCodes++;
                                if (NrEasyCodes == 1) {
                                        /*Remember start in case it's longest*/
                                        pEasyCode = pCode - 1;
                                }
                                break;

                        case REGEXP_REPEAT_0_OR_1:
                                Tricky = TRUE;
                                break;

                        case REGEXP_REPEAT_1_OR_MORE:
                                /*Initial match attempt may be optimised*/
                                NrEasyCodes++;
                                if (NrEasyCodes == 1) {
                                        /*Remember start in case it's longest*/
                                        pEasyCode = pCode - 1;
                                }

                                /*FALLTHROUGH*/

                        case REGEXP_REPEAT_0_OR_MORE:
                                /*Tricky code; also check if first iteration*/
                                Tricky = TRUE;
                                if (pFirstIter == NULL) {
                                        pFirstIter = pCode - 1;
                                }
                                break;

                        default:
                                printf(
                       "?? RegExp_EasiestFirst: Unrecognised repeat: %u\n", 
                                       REGEXP_REPEAT_R(pCode[-1]));
                                
                        }

                        if (REGEXP_TYPE_R(pCode[-1]) == REGEXP_TYPE_CLASS) {
                                /*Skip class membership bitmask*/
                                pCode += REGEXP_BITMASK_NRCODES;
                        }

                        break;

                case REGEXP_TYPE_DEAD_END:
                        /*Is this code non-optional?*/
                        switch (REGEXP_REPEAT_R(pCode[-1])) {
                        case REGEXP_REPEAT_MATCH_SELF:
                        case REGEXP_REPEAT_1_OR_MORE:
                                /*Yes, RE cannot match!  This is very easy!*/
                                if (! RegExp_ExtractRE(pRE, 
                                                       &pCode[-1], 
                                                       1, 
                                                       &pRE2)) {
                                        /*Sorry, unable to create extraction*/
                                        return FALSE;
                                }
                                *ppEasierBit = pRE2;
                                return TRUE;

                        default:
                                /*Dead end includes zero-trip case*/
                                Tricky = TRUE;
                                break;
                        }
                        break;

                default:
                        printf("?? RegExp_EasiestFirst: Unknown type: %u\n", 
                               REGEXP_TYPE_R(pCode[-1]));
                        break;

                }

                /*Did we see an anchor at the end of the RE?*/
                if (AnchorEnd) {
                        /*Yes, use last easy bit to reduce backtracking*/
                        pEasiestCode = pEasyCode;
                        break;
                }

                /*Did we find the end of some easy codes?*/
                if ((! Tricky) || (NrEasyCodes == 0)) {
                        /*No, continue search*/
                        continue;
                }

                /*Yes, is the run shorter than the best previous?*/
                if (NrEasyCodes < Longest) {
                        /*Yes, ignore it*/
                        NrEasyCodes = 0;
                        continue;

                }

                /*Is this string the same length as the best so far?*/
                if ((Longest != 0) && (NrEasyCodes == Longest)) {
                        /*Yes, does new string seem more likely to appear?*/
                        if (RegExp_Frequency(pEasyCode, NrEasyCodes) >
                                 RegExp_Frequency(pEasiestCode, Longest)) {
                                /*Yes, not worth changing selection*/
                                NrEasyCodes = 0;
                                continue;
                        }

                }


                /*String just found seems to be the easiest to search*/
                pEasiestCode = pEasyCode;
                Longest = NrEasyCodes;
                NrEasyCodes = 0;

        } while (Scanning);

        /*Code exploits knowledge that pCode points to end of RE (+ 1)*/

        /*Was our easiest part at the start of the RE anyway?*/
        if (pEasiestCode == pRE->CodeList) {
                /*Yes, don't muck around with RE*/
                pRE->FailureCode = REGEXP_FAIL_RE_IS_OPTIMAL;
                return FALSE;
        }

        /*Does the easy bit merely skip the test for -w?*/
        if ((pEasiestCode == &pRE->CodeList[1]) && 
            (REGEXP_TYPE_R(pRE->CodeList[0]) == 
             REGEXP_TYPE_WORD_PREV_NONWORD)) {
                /*Yes, not worth splitting*/
                pRE->FailureCode = REGEXP_FAIL_RE_IS_OPTIMAL;
                return FALSE;
        }

        /*Is the search anchored to the start of the line?*/
        if (AnchorStart) {
                /*Yes, state tables do a good job of this already*/
                pRE->FailureCode = REGEXP_FAIL_RE_IS_OPTIMAL;
                return FALSE;
        }

        /*Note: The definition of "good" used above isn't optimal*/

        /*Create a new RE containing easier bit*/
        if (! RegExp_ExtractRE(pRE, pEasiestCode, Longest, &pRE2)) {
                /*Sorry, unable to perform extraction (memory problem?)*/
                return FALSE;
        }

        /*Write easier spec back to caller*/
        *ppEasierBit = pRE2;

        /*Tell caller that optimisation has been performed*/
        return TRUE;

} /*EasiestFirst*/


/************************************************************************/
/*                                                                      */
/*      FindOptional -- Find sequence of optional elements in RE        */
/*                                                                      */
/*      This function assists SlashEnds by locating a string of         */
/*      optional elements, if present, in the RE.  The function         */
/*      starts from a nominated position in the RE, and reports         */
/*      the start and code after the optional bit, so that the          */
/*      function can work through all the optional components of        */
/*      the RE in order.                                                */
/*                                                                      */
/*      pStart names where to start the search.  ppFirstCode            */
/*      receives the start of the optional bit, if found.               */
/*                                                                      */
/*      The function returns TRUE if an optional bit was found.         */
/*      First/next pointers are preserved if no optional bit found.     */
/*                                                                      */
/*      Groups of elements including zero-trip options are              */
/*      reported as a unit.  Each ONE_OR_MORE element is also           */
/*      reported separately.                                            */
/*                                                                      */
/************************************************************************/
module_scope BOOL
RegExp_FindOptional(RE_ELEMENT *pStart,
                    RE_ELEMENT **ppFirstCode, 
                    RE_ELEMENT **ppNextCode)
{
        RE_ELEMENT *pCode = pStart;
        RE_ELEMENT *pPrev = pCode;

        /*Find element containing next *, ? or + modifier (if any)*/
        for (;;) {
                /*Remember prev code for when we find code/modifier pair*/
                pPrev = pCode;

                /*Analyse next component of RE*/
                switch (REGEXP_TYPE_R(*pCode++)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Didn't find any optional elements*/
                        return FALSE;
                        /*break;*/

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        /*Skip over codes describing set membership*/
                        pCode += REGEXP_BITMASK_NRCODES;

                        /*Found element matching character*/
                        break;

                case REGEXP_TYPE_LITERAL:
                case REGEXP_TYPE_MATCH_ANY:
                        /*Found element matching character*/
                        break;

                case REGEXP_TYPE_DEAD_END:
                        /*See if this component is optional*/
                        break;

                case REGEXP_TYPE_ANCHOR_START:
                case REGEXP_TYPE_ANCHOR_END:
                case REGEXP_TYPE_WORD_BEGINNING:
                case REGEXP_TYPE_WORD_END:
                case REGEXP_TYPE_WORD_BOUNDARY:
                case REGEXP_TYPE_WORD_NONBOUNDARY:
                case REGEXP_TYPE_WORD_PREV_NONWORD:
                        /*Ignore meta-specifiers*/
                        continue;

                default:
                        printf("?? RegExp_FindOptional: Unknown type: %u\n", 
                               REGEXP_TYPE_R(pCode[-1]));
                        continue;

                }

                /*See if element has modifier that makes it optional*/
                switch (REGEXP_REPEAT_R(*pPrev)) {
                case REGEXP_REPEAT_MATCH_SELF:
                        break;

                case REGEXP_REPEAT_1_OR_MORE:
                        /*Found partially-optional element: report to client*/
                        *ppFirstCode = pPrev;
                        *ppNextCode  = pCode;
                        return TRUE;
                        /*break;*/

                case REGEXP_REPEAT_0_OR_MORE:
                case REGEXP_REPEAT_0_OR_1:
                        goto FoundOptional;
                        /*break;*/

                default:
                        printf(
                       "RegExp_FindOptional: Unrecognised repeat: %u\n", 
                               REGEXP_REPEAT_R(*pPrev));
                        break;
                        
                }

                /*Found matching element, but not optional: continue search*/
                continue;

        }

FoundOptional:

        /*Extend string of fully-optional elements as far as possible*/
        *ppFirstCode = pPrev;
        for (;;) {

                /*Decode next element of RE*/
                pPrev = pCode;
                switch (REGEXP_TYPE_R(*pCode++)) {
                case REGEXP_TYPE_END_OF_EXPR:
                case REGEXP_TYPE_ANCHOR_END:
                        /*String of codes finishes with previous element*/
                        *ppNextCode = pPrev;
                        return TRUE;
                        /*break;*/

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        /*Skip over codes describing set*/
                        pCode += REGEXP_BITMASK_NRCODES;
                        break;

                case REGEXP_TYPE_LITERAL:
                case REGEXP_TYPE_MATCH_ANY:
                case REGEXP_TYPE_WORD_BEGINNING:
                case REGEXP_TYPE_WORD_END:
                case REGEXP_TYPE_WORD_BOUNDARY:
                case REGEXP_TYPE_WORD_NONBOUNDARY:
                case REGEXP_TYPE_WORD_PREV_NONWORD:
                        break;

                default:
                        /*Ignore meta-specifiers (?)*/
                        continue;
                        /*break;*/

                }

                /*Look for modifier to continue fully-optional string*/
                switch (REGEXP_REPEAT_R(*pPrev)) {
                case REGEXP_REPEAT_0_OR_MORE:
                case REGEXP_REPEAT_0_OR_1:
                        /*Optional element... continue extending string*/
                        break;

                case REGEXP_REPEAT_1_OR_MORE:
                case REGEXP_REPEAT_MATCH_SELF:
                        /*Finished fully-optional elements*/
                        *ppNextCode = pPrev;
                        return TRUE;
                        /*break;*/

                default:
                        printf(
                        "RegExp_FindOptional: Unrecognised repeat: %u\n", 
                               REGEXP_REPEAT_R(*pPrev));
                        continue;

                }

        }

        /*Control should not reach here*/
        return FALSE;

} /*FindOptional*/


/************************************************************************/
/*                                                                      */
/*      FindEnd -- Find end of RE                                       */
/*                                                                      */
/*      This function finds and reports the position of the end         */
/*      of the regular expression.                                      */
/*                                                                      */
/************************************************************************/
module_scope void
RegExp_FindEnd(RegExp_Specification *pRE, RE_ELEMENT **ppEnd)
{
        RE_ELEMENT *pCode = pRE->CodeList;

        /*Step through codes until end found*/
        for (;;) {
                /*Decode code*/
                switch (REGEXP_TYPE_R(*pCode++)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Found the end*/
                        *ppEnd = pCode - 1;
                        return;
                        /*break*/

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        pCode += REGEXP_BITMASK_NRCODES;
                        break;

                }

        }

} /*FindEnd*/


/************************************************************************/
/*                                                                      */
/*      RemoveCodes -- Remove optional codes from start or end          */
/*                                                                      */
/*      This function removes optional components at the start or       */
/*      the end of the RE.                                              */
/*                                                                      */
/************************************************************************/
module_scope void
RegExp_RemoveCodes(RegExp_Specification *pRE, 
                   RE_ELEMENT *pStart, RE_ELEMENT *pNext)
{
        RE_ELEMENT *pEnd;

        /*Find end of RE*/
        RegExp_FindEnd(pRE, &pEnd);

        /*Move elements up*/
        memmove(pStart, pNext, (pEnd-pNext + 1) * sizeof(RE_ELEMENT));

        /*Count how many states are in reduced RE*/
        RegExp_CountStates(pRE);

} /*RemoveCodes*/


/************************************************************************/
/*                                                                      */
/*      SlashEnds -- Remove start/end elements for inexact matches      */
/*                                                                      */
/*      Where the client does not care to know the exact string         */
/*      that matched a regular expression, iterative or optional        */
/*      elements at the start or end of a regular exression can         */
/*      be eliminated, reducing the backtracking required to            */
/*      perform the search.  For example:                               */
/*              a*.*b*.+Quasimodo's Dream[^a]+                          */
/*      can be reduced to:                                              */
/*              .Quasimodo's Dream[^a]                                  */
/*                                                                      */
/*      This function finds cases like this and edits the RE to         */
/*      remove unnecessary elements.  The function returns TRUE         */
/*      if optimisations were found.  Remember that the resulting       */
/*      match correctly selects lines but is otherwise inexact.         */
/*                                                                      */
/*      The optimisation consists of editing the RE for each of         */
/*      four cases, in turn:                                            */
/*              1. Remove optional elements at the front.               */
/*                 e.g. "a*b?c+defgh+i*j?" becomes "c+defgh+i*j?"       */
/*              2. Edit any leading ONE_OR_MORE element to              */
/*                 remove the iteration.                                */
/*                 e.g. "c+defgh+i*j*" becomes "cdefgh+i*j?"            */
/*              3. Remove optional elements from the back.              */
/*                 e.g. "cdefgh+i*j?" becomes "cdefgh+"                 */
/*              4. Edit any trailing ONE_OR_MORE element to             */
/*                 remove the iteration.                                */
/*                 e.g. "cdefgh+" becomes "cdefgh"                      */
/*                                                                      */
/*      I chose "slash" because I used to play lacrosse!                */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RegExp_SlashEnds(RegExp_Specification *pRE)
{
        RE_ELEMENT *pStart = pRE->CodeList;
        RE_ELEMENT *pNext = NULL;
        RE_ELEMENT *pLast1OrMore = NULL;
        RE_ELEMENT *pLast1Next   = NULL;
        BOOL Trimmed = FALSE;

        /*Find first optional section of RE (if any)*/
        if (! RegExp_FindOptional(pRE->CodeList, &pStart, &pNext)) {
                /*Did not find any optional components*/
                return FALSE;
        }

        /*Did we find a fully-optional string at the start of the RE?*/
        if ((pStart == pRE->CodeList) && 
            (REGEXP_REPEAT_R(*pStart) != REGEXP_REPEAT_1_OR_MORE)) {
                /*Yes, was entire RE optional?*/
                if (REGEXP_TYPE_R(*pNext) == REGEXP_TYPE_END_OF_EXPR) {
                        /*Yes, merely match all lines using "^" pattern*/
                        pStart[0] = REGEXP_TYPE(REGEXP_TYPE_ANCHOR_START);
                        pStart[1] = REGEXP_TYPE(REGEXP_TYPE_END_OF_EXPR);
                        RegExp_CountStates(pRE);
                        return TRUE;
                }

                /*Remove redundant codes at start*/
                RegExp_RemoveCodes(pRE, pStart, pNext);
                Trimmed = TRUE;

                /*Look for next optional section of RE, if any*/
                if (! RegExp_FindOptional(pStart, &pStart, &pNext)) {
                        /*No more optional components found*/
                        return TRUE;
                }

        }

        /*Did we find a ONE_OR_MORE element at the start?*/
        if ((pStart == pRE->CodeList) && 
            (REGEXP_REPEAT_R(*pStart) == REGEXP_REPEAT_1_OR_MORE)) {
                /*Yes, edit first element to remove iteration*/
                *pStart &= ~REGEXP_REPEAT_MASK;
                *pStart |= REGEXP_REPEAT(REGEXP_REPEAT_MATCH_SELF);
                Trimmed = TRUE;

                /*Look for next optional section of RE, if any*/
                if (! RegExp_FindOptional(pNext, &pStart, &pNext)) {
                        /*No more optional components found*/
                        return TRUE;
                }

        }

        /*Look for the last string of optional elements of RE*/
        for (;;) {

                /*Does this item start with a ONE_OR_MORE modifier?*/
                if (REGEXP_REPEAT_R(*pStart) == REGEXP_REPEAT_1_OR_MORE) {
                        /*Yes, remember last ONE_OR_MORE item seen*/
                        pLast1OrMore = pStart;
                        pLast1Next   = pNext;
                }

                /*Find next optional section, if any*/
                if (! RegExp_FindOptional(pNext, &pStart, &pNext)) {
                        /*No more optional components found*/
                        break;
                }

        }

        /*Did we find a string of options at the end of the RE?*/
        if (REGEXP_TYPE_R(*pNext) != REGEXP_TYPE_END_OF_EXPR) {
                /*No, cannot trim any options off of the end*/
                return Trimmed;
        }

        /*Does the end-of-expr marker indicate word restrictions?*/
        if (REGEXP_WORD_R(*pNext) != REGEXP_WORD_UNRESTRICTED) {
                /*Yes, cannot trim from the end*/
                return Trimmed;
        }

        /*Did the last optional section use ONE_OR_MORE modification?*/
        if (REGEXP_REPEAT_R(*pStart) == REGEXP_REPEAT_1_OR_MORE) {
                /*Yes, remove 1ormore modifier*/
                *pStart &= ~ REGEXP_REPEAT_MASK;
                *pStart |= REGEXP_REPEAT(REGEXP_REPEAT_MATCH_SELF);
                Trimmed = TRUE;

        } else {
                /*Fully optional last section: remove it from RE*/
                RegExp_RemoveCodes(pRE, pStart, pNext);
                Trimmed = TRUE;

                /*Was there a 1ormore iteration just before deleted section?*/
                if (pStart == pLast1Next) {
                        /*Yes, convert preceding 1ormore into simple match*/
                        *pLast1OrMore &= ~REGEXP_REPEAT_MASK;
                        *pLast1OrMore |= 
                                REGEXP_REPEAT(REGEXP_REPEAT_MATCH_SELF);
                }

        }

        /*Report whether expression has been optimised*/
        return Trimmed;

} /*SlashEnds*/


/************************************************************************/
/*                                                                      */
/*      AllOptional -- Check RE specs after end-of-line spec            */
/*                                                                      */
/*      The pattern "$.*" matches the end of line; "$b" must not.       */
/*      This function examines all the RE codes presented after         */
/*      the end-of-line anchor but before the end-of-expression         */
/*      marker, and returns TRUE if the null string matches the         */
/*      list (all the RE elements are optional).                        */
/*                                                                      */
/************************************************************************/
public_scope BOOL
RegExp_AllOptional(RE_ELEMENT *pRE)
{
        BOOL Class;

        /*Loop through codes until end of RE reported*/
        for (;;) {

                Class = FALSE;
                switch (REGEXP_TYPE_R(*pRE++)) {
                case REGEXP_TYPE_END_OF_EXPR:
                        /*Search completed all elements were optional*/
                        return TRUE;
                        break;

                case REGEXP_TYPE_CLASS:
                case REGEXP_TYPE_CLASS_ALL_BUT:
                        Class = TRUE;
                        /*FALLTHOUGH*/

                case REGEXP_TYPE_MATCH_ANY:
                case REGEXP_TYPE_LITERAL:
                        /*Check whether these match types are optional*/
                        break;

                default:
                        /*Anything else doesn't compel a match*/
                        continue;
                        /*break;*/

                }

                /*Found match, check repeat modifier*/
                switch (REGEXP_REPEAT_R(pRE[-1])) {
                case REGEXP_REPEAT_0_OR_MORE:
                case REGEXP_REPEAT_0_OR_1:
                        /*These are entirely optional, so okay*/
                        break;

                default:
                        /*Anything else is presumed to be non-optional*/
                        return FALSE;
                }

                /*Skip membership specifier if a class*/
                if (Class) {
                        pRE += REGEXP_BITMASK_NRCODES;
                }

        }

        /*Control should not reach here*/
        return FALSE;

} /*AllOptional*/


/************************************************************************/
/*                                                                      */
/*      Diagnose -- Report reason for last failure                      */
/*                                                                      */
/*      If pSpec is not NULL, reports the last failure (if any)         */
/*      for that specification.  If NULL, reports the last failure      */
/*      where no RE was associated (e.g. where Compile did not          */
/*      return a specification).                                        */
/*                                                                      */
/************************************************************************/
public_scope void
RegExp_Diagnose(RegExp_Specification *pSpec, CHAR *pDiagnosis)
{
        RegExp_FailureCode FailureCode;

        /*Obtain failure code from module, or from specification if given*/
        FailureCode = gRegExp.FailureCode;
        if (pSpec != NULL) {
                FailureCode = pSpec->FailureCode;
        }

        /*Convert internal code into explanation understood by others*/
        switch (FailureCode) {
        case REGEXP_FAIL_NO_FAILURE:
               strcpy(pDiagnosis, "(no failure)");
               break;

        case REGEXP_FAIL_REGEXP_TOO_LARGE:
               strcpy(pDiagnosis, "RE too big for buffer");
               break;

        case REGEXP_FAIL_UNMATCHED_CLASS_OPEN:
               strcpy(pDiagnosis, "Unmatched [ or [^");
               break;

        case REGEXP_FAIL_NOT_ENOUGH_MEMORY:
               strcpy(pDiagnosis, "Not enough memory");
               break;

        case REGEXP_FAIL_RE_IS_OPTIMAL:
               strcpy(pDiagnosis, "RE is optimal");
               break;

        case REGEXP_FAIL_REQUEST_UNREASONABLE:
               strcpy(pDiagnosis, "Request isn't reasonable");
               break;

        default:
               strcpy(pDiagnosis, "RegExp: ??");
               break;

        }

} /*Diagnose*/


/************************************************************************/
/*                                                                      */
/*      Init -- Prepare module for operation                            */
/*                                                                      */
/************************************************************************/
public_scope void
RegExp_Init(void)
{
        /*No failure known initially*/
        gRegExp.FailureCode = REGEXP_FAIL_NO_FAILURE;

} /*Init*/